diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/AndroidManifest.xml glmark2-2021.02/android/AndroidManifest.xml --- glmark2-2014.03+git20150611.fa71af2d/android/AndroidManifest.xml 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/AndroidManifest.xml 2021-04-25 01:47:22.000000000 +0800 @@ -1,8 +1,11 @@ - + android:versionName="2021.02" package="org.linaro.glmark2"> + + @@ -50,5 +53,6 @@ - + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/meson.build glmark2-2021.02/android/assets/meson.build --- glmark2-2014.03+git20150611.fa71af2d/android/assets/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/assets/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,14 @@ +install_subdir( + 'shaders', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) + +install_subdir( + 'models', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) + +install_subdir( + 'textures', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/bump-height.frag glmark2-2021.02/android/assets/shaders/bump-height.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/bump-height.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/bump-height.frag 2021-04-25 01:47:22.000000000 +0800 @@ -4,7 +4,12 @@ uniform sampler2D HeightMap; +#ifdef GL_FRAGMENT_PRECISION_HIGH +varying highp vec2 TextureCoord; +#else varying vec2 TextureCoord; +#endif + varying vec3 NormalEye; varying vec3 TangentEye; varying vec3 BitangentEye; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/conditionals.frag glmark2-2021.02/android/assets/shaders/conditionals.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/conditionals.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/conditionals.frag 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/function.frag glmark2-2021.02/android/assets/shaders/function.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/function.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/function.frag 2021-04-25 01:47:22.000000000 +0800 @@ -8,7 +8,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/ideas-lamp-lit.frag glmark2-2021.02/android/assets/shaders/ideas-lamp-lit.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/ideas-lamp-lit.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/ideas-lamp-lit.frag 2021-04-25 01:47:22.000000000 +0800 @@ -6,7 +6,6 @@ vec4 position; }; LightSourceParameters lightSource[3]; -uniform mat4 modelview; uniform vec4 light0Position; uniform vec4 light1Position; uniform vec4 light2Position; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/light-refract.frag glmark2-2021.02/android/assets/shaders/light-refract.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/light-refract.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/light-refract.frag 2021-04-25 01:47:22.000000000 +0800 @@ -23,7 +23,7 @@ vec3 mc_perspective = (MapCoord.xyz / MapCoord.w) + front_refraction; vec2 dcoord = mc_perspective.st * point_five + point_five; vec4 distance_value = texture2D(DistanceMap, dcoord); - vec3 back_position = vertex_position.xyz + front_refraction * distance_value.z; + vec3 back_position = vertex_position.xyz + front_refraction * distance_value.x; // Use the exit point to index the map of back-side normals, and use the // back-side position and normal to find the transmitted vector out of the // object. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/loop.frag glmark2-2021.02/android/assets/shaders/loop.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/loop.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/loop.frag 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/shadow.frag glmark2-2021.02/android/assets/shaders/shadow.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/shadow.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/shadow.frag 2021-04-25 01:47:22.000000000 +0800 @@ -8,7 +8,7 @@ vec4 sc_perspective = ShadowCoord / ShadowCoord.w; sc_perspective.z += 0.1505; vec4 shadow_value = texture2D(ShadowMap, sc_perspective.st); - float light_distance = shadow_value.z; + float light_distance = shadow_value.x; float shadow = 1.0; if (ShadowCoord.w > 0.0 && light_distance < sc_perspective.z) { shadow = 0.5; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain.frag glmark2-2021.02/android/assets/shaders/terrain.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/terrain.frag 2021-04-25 01:47:22.000000000 +0800 @@ -3,9 +3,6 @@ uniform vec3 uSpecularColor; uniform float uShininess; uniform float uOpacity; -uniform bool enableDiffuse1; -uniform bool enableDiffuse2; -uniform bool enableSpecular; uniform sampler2D tDiffuse1; uniform sampler2D tDiffuse2; uniform sampler2D tDetail; @@ -70,7 +67,12 @@ vec3 pointSpecular = vec3( 0.0 ); for ( int i = 0; i < MAX_POINT_LIGHTS; i ++ ) { vec4 lPosition = viewMatrix * vec4( pointLightPosition[ i ], 1.0 ); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be highp for correct behaviour if mediump is implemented as fp16 + highp vec3 lVector = lPosition.xyz + vViewPosition.xyz; +#else vec3 lVector = lPosition.xyz + vViewPosition.xyz; +#endif float lDistance = 1.0; if ( pointLightDistance[ i ] > 0.0 ) lDistance = 1.0 - min( ( length( lVector ) / pointLightDistance[ i ] ), 1.0 ); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain-noise.frag glmark2-2021.02/android/assets/shaders/terrain-noise.frag --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain-noise.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/terrain-noise.frag 2021-04-25 01:47:22.000000000 +0800 @@ -17,7 +17,13 @@ uniform MEDIUMP vec2 uvScale; varying vec2 vUv; +#ifdef GL_FRAGMENT_PRECISION_HIGH +// x should be passed as highp since the intermediate multiplications can +// overflow with mediump +vec4 permute(highp vec4 x) +#else vec4 permute(vec4 x) +#endif { return mod(((x * 34.0) + 1.0) * x, 289.0); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain.vert glmark2-2021.02/android/assets/shaders/terrain.vert --- glmark2-2014.03+git20150611.fa71af2d/android/assets/shaders/terrain.vert 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/assets/shaders/terrain.vert 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,6 @@ uniform mat4 modelViewMatrix; uniform mat4 normalMatrix; uniform mat4 projectionMatrix; -uniform mat4 viewMatrix; // Vertex attributes attribute vec3 position; Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/assets/textures/effect-2d.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/assets/textures/effect-2d.png differ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/build.sh glmark2-2021.02/android/build.sh --- glmark2-2014.03+git20150611.fa71af2d/android/build.sh 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/build.sh 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,72 @@ +#!/bin/bash + +# Copyright 2019 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -z "${ANDROID_SDK}" ]; +then echo "Please set ANDROID_SDK, exiting"; exit 1; +else echo "ANDROID_SDK is ${ANDROID_SDK}"; +fi + +if [ -z "${ANDROID_NDK}" ]; +then echo "Please set ANDROID_NDK, exiting"; exit 1; +else echo "ANDROID_NDK is ${ANDROID_NDK}"; +fi + +if [ -z "${JAVA_HOME}" ]; +then echo "Please set JAVA_HOME, exiting"; exit 1; +else echo "JAVA_HOME is ${JAVA_HOME}"; +fi + +BUILD_TOOLS_VERSION=26.0.1 +BUILD_TOOLS=$ANDROID_SDK/build-tools/$BUILD_TOOLS_VERSION +if [ ! -d "${BUILD_TOOLS}" ]; +then echo "Please download correct build-tools version: ${BUILD_TOOLS_VERSION}, exiting"; exit 1; +else echo "BUILD_TOOLS is ${BUILD_TOOLS}"; +fi + +set -ev + +GLMARK2_BUILD_DIR=$PWD +GLMARK2_BASE_DIR=$GLMARK2_BUILD_DIR/.. +echo GLMARK2_BASE_DIR="${GLMARK2_BASE_DIR}" +echo GLMARK2_BUILD_DIR="${GLMARK2_BUILD_DIR}" + +# Android 16 is the minSdkVersion supported +ANDROID_JAR=$ANDROID_SDK/platforms/android-16/android.jar + +function create_APK() { + mkdir -p bin/lib obj + cp -r $GLMARK2_BUILD_DIR/libs/* $GLMARK2_BUILD_DIR/bin/lib + cp -r $GLMARK2_BASE_DIR/data $GLMARK2_BUILD_DIR/bin/assets + $BUILD_TOOLS/aapt package -f -m -S res -J src -M AndroidManifest.xml -I $ANDROID_JAR + $JAVA_HOME/bin/javac -d ./obj -source 1.7 -target 1.7 -bootclasspath $JAVA_HOME/jre/lib/rt.jar -classpath $ANDROID_JAR:obj -sourcepath src src/org/linaro/glmark2/*.java + $BUILD_TOOLS/dx --dex --output=bin/classes.dex ./obj + $BUILD_TOOLS/aapt package -f -M AndroidManifest.xml -S res -I $ANDROID_JAR -F $1-unaligned.apk bin + $JAVA_HOME/bin/jarsigner -verbose -keystore ~/.android/debug.keystore -storepass android -keypass android $1-unaligned.apk androiddebugkey + $BUILD_TOOLS/zipalign -f 4 $1-unaligned.apk $1.apk +} + +# +# build native libraries +# +$ANDROID_NDK/build/ndk-build -j $cores + +# +# build glmark2 APK +# +create_APK glmark2 + +echo Builds succeeded +exit 0 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/Android.mk glmark2-2021.02/android/jni/Android.mk --- glmark2-2014.03+git20150611.fa71af2d/android/jni/Android.mk 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/Android.mk 2021-04-25 01:47:22.000000000 +0800 @@ -54,7 +54,7 @@ LOCAL_MODULE_TAGS := optional LOCAL_MODULE := libglmark2-android LOCAL_STATIC_LIBRARIES := libglmark2-matrix libglmark2-png libglmark2-ideas libglmark2-jpeg -LOCAL_CFLAGS := -DGLMARK_DATA_PATH="" -DGLMARK_VERSION="\"2014.03\"" \ +LOCAL_CFLAGS := -DGLMARK_DATA_PATH="" -DGLMARK_VERSION="\"2021.02\"" \ -DGLMARK2_USE_GLESv2 -Werror -Wall -Wextra -Wnon-virtual-dtor \ -Wno-error=unused-parameter LOCAL_SHARED_LIBRARIES := liblog libz libEGL libGLESv2 libandroid libdl libstlport diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/Android.ndk.mk glmark2-2021.02/android/jni/Android.ndk.mk --- glmark2-2014.03+git20150611.fa71af2d/android/jni/Android.ndk.mk 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/Android.ndk.mk 2021-04-25 01:47:22.000000000 +0800 @@ -6,7 +6,8 @@ LOCAL_MODULE := libglmark2-matrix LOCAL_CFLAGS := -DGLMARK2_USE_GLESv2 -Werror -Wall -Wextra -Wnon-virtual-dtor \ -Wno-error=unused-parameter -LOCAL_C_INCLUDES := $(LOCAL_PATH)/src +LOCAL_C_INCLUDES := $(LOCAL_PATH)/src \ + $(LOCAL_PATH)/src/glad/include LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libmatrix/*.cc)) include $(BUILD_STATIC_LIBRARY) @@ -24,10 +25,51 @@ LOCAL_CFLAGS := -Werror -Wall -Wextra -Wno-error=attributes \ -Wno-error=unused-parameter -Wno-error=unused-function -Wno-error=unused-variable LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/libjpeg-turbo/ -LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/simd/*.c)) \ - $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/simd/*.S)) \ - $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/*.c)) - +LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcapimin.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcapistd.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jccoefct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jccolor.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcdctmgr.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jchuff.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcinit.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcmainct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcmarker.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcmaster.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcomapi.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcparam.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcphuff.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcprepct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jcsample.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdapimin.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdapistd.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdatadst.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdatasrc.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdcoefct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdcolor.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jddctmgr.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdhuff.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdinput.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdmainct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdmarker.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdmaster.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdmerge.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdphuff.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdpostct.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jdsample.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jerror.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jfdctflt.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jfdctfst.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jfdctint.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jidctflt.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jidctfst.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jidctint.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jidctred.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jmemmgr.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jmemnobs.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jquant1.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jquant2.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jutils.c)) \ + $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/libjpeg-turbo/jsimd_none.c)) include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) @@ -37,26 +79,40 @@ LOCAL_CFLAGS := -DGLMARK_DATA_PATH="" -DGLMARK2_USE_GLESv2 -Werror -Wall -Wextra\ -Wnon-virtual-dtor -Wno-error=unused-parameter LOCAL_C_INCLUDES := $(LOCAL_PATH)/src \ - $(LOCAL_PATH)/src/libmatrix + $(LOCAL_PATH)/src/libmatrix \ + $(LOCAL_PATH)/src/glad/include LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/scene-ideas/*.cc)) +include $(BUILD_STATIC_LIBRARY) +include $(CLEAR_VARS) +LOCAL_MODULE := libglad-egl +LOCAL_CFLAGS := -Werror -Wall +LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/glad/include +LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/glad/src/egl.c)) include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) +LOCAL_MODULE := libglad-glesv2 +LOCAL_CFLAGS := -Werror -Wall +LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/glad/include +LOCAL_SRC_FILES := $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/glad/src/gles2.c)) +include $(BUILD_STATIC_LIBRARY) +include $(CLEAR_VARS) LOCAL_MODULE_TAGS := optional LOCAL_MODULE := libglmark2-android -LOCAL_STATIC_LIBRARIES := libglmark2-matrix libglmark2-png libglmark2-ideas libglmark2-jpeg -LOCAL_CFLAGS := -DGLMARK_DATA_PATH="" -DGLMARK_VERSION="\"2014.03\"" \ +LOCAL_STATIC_LIBRARIES := libglmark2-matrix libglmark2-png libglmark2-ideas libglmark2-jpeg libglad-egl libglad-glesv2 +LOCAL_CFLAGS := -DGLMARK_DATA_PATH="" -DGLMARK_VERSION="\"2021.02\"" \ -DGLMARK2_USE_GLESv2 -Werror -Wall -Wextra -Wnon-virtual-dtor \ -Wno-error=unused-parameter -LOCAL_LDLIBS := -landroid -llog -lGLESv2 -lEGL -lz +LOCAL_LDLIBS := -landroid -llog -lz LOCAL_C_INCLUDES := $(LOCAL_PATH)/src \ $(LOCAL_PATH)/src/libmatrix \ $(LOCAL_PATH)/src/scene-ideas \ $(LOCAL_PATH)/src/scene-terrain \ $(LOCAL_PATH)/src/libjpeg-turbo \ - $(LOCAL_PATH)/src/libpng + $(LOCAL_PATH)/src/libpng \ + $(LOCAL_PATH)/src/glad/include LOCAL_SRC_FILES := $(filter-out src/canvas% src/gl-state% src/native-state% src/main.cpp, \ $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/*.cpp))) \ $(subst $(LOCAL_PATH)/,,$(wildcard $(LOCAL_PATH)/src/scene-terrain/*.cpp)) \ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/Application.mk glmark2-2021.02/android/jni/Application.mk --- glmark2-2014.03+git20150611.fa71af2d/android/jni/Application.mk 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/Application.mk 2021-04-25 01:47:22.000000000 +0800 @@ -1,3 +1,4 @@ -APP_STL := stlport_static -APP_PLATFORM := android-9 +APP_ABI := all +APP_STL := c++_static +APP_PLATFORM := android-16 APP_BUILD_SCRIPT := jni/Android.ndk.mk diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-android.cpp glmark2-2021.02/android/jni/src/canvas-android.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-android.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/canvas-android.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,7 +24,6 @@ #include "log.h" #include "options.h" #include "gl-headers.h" -#include #include #include @@ -41,8 +40,26 @@ EGL_NONE }; + /* Init the GLAD entrypoints */ + if (!egl_lib_.open_from_alternatives({"libEGL.so", "libEGL.so.1" })) { + Log::error("Error loading EGL library\n"); + return false; + } + + if (gladLoadEGLUserPtr(EGL_NO_DISPLAY, load_proc, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + /* Get the current EGL config */ EGLDisplay egl_display(eglGetCurrentDisplay()); + + /* Reinitialize GLAD with a known display */ + if (gladLoadEGLUserPtr(egl_display, load_proc, &egl_lib_) == 0) { + Log::error("Loading EGL entry points with display failed\n"); + return false; + } + EGLContext egl_context(eglGetCurrentContext()); EGLConfig egl_config(0); EGLint num_configs; @@ -51,6 +68,17 @@ eglChooseConfig(egl_display, attribs, &egl_config, 1, &num_configs); + /* Before calling GLES functions, init GLAD GLES */ + if (!gles_lib_.open("libGLESv2.so")) { + Log::error("Error loading GLES library\n"); + return false; + } + + if (!gladLoadGLES2UserPtr(load_proc, &gles_lib_)) { + Log::error("Loading GLESv2 entry points failed."); + return false; + } + resize(width_, height_); if (!eglSwapInterval(egl_display, 0)) @@ -168,6 +196,20 @@ * Private methods * *******************/ +GLADapiproc +CanvasAndroid::load_proc(void *userdata, const char *name) +{ + if (eglGetProcAddress) { + GLADapiproc sym = reinterpret_cast(eglGetProcAddress(name)); + if (sym) { + return sym; + } + } + + SharedLibrary* lib = reinterpret_cast(userdata); + return reinterpret_cast(lib->load(name)); +} + void CanvasAndroid::init_gl_extensions() { diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-android.h glmark2-2021.02/android/jni/src/canvas-android.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-android.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/canvas-android.h 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,8 @@ #define GLMARK2_CANVAS_ANDROID_H_ #include "canvas.h" +#include "shared-library.h" +#include "glad/egl.h" /** * Canvas for rendering to Android surfaces. @@ -49,6 +51,10 @@ void resize(int width, int height); private: + SharedLibrary egl_lib_; + SharedLibrary gles_lib_; + + static GLADapiproc load_proc(void *userdata, const char *name); void init_gl_extensions(); }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-generic.cpp glmark2-2021.02/android/jni/src/canvas-generic.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-generic.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/canvas-generic.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -103,7 +103,7 @@ Options::FrameEnd m = Options::frame_end; if (m == Options::FrameEndDefault) { - if (offscreen_) + if (offscreen_ || Options::validate) m = Options::FrameEndFinish; else m = Options::FrameEndSwap; @@ -194,7 +194,8 @@ bool CanvasGeneric::supports_gl2() { - std::string gl_version_str(reinterpret_cast(glGetString(GL_VERSION))); + const char *version = reinterpret_cast(glGetString(GL_VERSION)); + std::string gl_version_str(version ? version : ""); int gl_major(0); size_t point_pos(gl_version_str.find('.')); @@ -221,7 +222,7 @@ { bool request_fullscreen = (width == -1 && height == -1); - int vid; + intptr_t vid; if (!gl_state_.gotNativeConfig(vid)) { Log::error("Error: Couldn't get GL visual config!\n"); @@ -249,6 +250,7 @@ } native_window_ = native_state_.window(cur_properties); + window_initialized_ = true; width_ = cur_properties.width; height_ = cur_properties.height; @@ -274,6 +276,12 @@ bool CanvasGeneric::do_make_current() { + if (!window_initialized_) { + Log::error("glwindow has never been initialized, check native-state code\n" + "it should not return a valid window until create_window() is called\n"); + return false; + } + gl_state_.init_surface(native_window_); if (!gl_state_.valid()) { @@ -281,7 +289,8 @@ return false; } - gl_state_.init_gl_extensions(); + if (!gl_state_.init_gl_extensions()) + return false; if (offscreen_) { if (!ensure_fbo()) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-generic.h glmark2-2021.02/android/jni/src/canvas-generic.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/canvas-generic.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/canvas-generic.h 2021-04-25 01:47:22.000000000 +0800 @@ -37,9 +37,10 @@ CanvasGeneric(NativeState& native_state, GLState& gl_state, int width, int height) : Canvas(width, height), - native_state_(native_state), gl_state_(gl_state), + native_state_(native_state), gl_state_(gl_state), native_window_(0), gl_color_format_(0), gl_depth_format_(0), - color_renderbuffer_(0), depth_renderbuffer_(0), fbo_(0) {} + color_renderbuffer_(0), depth_renderbuffer_(0), fbo_(0), + window_initialized_(false) {} bool init(); bool reset(); @@ -70,6 +71,7 @@ GLuint color_renderbuffer_; GLuint depth_renderbuffer_; GLuint fbo_; + bool window_initialized_; }; #endif /* GLMARK2_CANVAS_GENERIC_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/EGL/eglplatform.h glmark2-2021.02/android/jni/src/glad/include/EGL/eglplatform.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/EGL/eglplatform.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/EGL/eglplatform.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,176 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 30994 $ on $Date: 2015-04-30 13:36:48 -0700 (Thu, 30 Apr 2015) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. + * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. + */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__EMSCRIPTEN__) + +typedef int EGLNativeDisplayType; +typedef int EGLNativePixmapType; +typedef int EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(WL_EGL_PLATFORM) + +typedef struct wl_display *EGLNativeDisplayType; +typedef struct wl_egl_pixmap *EGLNativePixmapType; +typedef struct wl_egl_window *EGLNativeWindowType; + +#elif defined(__GBM__) + +typedef struct gbm_device *EGLNativeDisplayType; +typedef struct gbm_bo *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(__ANDROID__) || defined(ANDROID) + +struct ANativeWindow; +struct egl_native_pixmap_t; + +typedef void* EGLNativeDisplayType; +typedef struct egl_native_pixmap_t* EGLNativePixmapType; +typedef struct ANativeWindow* EGLNativeWindowType; + +#elif defined(USE_OZONE) + +typedef intptr_t EGLNativeDisplayType; +typedef intptr_t EGLNativePixmapType; +typedef intptr_t EGLNativeWindowType; + +#elif defined(__unix__) && defined(EGL_NO_X11) + +typedef void *EGLNativeDisplayType; +typedef khronos_uintptr_t EGLNativePixmapType; +typedef khronos_uintptr_t EGLNativeWindowType; + +#elif defined(__unix__) || defined(USE_X11) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#elif defined(__APPLE__) + +typedef int EGLNativeDisplayType; +typedef void *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(__HAIKU__) + +#include + +typedef void *EGLNativeDisplayType; +typedef khronos_uintptr_t EGLNativePixmapType; +typedef khronos_uintptr_t EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. + */ +typedef khronos_int32_t EGLint; + + +/* C++ / C typecast macros for special EGL handle values */ +#if defined(__cplusplus) +#define EGL_CAST(type, value) (static_cast(value)) +#else +#define EGL_CAST(type, value) ((type) (value)) +#endif + +#endif /* __eglplatform_h */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/egl.h glmark2-2021.02/android/jni/src/glad/include/glad/egl.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/egl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/glad/egl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,554 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:15 2019 + * + * Generator: C/C++ + * Specification: egl + * Extensions: 4 + * + * APIs: + * - egl=1.5 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='egl=1.5' --extensions='EGL_EXT_platform_base,EGL_KHR_platform_gbm,EGL_KHR_platform_wayland,EGL_KHR_platform_x11' c + * + * Online: + * http://glad.sh/#api=egl%3D1.5&extensions=EGL_EXT_platform_base%2CEGL_KHR_platform_gbm%2CEGL_KHR_platform_wayland%2CEGL_KHR_platform_x11&generator=c&options= + * + */ + +#ifndef GLAD_EGL_H_ +#define GLAD_EGL_H_ + + +#define GLAD_EGL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define EGL_ALPHA_FORMAT 0x3088 +#define EGL_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_ALPHA_FORMAT_PRE 0x308C +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_BLUE_SIZE 0x3022 +#define EGL_BUFFER_DESTROYED 0x3095 +#define EGL_BUFFER_PRESERVED 0x3094 +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_CLIENT_APIS 0x308D +#define EGL_CL_EVENT_HANDLE 0x309C +#define EGL_COLORSPACE 0x3087 +#define EGL_COLORSPACE_LINEAR 0x308A +#define EGL_COLORSPACE_sRGB 0x3089 +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_CONDITION_SATISFIED 0x30F6 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_CONFORMANT 0x3042 +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_CONTEXT_LOST 0x300E +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2 +#define EGL_CORE_NATIVE_ENGINE 0x305B +#define EGL_DEFAULT_DISPLAY EGL_CAST(EGLNativeDisplayType,0) +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_DISPLAY_SCALING 10000 +#define EGL_DONT_CARE EGL_CAST(EGLint,-1) +#define EGL_DRAW 0x3059 +#define EGL_EXTENSIONS 0x3055 +#define EGL_FALSE 0 +#define EGL_FOREVER 0xFFFFFFFFFFFFFFFF +#define EGL_GL_COLORSPACE 0x309D +#define EGL_GL_COLORSPACE_LINEAR 0x308A +#define EGL_GL_COLORSPACE_SRGB 0x3089 +#define EGL_GL_RENDERBUFFER 0x30B9 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_GL_TEXTURE_3D 0x30B2 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7 +#define EGL_GL_TEXTURE_LEVEL 0x30BC +#define EGL_GL_TEXTURE_ZOFFSET 0x30BD +#define EGL_GREEN_SIZE 0x3023 +#define EGL_HEIGHT 0x3056 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_IMAGE_PRESERVED 0x30D2 +#define EGL_LARGEST_PBUFFER 0x3058 +#define EGL_LEVEL 0x3029 +#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF +#define EGL_LUMINANCE_BUFFER 0x308F +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_NONE 0x3038 +#define EGL_NON_CONFORMANT_CONFIG 0x3051 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_NO_CONTEXT EGL_CAST(EGLContext,0) +#define EGL_NO_DISPLAY EGL_CAST(EGLDisplay,0) +#define EGL_NO_IMAGE EGL_CAST(EGLImage,0) +#define EGL_NO_RESET_NOTIFICATION 0x31BE +#define EGL_NO_SURFACE EGL_CAST(EGLSurface,0) +#define EGL_NO_SYNC EGL_CAST(EGLSync,0) +#define EGL_NO_TEXTURE 0x305C +#define EGL_OPENGL_API 0x30A2 +#define EGL_OPENGL_BIT 0x0008 +#define EGL_OPENGL_ES2_BIT 0x0004 +#define EGL_OPENGL_ES3_BIT 0x00000040 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENGL_ES_BIT 0x0001 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENVG_BIT 0x0002 +#define EGL_OPENVG_IMAGE 0x3096 +#define EGL_PBUFFER_BIT 0x0001 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_PIXMAP_BIT 0x0002 +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#define EGL_PLATFORM_WAYLAND_KHR 0x31D8 +#define EGL_PLATFORM_X11_KHR 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6 +#define EGL_READ 0x305A +#define EGL_RED_SIZE 0x3024 +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_RGB_BUFFER 0x308E +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SIGNALED 0x30F2 +#define EGL_SINGLE_BUFFER 0x3085 +#define EGL_SLOW_CONFIG 0x3050 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_SUCCESS 0x3000 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 +#define EGL_SYNC_CL_EVENT 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF +#define EGL_SYNC_CONDITION 0x30F8 +#define EGL_SYNC_FENCE 0x30F9 +#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0 +#define EGL_SYNC_STATUS 0x30F1 +#define EGL_SYNC_TYPE 0x30F7 +#define EGL_TEXTURE_2D 0x305F +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_RGB 0x305D +#define EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_TARGET 0x3081 +#define EGL_TIMEOUT_EXPIRED 0x30F5 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_TRANSPARENT_RGB 0x3052 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRUE 1 +#define EGL_UNKNOWN EGL_CAST(EGLint,-1) +#define EGL_UNSIGNALED 0x30F3 +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_VERTICAL_RESOLUTION 0x3091 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_COLORSPACE_LINEAR 0x308A +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 +#define EGL_VG_COLORSPACE_sRGB 0x3089 +#define EGL_WIDTH 0x3057 +#define EGL_WINDOW_BIT 0x0004 + + +#include +#include + + + + + + + + + + + +struct AHardwareBuffer; + +typedef unsigned int EGLBoolean; +typedef unsigned int EGLenum; +typedef intptr_t EGLAttribKHR; +typedef intptr_t EGLAttrib; +typedef void *EGLClientBuffer; +typedef void *EGLConfig; +typedef void *EGLContext; +typedef void *EGLDeviceEXT; +typedef void *EGLDisplay; +typedef void *EGLImage; +typedef void *EGLImageKHR; +typedef void *EGLLabelKHR; +typedef void *EGLObjectKHR; +typedef void *EGLOutputLayerEXT; +typedef void *EGLOutputPortEXT; +typedef void *EGLStreamKHR; +typedef void *EGLSurface; +typedef void *EGLSync; +typedef void *EGLSyncKHR; +typedef void *EGLSyncNV; +typedef void (*__eglMustCastToProperFunctionPointerType)(void); +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +typedef khronos_utime_nanoseconds_t EGLTime; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +typedef khronos_utime_nanoseconds_t EGLuint64NV; +typedef khronos_uint64_t EGLuint64KHR; +typedef khronos_stime_nanoseconds_t EGLnsecsANDROID; +typedef int EGLNativeFileDescriptorKHR; +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +typedef void (GLAD_API_PTR *EGLDEBUGPROCKHR)(EGLenum error,const char *command,EGLint messageType,EGLLabelKHR threadLabel,EGLLabelKHR objectLabel,const char* message); + + +#define EGL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_0; +#define EGL_VERSION_1_1 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_1; +#define EGL_VERSION_1_2 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_2; +#define EGL_VERSION_1_3 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_3; +#define EGL_VERSION_1_4 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_4; +#define EGL_VERSION_1_5 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_5; +#define EGL_EXT_platform_base 1 +GLAD_API_CALL int GLAD_EGL_EXT_platform_base; +#define EGL_KHR_platform_gbm 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_gbm; +#define EGL_KHR_platform_wayland 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_wayland; +#define EGL_KHR_platform_x11 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_x11; + + +typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDAPIPROC)(EGLenum api); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDTEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLCHOOSECONFIGPROC)(EGLDisplay dpy, const EGLint * attrib_list, EGLConfig * configs, EGLint config_size, EGLint * num_config); +typedef EGLint (GLAD_API_PTR *PFNEGLCLIENTWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLCOPYBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target); +typedef EGLContext (GLAD_API_PTR *PFNEGLCREATECONTEXTPROC)(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint * attrib_list); +typedef EGLImage (GLAD_API_PTR *PFNEGLCREATEIMAGEPROC)(EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC)(EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERSURFACEPROC)(EGLDisplay dpy, EGLConfig config, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_pixmap, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC)(EGLDisplay dpy, EGLConfig config, void * native_pixmap, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_window, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC)(EGLDisplay dpy, EGLConfig config, void * native_window, const EGLint * attrib_list); +typedef EGLSync (GLAD_API_PTR *PFNEGLCREATESYNCPROC)(EGLDisplay dpy, EGLenum type, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint * attrib_list); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYIMAGEPROC)(EGLDisplay dpy, EGLImage image); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSYNCPROC)(EGLDisplay dpy, EGLSync sync); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGATTRIBPROC)(EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGSPROC)(EGLDisplay dpy, EGLConfig * configs, EGLint config_size, EGLint * num_config); +typedef EGLContext (GLAD_API_PTR *PFNEGLGETCURRENTCONTEXTPROC)(void); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETCURRENTDISPLAYPROC)(void); +typedef EGLSurface (GLAD_API_PTR *PFNEGLGETCURRENTSURFACEPROC)(EGLint readdraw); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETDISPLAYPROC)(EGLNativeDisplayType display_id); +typedef EGLint (GLAD_API_PTR *PFNEGLGETERRORPROC)(void); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETPLATFORMDISPLAYPROC)(EGLenum platform, void * native_display, const EGLAttrib * attrib_list); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETPLATFORMDISPLAYEXTPROC)(EGLenum platform, void * native_display, const EGLint * attrib_list); +typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char * procname); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETSYNCATTRIBPROC)(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLINITIALIZEPROC)(EGLDisplay dpy, EGLint * major, EGLint * minor); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLMAKECURRENTPROC)(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +typedef EGLenum (GLAD_API_PTR *PFNEGLQUERYAPIPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint * value); +typedef const char * (GLAD_API_PTR *PFNEGLQUERYSTRINGPROC)(EGLDisplay dpy, EGLint name); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETHREADPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSURFACEATTRIBPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPINTERVALPROC)(EGLDisplay dpy, EGLint interval); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLTERMINATEPROC)(EGLDisplay dpy); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITCLIENTPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITGLPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITNATIVEPROC)(EGLint engine); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags); + +GLAD_API_CALL PFNEGLBINDAPIPROC glad_eglBindAPI; +#define eglBindAPI glad_eglBindAPI +GLAD_API_CALL PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage; +#define eglBindTexImage glad_eglBindTexImage +GLAD_API_CALL PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig; +#define eglChooseConfig glad_eglChooseConfig +GLAD_API_CALL PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync; +#define eglClientWaitSync glad_eglClientWaitSync +GLAD_API_CALL PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers; +#define eglCopyBuffers glad_eglCopyBuffers +GLAD_API_CALL PFNEGLCREATECONTEXTPROC glad_eglCreateContext; +#define eglCreateContext glad_eglCreateContext +GLAD_API_CALL PFNEGLCREATEIMAGEPROC glad_eglCreateImage; +#define eglCreateImage glad_eglCreateImage +GLAD_API_CALL PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer; +#define eglCreatePbufferFromClientBuffer glad_eglCreatePbufferFromClientBuffer +GLAD_API_CALL PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface; +#define eglCreatePbufferSurface glad_eglCreatePbufferSurface +GLAD_API_CALL PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface; +#define eglCreatePixmapSurface glad_eglCreatePixmapSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface; +#define eglCreatePlatformPixmapSurface glad_eglCreatePlatformPixmapSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC glad_eglCreatePlatformPixmapSurfaceEXT; +#define eglCreatePlatformPixmapSurfaceEXT glad_eglCreatePlatformPixmapSurfaceEXT +GLAD_API_CALL PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface; +#define eglCreatePlatformWindowSurface glad_eglCreatePlatformWindowSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC glad_eglCreatePlatformWindowSurfaceEXT; +#define eglCreatePlatformWindowSurfaceEXT glad_eglCreatePlatformWindowSurfaceEXT +GLAD_API_CALL PFNEGLCREATESYNCPROC glad_eglCreateSync; +#define eglCreateSync glad_eglCreateSync +GLAD_API_CALL PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface; +#define eglCreateWindowSurface glad_eglCreateWindowSurface +GLAD_API_CALL PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext; +#define eglDestroyContext glad_eglDestroyContext +GLAD_API_CALL PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage; +#define eglDestroyImage glad_eglDestroyImage +GLAD_API_CALL PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface; +#define eglDestroySurface glad_eglDestroySurface +GLAD_API_CALL PFNEGLDESTROYSYNCPROC glad_eglDestroySync; +#define eglDestroySync glad_eglDestroySync +GLAD_API_CALL PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib; +#define eglGetConfigAttrib glad_eglGetConfigAttrib +GLAD_API_CALL PFNEGLGETCONFIGSPROC glad_eglGetConfigs; +#define eglGetConfigs glad_eglGetConfigs +GLAD_API_CALL PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext; +#define eglGetCurrentContext glad_eglGetCurrentContext +GLAD_API_CALL PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay; +#define eglGetCurrentDisplay glad_eglGetCurrentDisplay +GLAD_API_CALL PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface; +#define eglGetCurrentSurface glad_eglGetCurrentSurface +GLAD_API_CALL PFNEGLGETDISPLAYPROC glad_eglGetDisplay; +#define eglGetDisplay glad_eglGetDisplay +GLAD_API_CALL PFNEGLGETERRORPROC glad_eglGetError; +#define eglGetError glad_eglGetError +GLAD_API_CALL PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay; +#define eglGetPlatformDisplay glad_eglGetPlatformDisplay +GLAD_API_CALL PFNEGLGETPLATFORMDISPLAYEXTPROC glad_eglGetPlatformDisplayEXT; +#define eglGetPlatformDisplayEXT glad_eglGetPlatformDisplayEXT +GLAD_API_CALL PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress; +#define eglGetProcAddress glad_eglGetProcAddress +GLAD_API_CALL PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib; +#define eglGetSyncAttrib glad_eglGetSyncAttrib +GLAD_API_CALL PFNEGLINITIALIZEPROC glad_eglInitialize; +#define eglInitialize glad_eglInitialize +GLAD_API_CALL PFNEGLMAKECURRENTPROC glad_eglMakeCurrent; +#define eglMakeCurrent glad_eglMakeCurrent +GLAD_API_CALL PFNEGLQUERYAPIPROC glad_eglQueryAPI; +#define eglQueryAPI glad_eglQueryAPI +GLAD_API_CALL PFNEGLQUERYCONTEXTPROC glad_eglQueryContext; +#define eglQueryContext glad_eglQueryContext +GLAD_API_CALL PFNEGLQUERYSTRINGPROC glad_eglQueryString; +#define eglQueryString glad_eglQueryString +GLAD_API_CALL PFNEGLQUERYSURFACEPROC glad_eglQuerySurface; +#define eglQuerySurface glad_eglQuerySurface +GLAD_API_CALL PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage; +#define eglReleaseTexImage glad_eglReleaseTexImage +GLAD_API_CALL PFNEGLRELEASETHREADPROC glad_eglReleaseThread; +#define eglReleaseThread glad_eglReleaseThread +GLAD_API_CALL PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib; +#define eglSurfaceAttrib glad_eglSurfaceAttrib +GLAD_API_CALL PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers; +#define eglSwapBuffers glad_eglSwapBuffers +GLAD_API_CALL PFNEGLSWAPINTERVALPROC glad_eglSwapInterval; +#define eglSwapInterval glad_eglSwapInterval +GLAD_API_CALL PFNEGLTERMINATEPROC glad_eglTerminate; +#define eglTerminate glad_eglTerminate +GLAD_API_CALL PFNEGLWAITCLIENTPROC glad_eglWaitClient; +#define eglWaitClient glad_eglWaitClient +GLAD_API_CALL PFNEGLWAITGLPROC glad_eglWaitGL; +#define eglWaitGL glad_eglWaitGL +GLAD_API_CALL PFNEGLWAITNATIVEPROC glad_eglWaitNative; +#define eglWaitNative glad_eglWaitNative +GLAD_API_CALL PFNEGLWAITSYNCPROC glad_eglWaitSync; +#define eglWaitSync glad_eglWaitSync + + + + + +GLAD_API_CALL int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadEGL(EGLDisplay display, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/gles2.h glmark2-2021.02/android/jni/src/glad/include/glad/gles2.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/gles2.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/glad/gles2.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1015 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:14 2019 + * + * Generator: C/C++ + * Specification: gl + * Extensions: 2 + * + * APIs: + * - gles2=2.0 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='gles2=2.0' --extensions='GL_OES_mapbuffer,GL_OES_required_internalformat' c + * + * Online: + * http://glad.sh/#api=gles2%3D2.0&extensions=GL_OES_mapbuffer%2CGL_OES_required_internalformat&generator=c&options= + * + */ + +#ifndef GLAD_GLES2_H_ +#define GLAD_GLES2_H_ + +#ifdef __gl2_h_ + #error OpenGL ES 2 header already included (API: gles2), remove previous include! +#endif +#define __gl2_h_ 1 + +#ifdef __gl3_h_ + #error OpenGL ES 3 header already included (API: gles2), remove previous include! +#endif +#define __gl3_h_ 1 + + +#define GLAD_GLES2 + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALPHA 0x1906 +#define GL_ALPHA8_OES 0x803C +#define GL_ALPHA_BITS 0x0D55 +#define GL_ALWAYS 0x0207 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_BACK 0x0405 +#define GL_BLEND 0x0BE2 +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLUE_BITS 0x0D54 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_BYTE 0x1400 +#define GL_CCW 0x0901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_CW 0x0900 +#define GL_DECR 0x1E03 +#define GL_DECR_WRAP 0x8508 +#define GL_DELETE_STATUS 0x8B80 +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_DEPTH_BITS 0x0D56 +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DITHER 0x0BD0 +#define GL_DONT_CARE 0x1100 +#define GL_DST_ALPHA 0x0304 +#define GL_DST_COLOR 0x0306 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_EQUAL 0x0202 +#define GL_EXTENSIONS 0x1F03 +#define GL_FALSE 0 +#define GL_FASTEST 0x1101 +#define GL_FIXED 0x140C +#define GL_FLOAT 0x1406 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRONT 0x0404 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_FRONT_FACE 0x0B46 +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_FUNC_SUBTRACT 0x800A +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_GEQUAL 0x0206 +#define GL_GREATER 0x0204 +#define GL_GREEN_BITS 0x0D53 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_HIGH_INT 0x8DF5 +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_INCR 0x1E02 +#define GL_INCR_WRAP 0x8507 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_INVALID_OPERATION 0x0502 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVERT 0x150A +#define GL_KEEP 0x1E00 +#define GL_LEQUAL 0x0203 +#define GL_LESS 0x0201 +#define GL_LINEAR 0x2601 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINK_STATUS 0x8B82 +#define GL_LOW_FLOAT 0x8DF0 +#define GL_LOW_INT 0x8DF3 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE4_ALPHA4_OES 0x8043 +#define GL_LUMINANCE8_ALPHA8_OES 0x8045 +#define GL_LUMINANCE8_OES 0x8040 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_NEAREST 0x2600 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_NEVER 0x0200 +#define GL_NICEST 0x1102 +#define GL_NONE 0 +#define GL_NOTEQUAL 0x0205 +#define GL_NO_ERROR 0 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_ONE 1 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_POINTS 0x0000 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_RED_BITS 0x0D52 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERER 0x1F01 +#define GL_REPEAT 0x2901 +#define GL_REPLACE 0x1E01 +#define GL_RGB 0x1907 +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_RGB565 0x8D62 +#define GL_RGB565_OES 0x8D62 +#define GL_RGB5_A1 0x8057 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA 0x1908 +#define GL_RGBA4 0x8056 +#define GL_RGBA4_OES 0x8056 +#define GL_RGBA8_OES 0x8058 +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_TYPE 0x8B4F +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_SHORT 0x1402 +#define GL_SRC_ALPHA 0x0302 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_SRC_COLOR 0x0300 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_STENCIL_BITS 0x0D57 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STREAM_DRAW 0x88E0 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRUE 1 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_VENDOR 0x1F00 +#define GL_VERSION 0x1F02 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_VIEWPORT 0x0BA2 +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_ZERO 0 + + +#include +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef khronos_int8_t GLbyte; +typedef khronos_uint8_t GLubyte; +typedef khronos_int16_t GLshort; +typedef khronos_uint16_t GLushort; +typedef int GLint; +typedef unsigned int GLuint; +typedef khronos_int32_t GLclampx; +typedef int GLsizei; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void *GLeglClientBufferEXT; +typedef void *GLeglImageOES; +typedef char GLchar; +typedef char GLcharARB; +#ifdef __APPLE__ +typedef void *GLhandleARB; +#else +typedef unsigned int GLhandleARB; +#endif +typedef khronos_uint16_t GLhalf; +typedef khronos_uint16_t GLhalfARB; +typedef khronos_int32_t GLfixed; +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptr; +#else +typedef khronos_intptr_t GLintptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptrARB; +#else +typedef khronos_intptr_t GLintptrARB; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptr; +#else +typedef khronos_ssize_t GLsizeiptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptrARB; +#else +typedef khronos_ssize_t GLsizeiptrARB; +#endif +typedef khronos_int64_t GLint64; +typedef khronos_int64_t GLint64EXT; +typedef khronos_uint64_t GLuint64; +typedef khronos_uint64_t GLuint64EXT; +typedef struct __GLsync *GLsync; +struct _cl_context; +struct _cl_event; +typedef void (GLAD_API_PTR *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); +typedef unsigned short GLhalfNV; +typedef GLintptr GLvdpauSurfaceNV; +typedef void (GLAD_API_PTR *GLVULKANPROCNV)(void); + + +#define GL_ES_VERSION_2_0 1 +GLAD_API_CALL int GLAD_GL_ES_VERSION_2_0; +#define GL_OES_mapbuffer 1 +GLAD_API_CALL int GLAD_GL_OES_mapbuffer; +#define GL_OES_required_internalformat 1 +GLAD_API_CALL int GLAD_GL_OES_required_internalformat; + + +typedef void (GLAD_API_PTR *PFNGLACTIVETEXTUREPROC)(GLenum texture); +typedef void (GLAD_API_PTR *PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer); +typedef void (GLAD_API_PTR *PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture); +typedef void (GLAD_API_PTR *PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (GLAD_API_PTR *PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void * data, GLenum usage); +typedef void (GLAD_API_PTR *PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void * data); +typedef GLenum (GLAD_API_PTR *PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLCLEARPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARDEPTHFPROC)(GLfloat d); +typedef void (GLAD_API_PTR *PFNGLCLEARSTENCILPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +typedef void (GLAD_API_PTR *PFNGLCOMPILESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef GLuint (GLAD_API_PTR *PFNGLCREATEPROGRAMPROC)(void); +typedef GLuint (GLAD_API_PTR *PFNGLCREATESHADERPROC)(GLenum type); +typedef void (GLAD_API_PTR *PFNGLCULLFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLDELETEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLDELETESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLDEPTHFUNCPROC)(GLenum func); +typedef void (GLAD_API_PTR *PFNGLDEPTHMASKPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLDEPTHRANGEFPROC)(GLfloat n, GLfloat f); +typedef void (GLAD_API_PTR *PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDISABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLENABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLFINISHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRONTFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLGENBUFFERSPROC)(GLsizei n, GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLGENTEXTURESPROC)(GLsizei n, GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLGENERATEMIPMAPPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei * count, GLuint * shaders); +typedef GLint (GLAD_API_PTR *PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVOESPROC)(GLenum target, GLenum pname, void ** params); +typedef GLenum (GLAD_API_PTR *PFNGLGETERRORPROC)(void); +typedef void (GLAD_API_PTR *PFNGLGETFLOATVPROC)(GLenum pname, GLfloat * data); +typedef void (GLAD_API_PTR *PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERVPROC)(GLenum pname, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETSHADERPRECISIONFORMATPROC)(GLenum shadertype, GLenum precisiontype, GLint * range, GLint * precision); +typedef void (GLAD_API_PTR *PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * source); +typedef void (GLAD_API_PTR *PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint * params); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGPROC)(GLenum name); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef GLint (GLAD_API_PTR *PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void ** pointer); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLHINTPROC)(GLenum target, GLenum mode); +typedef GLboolean (GLAD_API_PTR *PFNGLISBUFFERPROC)(GLuint buffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDPROC)(GLenum cap); +typedef GLboolean (GLAD_API_PTR *PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISPROGRAMPROC)(GLuint program); +typedef GLboolean (GLAD_API_PTR *PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISSHADERPROC)(GLuint shader); +typedef GLboolean (GLAD_API_PTR *PFNGLISTEXTUREPROC)(GLuint texture); +typedef void (GLAD_API_PTR *PFNGLLINEWIDTHPROC)(GLfloat width); +typedef void (GLAD_API_PTR *PFNGLLINKPROGRAMPROC)(GLuint program); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERPROC)(GLenum target, GLenum access); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFEROESPROC)(GLenum target, GLenum access); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units); +typedef void (GLAD_API_PTR *PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLRELEASESHADERCOMPILERPROC)(void); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert); +typedef void (GLAD_API_PTR *PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSHADERBINARYPROC)(GLsizei count, const GLuint * shaders, GLenum binaryformat, const void * binary, GLsizei length); +typedef void (GLAD_API_PTR *PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const* string, const GLint * length); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IPROC)(GLint location, GLint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFERPROC)(GLenum target); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFEROESPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLUSEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVALIDATEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); + +GLAD_API_CALL PFNGLACTIVETEXTUREPROC glad_glActiveTexture; +#define glActiveTexture glad_glActiveTexture +GLAD_API_CALL PFNGLATTACHSHADERPROC glad_glAttachShader; +#define glAttachShader glad_glAttachShader +GLAD_API_CALL PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation; +#define glBindAttribLocation glad_glBindAttribLocation +GLAD_API_CALL PFNGLBINDBUFFERPROC glad_glBindBuffer; +#define glBindBuffer glad_glBindBuffer +GLAD_API_CALL PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer; +#define glBindFramebuffer glad_glBindFramebuffer +GLAD_API_CALL PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer; +#define glBindRenderbuffer glad_glBindRenderbuffer +GLAD_API_CALL PFNGLBINDTEXTUREPROC glad_glBindTexture; +#define glBindTexture glad_glBindTexture +GLAD_API_CALL PFNGLBLENDCOLORPROC glad_glBlendColor; +#define glBlendColor glad_glBlendColor +GLAD_API_CALL PFNGLBLENDEQUATIONPROC glad_glBlendEquation; +#define glBlendEquation glad_glBlendEquation +GLAD_API_CALL PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate; +#define glBlendEquationSeparate glad_glBlendEquationSeparate +GLAD_API_CALL PFNGLBLENDFUNCPROC glad_glBlendFunc; +#define glBlendFunc glad_glBlendFunc +GLAD_API_CALL PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; +#define glBlendFuncSeparate glad_glBlendFuncSeparate +GLAD_API_CALL PFNGLBUFFERDATAPROC glad_glBufferData; +#define glBufferData glad_glBufferData +GLAD_API_CALL PFNGLBUFFERSUBDATAPROC glad_glBufferSubData; +#define glBufferSubData glad_glBufferSubData +GLAD_API_CALL PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus; +#define glCheckFramebufferStatus glad_glCheckFramebufferStatus +GLAD_API_CALL PFNGLCLEARPROC glad_glClear; +#define glClear glad_glClear +GLAD_API_CALL PFNGLCLEARCOLORPROC glad_glClearColor; +#define glClearColor glad_glClearColor +GLAD_API_CALL PFNGLCLEARDEPTHFPROC glad_glClearDepthf; +#define glClearDepthf glad_glClearDepthf +GLAD_API_CALL PFNGLCLEARSTENCILPROC glad_glClearStencil; +#define glClearStencil glad_glClearStencil +GLAD_API_CALL PFNGLCOLORMASKPROC glad_glColorMask; +#define glColorMask glad_glColorMask +GLAD_API_CALL PFNGLCOMPILESHADERPROC glad_glCompileShader; +#define glCompileShader glad_glCompileShader +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D; +#define glCompressedTexImage2D glad_glCompressedTexImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D; +#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D +GLAD_API_CALL PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D; +#define glCopyTexImage2D glad_glCopyTexImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D; +#define glCopyTexSubImage2D glad_glCopyTexSubImage2D +GLAD_API_CALL PFNGLCREATEPROGRAMPROC glad_glCreateProgram; +#define glCreateProgram glad_glCreateProgram +GLAD_API_CALL PFNGLCREATESHADERPROC glad_glCreateShader; +#define glCreateShader glad_glCreateShader +GLAD_API_CALL PFNGLCULLFACEPROC glad_glCullFace; +#define glCullFace glad_glCullFace +GLAD_API_CALL PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers; +#define glDeleteBuffers glad_glDeleteBuffers +GLAD_API_CALL PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers; +#define glDeleteFramebuffers glad_glDeleteFramebuffers +GLAD_API_CALL PFNGLDELETEPROGRAMPROC glad_glDeleteProgram; +#define glDeleteProgram glad_glDeleteProgram +GLAD_API_CALL PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers; +#define glDeleteRenderbuffers glad_glDeleteRenderbuffers +GLAD_API_CALL PFNGLDELETESHADERPROC glad_glDeleteShader; +#define glDeleteShader glad_glDeleteShader +GLAD_API_CALL PFNGLDELETETEXTURESPROC glad_glDeleteTextures; +#define glDeleteTextures glad_glDeleteTextures +GLAD_API_CALL PFNGLDEPTHFUNCPROC glad_glDepthFunc; +#define glDepthFunc glad_glDepthFunc +GLAD_API_CALL PFNGLDEPTHMASKPROC glad_glDepthMask; +#define glDepthMask glad_glDepthMask +GLAD_API_CALL PFNGLDEPTHRANGEFPROC glad_glDepthRangef; +#define glDepthRangef glad_glDepthRangef +GLAD_API_CALL PFNGLDETACHSHADERPROC glad_glDetachShader; +#define glDetachShader glad_glDetachShader +GLAD_API_CALL PFNGLDISABLEPROC glad_glDisable; +#define glDisable glad_glDisable +GLAD_API_CALL PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray; +#define glDisableVertexAttribArray glad_glDisableVertexAttribArray +GLAD_API_CALL PFNGLDRAWARRAYSPROC glad_glDrawArrays; +#define glDrawArrays glad_glDrawArrays +GLAD_API_CALL PFNGLDRAWELEMENTSPROC glad_glDrawElements; +#define glDrawElements glad_glDrawElements +GLAD_API_CALL PFNGLENABLEPROC glad_glEnable; +#define glEnable glad_glEnable +GLAD_API_CALL PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray; +#define glEnableVertexAttribArray glad_glEnableVertexAttribArray +GLAD_API_CALL PFNGLFINISHPROC glad_glFinish; +#define glFinish glad_glFinish +GLAD_API_CALL PFNGLFLUSHPROC glad_glFlush; +#define glFlush glad_glFlush +GLAD_API_CALL PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer; +#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D; +#define glFramebufferTexture2D glad_glFramebufferTexture2D +GLAD_API_CALL PFNGLFRONTFACEPROC glad_glFrontFace; +#define glFrontFace glad_glFrontFace +GLAD_API_CALL PFNGLGENBUFFERSPROC glad_glGenBuffers; +#define glGenBuffers glad_glGenBuffers +GLAD_API_CALL PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers; +#define glGenFramebuffers glad_glGenFramebuffers +GLAD_API_CALL PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers; +#define glGenRenderbuffers glad_glGenRenderbuffers +GLAD_API_CALL PFNGLGENTEXTURESPROC glad_glGenTextures; +#define glGenTextures glad_glGenTextures +GLAD_API_CALL PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap; +#define glGenerateMipmap glad_glGenerateMipmap +GLAD_API_CALL PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib; +#define glGetActiveAttrib glad_glGetActiveAttrib +GLAD_API_CALL PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform; +#define glGetActiveUniform glad_glGetActiveUniform +GLAD_API_CALL PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders; +#define glGetAttachedShaders glad_glGetAttachedShaders +GLAD_API_CALL PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation; +#define glGetAttribLocation glad_glGetAttribLocation +GLAD_API_CALL PFNGLGETBOOLEANVPROC glad_glGetBooleanv; +#define glGetBooleanv glad_glGetBooleanv +GLAD_API_CALL PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv; +#define glGetBufferParameteriv glad_glGetBufferParameteriv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv; +#define glGetBufferPointerv glad_glGetBufferPointerv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES; +#define glGetBufferPointervOES glad_glGetBufferPointervOES +GLAD_API_CALL PFNGLGETERRORPROC glad_glGetError; +#define glGetError glad_glGetError +GLAD_API_CALL PFNGLGETFLOATVPROC glad_glGetFloatv; +#define glGetFloatv glad_glGetFloatv +GLAD_API_CALL PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv; +#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv +GLAD_API_CALL PFNGLGETINTEGERVPROC glad_glGetIntegerv; +#define glGetIntegerv glad_glGetIntegerv +GLAD_API_CALL PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog; +#define glGetProgramInfoLog glad_glGetProgramInfoLog +GLAD_API_CALL PFNGLGETPROGRAMIVPROC glad_glGetProgramiv; +#define glGetProgramiv glad_glGetProgramiv +GLAD_API_CALL PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv; +#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv +GLAD_API_CALL PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog; +#define glGetShaderInfoLog glad_glGetShaderInfoLog +GLAD_API_CALL PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat; +#define glGetShaderPrecisionFormat glad_glGetShaderPrecisionFormat +GLAD_API_CALL PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource; +#define glGetShaderSource glad_glGetShaderSource +GLAD_API_CALL PFNGLGETSHADERIVPROC glad_glGetShaderiv; +#define glGetShaderiv glad_glGetShaderiv +GLAD_API_CALL PFNGLGETSTRINGPROC glad_glGetString; +#define glGetString glad_glGetString +GLAD_API_CALL PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv; +#define glGetTexParameterfv glad_glGetTexParameterfv +GLAD_API_CALL PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv; +#define glGetTexParameteriv glad_glGetTexParameteriv +GLAD_API_CALL PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation; +#define glGetUniformLocation glad_glGetUniformLocation +GLAD_API_CALL PFNGLGETUNIFORMFVPROC glad_glGetUniformfv; +#define glGetUniformfv glad_glGetUniformfv +GLAD_API_CALL PFNGLGETUNIFORMIVPROC glad_glGetUniformiv; +#define glGetUniformiv glad_glGetUniformiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv; +#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv +GLAD_API_CALL PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv; +#define glGetVertexAttribfv glad_glGetVertexAttribfv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv; +#define glGetVertexAttribiv glad_glGetVertexAttribiv +GLAD_API_CALL PFNGLHINTPROC glad_glHint; +#define glHint glad_glHint +GLAD_API_CALL PFNGLISBUFFERPROC glad_glIsBuffer; +#define glIsBuffer glad_glIsBuffer +GLAD_API_CALL PFNGLISENABLEDPROC glad_glIsEnabled; +#define glIsEnabled glad_glIsEnabled +GLAD_API_CALL PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer; +#define glIsFramebuffer glad_glIsFramebuffer +GLAD_API_CALL PFNGLISPROGRAMPROC glad_glIsProgram; +#define glIsProgram glad_glIsProgram +GLAD_API_CALL PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer; +#define glIsRenderbuffer glad_glIsRenderbuffer +GLAD_API_CALL PFNGLISSHADERPROC glad_glIsShader; +#define glIsShader glad_glIsShader +GLAD_API_CALL PFNGLISTEXTUREPROC glad_glIsTexture; +#define glIsTexture glad_glIsTexture +GLAD_API_CALL PFNGLLINEWIDTHPROC glad_glLineWidth; +#define glLineWidth glad_glLineWidth +GLAD_API_CALL PFNGLLINKPROGRAMPROC glad_glLinkProgram; +#define glLinkProgram glad_glLinkProgram +GLAD_API_CALL PFNGLMAPBUFFERPROC glad_glMapBuffer; +#define glMapBuffer glad_glMapBuffer +GLAD_API_CALL PFNGLMAPBUFFEROESPROC glad_glMapBufferOES; +#define glMapBufferOES glad_glMapBufferOES +GLAD_API_CALL PFNGLPIXELSTOREIPROC glad_glPixelStorei; +#define glPixelStorei glad_glPixelStorei +GLAD_API_CALL PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset; +#define glPolygonOffset glad_glPolygonOffset +GLAD_API_CALL PFNGLREADPIXELSPROC glad_glReadPixels; +#define glReadPixels glad_glReadPixels +GLAD_API_CALL PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler; +#define glReleaseShaderCompiler glad_glReleaseShaderCompiler +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage; +#define glRenderbufferStorage glad_glRenderbufferStorage +GLAD_API_CALL PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage; +#define glSampleCoverage glad_glSampleCoverage +GLAD_API_CALL PFNGLSCISSORPROC glad_glScissor; +#define glScissor glad_glScissor +GLAD_API_CALL PFNGLSHADERBINARYPROC glad_glShaderBinary; +#define glShaderBinary glad_glShaderBinary +GLAD_API_CALL PFNGLSHADERSOURCEPROC glad_glShaderSource; +#define glShaderSource glad_glShaderSource +GLAD_API_CALL PFNGLSTENCILFUNCPROC glad_glStencilFunc; +#define glStencilFunc glad_glStencilFunc +GLAD_API_CALL PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate; +#define glStencilFuncSeparate glad_glStencilFuncSeparate +GLAD_API_CALL PFNGLSTENCILMASKPROC glad_glStencilMask; +#define glStencilMask glad_glStencilMask +GLAD_API_CALL PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate; +#define glStencilMaskSeparate glad_glStencilMaskSeparate +GLAD_API_CALL PFNGLSTENCILOPPROC glad_glStencilOp; +#define glStencilOp glad_glStencilOp +GLAD_API_CALL PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate; +#define glStencilOpSeparate glad_glStencilOpSeparate +GLAD_API_CALL PFNGLTEXIMAGE2DPROC glad_glTexImage2D; +#define glTexImage2D glad_glTexImage2D +GLAD_API_CALL PFNGLTEXPARAMETERFPROC glad_glTexParameterf; +#define glTexParameterf glad_glTexParameterf +GLAD_API_CALL PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv; +#define glTexParameterfv glad_glTexParameterfv +GLAD_API_CALL PFNGLTEXPARAMETERIPROC glad_glTexParameteri; +#define glTexParameteri glad_glTexParameteri +GLAD_API_CALL PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv; +#define glTexParameteriv glad_glTexParameteriv +GLAD_API_CALL PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D; +#define glTexSubImage2D glad_glTexSubImage2D +GLAD_API_CALL PFNGLUNIFORM1FPROC glad_glUniform1f; +#define glUniform1f glad_glUniform1f +GLAD_API_CALL PFNGLUNIFORM1FVPROC glad_glUniform1fv; +#define glUniform1fv glad_glUniform1fv +GLAD_API_CALL PFNGLUNIFORM1IPROC glad_glUniform1i; +#define glUniform1i glad_glUniform1i +GLAD_API_CALL PFNGLUNIFORM1IVPROC glad_glUniform1iv; +#define glUniform1iv glad_glUniform1iv +GLAD_API_CALL PFNGLUNIFORM2FPROC glad_glUniform2f; +#define glUniform2f glad_glUniform2f +GLAD_API_CALL PFNGLUNIFORM2FVPROC glad_glUniform2fv; +#define glUniform2fv glad_glUniform2fv +GLAD_API_CALL PFNGLUNIFORM2IPROC glad_glUniform2i; +#define glUniform2i glad_glUniform2i +GLAD_API_CALL PFNGLUNIFORM2IVPROC glad_glUniform2iv; +#define glUniform2iv glad_glUniform2iv +GLAD_API_CALL PFNGLUNIFORM3FPROC glad_glUniform3f; +#define glUniform3f glad_glUniform3f +GLAD_API_CALL PFNGLUNIFORM3FVPROC glad_glUniform3fv; +#define glUniform3fv glad_glUniform3fv +GLAD_API_CALL PFNGLUNIFORM3IPROC glad_glUniform3i; +#define glUniform3i glad_glUniform3i +GLAD_API_CALL PFNGLUNIFORM3IVPROC glad_glUniform3iv; +#define glUniform3iv glad_glUniform3iv +GLAD_API_CALL PFNGLUNIFORM4FPROC glad_glUniform4f; +#define glUniform4f glad_glUniform4f +GLAD_API_CALL PFNGLUNIFORM4FVPROC glad_glUniform4fv; +#define glUniform4fv glad_glUniform4fv +GLAD_API_CALL PFNGLUNIFORM4IPROC glad_glUniform4i; +#define glUniform4i glad_glUniform4i +GLAD_API_CALL PFNGLUNIFORM4IVPROC glad_glUniform4iv; +#define glUniform4iv glad_glUniform4iv +GLAD_API_CALL PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv; +#define glUniformMatrix2fv glad_glUniformMatrix2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv; +#define glUniformMatrix3fv glad_glUniformMatrix3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv; +#define glUniformMatrix4fv glad_glUniformMatrix4fv +GLAD_API_CALL PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer; +#define glUnmapBuffer glad_glUnmapBuffer +GLAD_API_CALL PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES; +#define glUnmapBufferOES glad_glUnmapBufferOES +GLAD_API_CALL PFNGLUSEPROGRAMPROC glad_glUseProgram; +#define glUseProgram glad_glUseProgram +GLAD_API_CALL PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram; +#define glValidateProgram glad_glValidateProgram +GLAD_API_CALL PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f; +#define glVertexAttrib1f glad_glVertexAttrib1f +GLAD_API_CALL PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv; +#define glVertexAttrib1fv glad_glVertexAttrib1fv +GLAD_API_CALL PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f; +#define glVertexAttrib2f glad_glVertexAttrib2f +GLAD_API_CALL PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv; +#define glVertexAttrib2fv glad_glVertexAttrib2fv +GLAD_API_CALL PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f; +#define glVertexAttrib3f glad_glVertexAttrib3f +GLAD_API_CALL PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv; +#define glVertexAttrib3fv glad_glVertexAttrib3fv +GLAD_API_CALL PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f; +#define glVertexAttrib4f glad_glVertexAttrib4f +GLAD_API_CALL PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv; +#define glVertexAttrib4fv glad_glVertexAttrib4fv +GLAD_API_CALL PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer; +#define glVertexAttribPointer glad_glVertexAttribPointer +GLAD_API_CALL PFNGLVIEWPORTPROC glad_glViewport; +#define glViewport glad_glViewport + + + + + +GLAD_API_CALL int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGLES2( GLADloadfunc load); + + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/gl.h glmark2-2021.02/android/jni/src/glad/include/glad/gl.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/gl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/glad/gl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3686 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:14 2019 + * + * Generator: C/C++ + * Specification: gl + * Extensions: 0 + * + * APIs: + * - gl:compatibility=3.3 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='gl:compatibility=3.3' --extensions='' c + * + * Online: + * http://glad.sh/#api=gl%3Acompatibility%3D3.3&extensions=&generator=c&options= + * + */ + +#ifndef GLAD_GL_H_ +#define GLAD_GL_H_ + +#ifdef __gl_h_ + #error OpenGL header already included (API: gl), remove previous include! +#endif +#define __gl_h_ 1 + + +#define GLAD_GL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GL_2D 0x0600 +#define GL_2_BYTES 0x1407 +#define GL_3D 0x0601 +#define GL_3D_COLOR 0x0602 +#define GL_3D_COLOR_TEXTURE 0x0603 +#define GL_3_BYTES 0x1408 +#define GL_4D_COLOR_TEXTURE 0x0604 +#define GL_4_BYTES 0x1409 +#define GL_ACCUM 0x0100 +#define GL_ACCUM_ALPHA_BITS 0x0D5B +#define GL_ACCUM_BLUE_BITS 0x0D5A +#define GL_ACCUM_BUFFER_BIT 0x00000200 +#define GL_ACCUM_CLEAR_VALUE 0x0B80 +#define GL_ACCUM_GREEN_BITS 0x0D59 +#define GL_ACCUM_RED_BITS 0x0D58 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ADD 0x0104 +#define GL_ADD_SIGNED 0x8574 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALL_ATTRIB_BITS 0xFFFFFFFF +#define GL_ALPHA 0x1906 +#define GL_ALPHA12 0x803D +#define GL_ALPHA16 0x803E +#define GL_ALPHA4 0x803B +#define GL_ALPHA8 0x803C +#define GL_ALPHA_BIAS 0x0D1D +#define GL_ALPHA_BITS 0x0D55 +#define GL_ALPHA_INTEGER 0x8D97 +#define GL_ALPHA_SCALE 0x0D1C +#define GL_ALPHA_TEST 0x0BC0 +#define GL_ALPHA_TEST_FUNC 0x0BC1 +#define GL_ALPHA_TEST_REF 0x0BC2 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_ALWAYS 0x0207 +#define GL_AMBIENT 0x1200 +#define GL_AMBIENT_AND_DIFFUSE 0x1602 +#define GL_AND 0x1501 +#define GL_AND_INVERTED 0x1504 +#define GL_AND_REVERSE 0x1502 +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ATTRIB_STACK_DEPTH 0x0BB0 +#define GL_AUTO_NORMAL 0x0D80 +#define GL_AUX0 0x0409 +#define GL_AUX1 0x040A +#define GL_AUX2 0x040B +#define GL_AUX3 0x040C +#define GL_AUX_BUFFERS 0x0C00 +#define GL_BACK 0x0405 +#define GL_BACK_LEFT 0x0402 +#define GL_BACK_RIGHT 0x0403 +#define GL_BGR 0x80E0 +#define GL_BGRA 0x80E1 +#define GL_BGRA_INTEGER 0x8D9B +#define GL_BGR_INTEGER 0x8D9A +#define GL_BITMAP 0x1A00 +#define GL_BITMAP_TOKEN 0x0704 +#define GL_BLEND 0x0BE2 +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_SRC 0x0BE1 +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLUE 0x1905 +#define GL_BLUE_BIAS 0x0D1B +#define GL_BLUE_BITS 0x0D54 +#define GL_BLUE_INTEGER 0x8D96 +#define GL_BLUE_SCALE 0x0D1A +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_BUFFER_ACCESS 0x88BB +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_BYTE 0x1400 +#define GL_C3F_V3F 0x2A24 +#define GL_C4F_N3F_V3F 0x2A26 +#define GL_C4UB_V2F 0x2A22 +#define GL_C4UB_V3F 0x2A23 +#define GL_CCW 0x0901 +#define GL_CLAMP 0x2900 +#define GL_CLAMP_FRAGMENT_COLOR 0x891B +#define GL_CLAMP_READ_COLOR 0x891C +#define GL_CLAMP_TO_BORDER 0x812D +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_CLAMP_VERTEX_COLOR 0x891A +#define GL_CLEAR 0x1500 +#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1 +#define GL_CLIENT_ALL_ATTRIB_BITS 0xFFFFFFFF +#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 +#define GL_CLIENT_PIXEL_STORE_BIT 0x00000001 +#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002 +#define GL_CLIP_DISTANCE0 0x3000 +#define GL_CLIP_DISTANCE1 0x3001 +#define GL_CLIP_DISTANCE2 0x3002 +#define GL_CLIP_DISTANCE3 0x3003 +#define GL_CLIP_DISTANCE4 0x3004 +#define GL_CLIP_DISTANCE5 0x3005 +#define GL_CLIP_DISTANCE6 0x3006 +#define GL_CLIP_DISTANCE7 0x3007 +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 +#define GL_COEFF 0x0A00 +#define GL_COLOR 0x1800 +#define GL_COLOR_ARRAY 0x8076 +#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898 +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_INDEX 0x1900 +#define GL_COLOR_INDEXES 0x1603 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_COLOR_MATERIAL 0x0B57 +#define GL_COLOR_MATERIAL_FACE 0x0B55 +#define GL_COLOR_MATERIAL_PARAMETER 0x0B56 +#define GL_COLOR_SUM 0x8458 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_COMBINE 0x8570 +#define GL_COMBINE_ALPHA 0x8572 +#define GL_COMBINE_RGB 0x8571 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_COMPARE_R_TO_TEXTURE 0x884E +#define GL_COMPILE 0x1300 +#define GL_COMPILE_AND_EXECUTE 0x1301 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_COMPRESSED_ALPHA 0x84E9 +#define GL_COMPRESSED_INTENSITY 0x84EC +#define GL_COMPRESSED_LUMINANCE 0x84EA +#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB +#define GL_COMPRESSED_RED 0x8225 +#define GL_COMPRESSED_RED_RGTC1 0x8DBB +#define GL_COMPRESSED_RG 0x8226 +#define GL_COMPRESSED_RGB 0x84ED +#define GL_COMPRESSED_RGBA 0x84EE +#define GL_COMPRESSED_RG_RGTC2 0x8DBD +#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE +#define GL_COMPRESSED_SLUMINANCE 0x8C4A +#define GL_COMPRESSED_SLUMINANCE_ALPHA 0x8C4B +#define GL_COMPRESSED_SRGB 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA 0x8C49 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CONDITION_SATISFIED 0x911C +#define GL_CONSTANT 0x8576 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_CONSTANT_ATTENUATION 0x1207 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001 +#define GL_CONTEXT_FLAGS 0x821E +#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001 +#define GL_CONTEXT_PROFILE_MASK 0x9126 +#define GL_COORD_REPLACE 0x8862 +#define GL_COPY 0x1503 +#define GL_COPY_INVERTED 0x150C +#define GL_COPY_PIXEL_TOKEN 0x0706 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_CURRENT_BIT 0x00000001 +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_FOG_COORD 0x8453 +#define GL_CURRENT_FOG_COORDINATE 0x8453 +#define GL_CURRENT_INDEX 0x0B01 +#define GL_CURRENT_NORMAL 0x0B02 +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_CURRENT_QUERY 0x8865 +#define GL_CURRENT_RASTER_COLOR 0x0B04 +#define GL_CURRENT_RASTER_DISTANCE 0x0B09 +#define GL_CURRENT_RASTER_INDEX 0x0B05 +#define GL_CURRENT_RASTER_POSITION 0x0B07 +#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 +#define GL_CURRENT_RASTER_SECONDARY_COLOR 0x845F +#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 +#define GL_CURRENT_SECONDARY_COLOR 0x8459 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_CW 0x0900 +#define GL_DECAL 0x2101 +#define GL_DECR 0x1E03 +#define GL_DECR_WRAP 0x8508 +#define GL_DELETE_STATUS 0x8B80 +#define GL_DEPTH 0x1801 +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_DEPTH_BIAS 0x0D1F +#define GL_DEPTH_BITS 0x0D56 +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_DEPTH_CLAMP 0x864F +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_DEPTH_COMPONENT32 0x81A7 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH_FUNC 0x0B74 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_SCALE 0x0D1E +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_TEXTURE_MODE 0x884B +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DIFFUSE 0x1201 +#define GL_DITHER 0x0BD0 +#define GL_DOMAIN 0x0A02 +#define GL_DONT_CARE 0x1100 +#define GL_DOT3_RGB 0x86AE +#define GL_DOT3_RGBA 0x86AF +#define GL_DOUBLE 0x140A +#define GL_DOUBLEBUFFER 0x0C32 +#define GL_DRAW_BUFFER 0x0C01 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_DRAW_PIXEL_TOKEN 0x0705 +#define GL_DST_ALPHA 0x0304 +#define GL_DST_COLOR 0x0306 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_EDGE_FLAG 0x0B43 +#define GL_EDGE_FLAG_ARRAY 0x8079 +#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING 0x889B +#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093 +#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_EMISSION 0x1600 +#define GL_ENABLE_BIT 0x00002000 +#define GL_EQUAL 0x0202 +#define GL_EQUIV 0x1509 +#define GL_EVAL_BIT 0x00010000 +#define GL_EXP 0x0800 +#define GL_EXP2 0x0801 +#define GL_EXTENSIONS 0x1F03 +#define GL_EYE_LINEAR 0x2400 +#define GL_EYE_PLANE 0x2502 +#define GL_FALSE 0 +#define GL_FASTEST 0x1101 +#define GL_FEEDBACK 0x1C01 +#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0 +#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1 +#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2 +#define GL_FILL 0x1B02 +#define GL_FIRST_VERTEX_CONVENTION 0x8E4D +#define GL_FIXED_ONLY 0x891D +#define GL_FLAT 0x1D00 +#define GL_FLOAT 0x1406 +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_FOG 0x0B60 +#define GL_FOG_BIT 0x00000080 +#define GL_FOG_COLOR 0x0B66 +#define GL_FOG_COORD 0x8451 +#define GL_FOG_COORDINATE 0x8451 +#define GL_FOG_COORDINATE_ARRAY 0x8457 +#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0x889D +#define GL_FOG_COORDINATE_ARRAY_POINTER 0x8456 +#define GL_FOG_COORDINATE_ARRAY_STRIDE 0x8455 +#define GL_FOG_COORDINATE_ARRAY_TYPE 0x8454 +#define GL_FOG_COORDINATE_SOURCE 0x8450 +#define GL_FOG_COORD_ARRAY 0x8457 +#define GL_FOG_COORD_ARRAY_BUFFER_BINDING 0x889D +#define GL_FOG_COORD_ARRAY_POINTER 0x8456 +#define GL_FOG_COORD_ARRAY_STRIDE 0x8455 +#define GL_FOG_COORD_ARRAY_TYPE 0x8454 +#define GL_FOG_COORD_SRC 0x8450 +#define GL_FOG_DENSITY 0x0B62 +#define GL_FOG_END 0x0B64 +#define GL_FOG_HINT 0x0C54 +#define GL_FOG_INDEX 0x0B61 +#define GL_FOG_MODE 0x0B65 +#define GL_FOG_START 0x0B63 +#define GL_FRAGMENT_DEPTH 0x8452 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_FRAMEBUFFER 0x8D40 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC +#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRONT 0x0404 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_FRONT_FACE 0x0B46 +#define GL_FRONT_LEFT 0x0400 +#define GL_FRONT_RIGHT 0x0401 +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_FUNC_SUBTRACT 0x800A +#define GL_GENERATE_MIPMAP 0x8191 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_GEOMETRY_INPUT_TYPE 0x8917 +#define GL_GEOMETRY_OUTPUT_TYPE 0x8918 +#define GL_GEOMETRY_SHADER 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT 0x8916 +#define GL_GEQUAL 0x0206 +#define GL_GREATER 0x0204 +#define GL_GREEN 0x1904 +#define GL_GREEN_BIAS 0x0D19 +#define GL_GREEN_BITS 0x0D53 +#define GL_GREEN_INTEGER 0x8D95 +#define GL_GREEN_SCALE 0x0D18 +#define GL_HALF_FLOAT 0x140B +#define GL_HINT_BIT 0x00008000 +#define GL_INCR 0x1E02 +#define GL_INCR_WRAP 0x8507 +#define GL_INDEX 0x8222 +#define GL_INDEX_ARRAY 0x8077 +#define GL_INDEX_ARRAY_BUFFER_BINDING 0x8899 +#define GL_INDEX_ARRAY_POINTER 0x8091 +#define GL_INDEX_ARRAY_STRIDE 0x8086 +#define GL_INDEX_ARRAY_TYPE 0x8085 +#define GL_INDEX_BITS 0x0D51 +#define GL_INDEX_CLEAR_VALUE 0x0C20 +#define GL_INDEX_LOGIC_OP 0x0BF1 +#define GL_INDEX_MODE 0x0C30 +#define GL_INDEX_OFFSET 0x0D13 +#define GL_INDEX_SHIFT 0x0D12 +#define GL_INDEX_WRITEMASK 0x0C21 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_INT 0x1404 +#define GL_INTENSITY 0x8049 +#define GL_INTENSITY12 0x804C +#define GL_INTENSITY16 0x804D +#define GL_INTENSITY4 0x804A +#define GL_INTENSITY8 0x804B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_INTERPOLATE 0x8575 +#define GL_INT_2_10_10_10_REV 0x8D9F +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_INVALID_INDEX 0xFFFFFFFF +#define GL_INVALID_OPERATION 0x0502 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVERT 0x150A +#define GL_KEEP 0x1E00 +#define GL_LAST_VERTEX_CONVENTION 0x8E4E +#define GL_LEFT 0x0406 +#define GL_LEQUAL 0x0203 +#define GL_LESS 0x0201 +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 +#define GL_LIGHTING 0x0B50 +#define GL_LIGHTING_BIT 0x00000040 +#define GL_LIGHT_MODEL_AMBIENT 0x0B53 +#define GL_LIGHT_MODEL_COLOR_CONTROL 0x81F8 +#define GL_LIGHT_MODEL_LOCAL_VIEWER 0x0B51 +#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52 +#define GL_LINE 0x1B01 +#define GL_LINEAR 0x2601 +#define GL_LINEAR_ATTENUATION 0x1208 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_LINES 0x0001 +#define GL_LINES_ADJACENCY 0x000A +#define GL_LINE_BIT 0x00000004 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_RESET_TOKEN 0x0707 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_LINE_STIPPLE 0x0B24 +#define GL_LINE_STIPPLE_PATTERN 0x0B25 +#define GL_LINE_STIPPLE_REPEAT 0x0B26 +#define GL_LINE_STRIP 0x0003 +#define GL_LINE_STRIP_ADJACENCY 0x000B +#define GL_LINE_TOKEN 0x0702 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_LINE_WIDTH_RANGE 0x0B22 +#define GL_LINK_STATUS 0x8B82 +#define GL_LIST_BASE 0x0B32 +#define GL_LIST_BIT 0x00020000 +#define GL_LIST_INDEX 0x0B33 +#define GL_LIST_MODE 0x0B30 +#define GL_LOAD 0x0101 +#define GL_LOGIC_OP 0x0BF1 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_LOWER_LEFT 0x8CA1 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE12 0x8041 +#define GL_LUMINANCE12_ALPHA12 0x8047 +#define GL_LUMINANCE12_ALPHA4 0x8046 +#define GL_LUMINANCE16 0x8042 +#define GL_LUMINANCE16_ALPHA16 0x8048 +#define GL_LUMINANCE4 0x803F +#define GL_LUMINANCE4_ALPHA4 0x8043 +#define GL_LUMINANCE6_ALPHA2 0x8044 +#define GL_LUMINANCE8 0x8040 +#define GL_LUMINANCE8_ALPHA8 0x8045 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_MAJOR_VERSION 0x821B +#define GL_MAP1_COLOR_4 0x0D90 +#define GL_MAP1_GRID_DOMAIN 0x0DD0 +#define GL_MAP1_GRID_SEGMENTS 0x0DD1 +#define GL_MAP1_INDEX 0x0D91 +#define GL_MAP1_NORMAL 0x0D92 +#define GL_MAP1_TEXTURE_COORD_1 0x0D93 +#define GL_MAP1_TEXTURE_COORD_2 0x0D94 +#define GL_MAP1_TEXTURE_COORD_3 0x0D95 +#define GL_MAP1_TEXTURE_COORD_4 0x0D96 +#define GL_MAP1_VERTEX_3 0x0D97 +#define GL_MAP1_VERTEX_4 0x0D98 +#define GL_MAP2_COLOR_4 0x0DB0 +#define GL_MAP2_GRID_DOMAIN 0x0DD2 +#define GL_MAP2_GRID_SEGMENTS 0x0DD3 +#define GL_MAP2_INDEX 0x0DB1 +#define GL_MAP2_NORMAL 0x0DB2 +#define GL_MAP2_TEXTURE_COORD_1 0x0DB3 +#define GL_MAP2_TEXTURE_COORD_2 0x0DB4 +#define GL_MAP2_TEXTURE_COORD_3 0x0DB5 +#define GL_MAP2_TEXTURE_COORD_4 0x0DB6 +#define GL_MAP2_VERTEX_3 0x0DB7 +#define GL_MAP2_VERTEX_4 0x0DB8 +#define GL_MAP_COLOR 0x0D10 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_STENCIL 0x0D11 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MATRIX_MODE 0x0BA0 +#define GL_MAX 0x8008 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MAX_ATTRIB_STACK_DEPTH 0x0D35 +#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B +#define GL_MAX_CLIP_DISTANCES 0x0D32 +#define GL_MAX_CLIP_PLANES 0x0D32 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32 +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_EVAL_ORDER 0x0D30 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123 +#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124 +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0 +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1 +#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF +#define GL_MAX_INTEGER_SAMPLES 0x9110 +#define GL_MAX_LIGHTS 0x0D31 +#define GL_MAX_LIST_NESTING 0x0B31 +#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36 +#define GL_MAX_NAME_STACK_DEPTH 0x0D37 +#define GL_MAX_PIXEL_MAP_TABLE 0x0D34 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B +#define GL_MAX_TEXTURE_COORDS 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 +#define GL_MAX_TEXTURE_UNITS 0x84E2 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_MAX_VARYING_FLOATS 0x8B4B +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MIN 0x8007 +#define GL_MINOR_VERSION 0x821C +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_MODELVIEW 0x1700 +#define GL_MODELVIEW_MATRIX 0x0BA6 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +#define GL_MODULATE 0x2100 +#define GL_MULT 0x0103 +#define GL_MULTISAMPLE 0x809D +#define GL_MULTISAMPLE_BIT 0x20000000 +#define GL_N3F_V3F 0x2A25 +#define GL_NAME_STACK_DEPTH 0x0D70 +#define GL_NAND 0x150E +#define GL_NEAREST 0x2600 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_NEVER 0x0200 +#define GL_NICEST 0x1102 +#define GL_NONE 0 +#define GL_NOOP 0x1505 +#define GL_NOR 0x1508 +#define GL_NORMALIZE 0x0BA1 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897 +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_MAP 0x8511 +#define GL_NOTEQUAL 0x0205 +#define GL_NO_ERROR 0 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_NUM_EXTENSIONS 0x821D +#define GL_OBJECT_LINEAR 0x2401 +#define GL_OBJECT_PLANE 0x2501 +#define GL_OBJECT_TYPE 0x9112 +#define GL_ONE 1 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB +#define GL_ONE_MINUS_SRC1_COLOR 0x88FA +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_OPERAND0_ALPHA 0x8598 +#define GL_OPERAND0_RGB 0x8590 +#define GL_OPERAND1_ALPHA 0x8599 +#define GL_OPERAND1_RGB 0x8591 +#define GL_OPERAND2_ALPHA 0x859A +#define GL_OPERAND2_RGB 0x8592 +#define GL_OR 0x1507 +#define GL_ORDER 0x0A01 +#define GL_OR_INVERTED 0x150D +#define GL_OR_REVERSE 0x150B +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_PACK_IMAGE_HEIGHT 0x806C +#define GL_PACK_LSB_FIRST 0x0D01 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_IMAGES 0x806B +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SWAP_BYTES 0x0D00 +#define GL_PASS_THROUGH_TOKEN 0x0700 +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_PIXEL_MAP_A_TO_A 0x0C79 +#define GL_PIXEL_MAP_A_TO_A_SIZE 0x0CB9 +#define GL_PIXEL_MAP_B_TO_B 0x0C78 +#define GL_PIXEL_MAP_B_TO_B_SIZE 0x0CB8 +#define GL_PIXEL_MAP_G_TO_G 0x0C77 +#define GL_PIXEL_MAP_G_TO_G_SIZE 0x0CB7 +#define GL_PIXEL_MAP_I_TO_A 0x0C75 +#define GL_PIXEL_MAP_I_TO_A_SIZE 0x0CB5 +#define GL_PIXEL_MAP_I_TO_B 0x0C74 +#define GL_PIXEL_MAP_I_TO_B_SIZE 0x0CB4 +#define GL_PIXEL_MAP_I_TO_G 0x0C73 +#define GL_PIXEL_MAP_I_TO_G_SIZE 0x0CB3 +#define GL_PIXEL_MAP_I_TO_I 0x0C70 +#define GL_PIXEL_MAP_I_TO_I_SIZE 0x0CB0 +#define GL_PIXEL_MAP_I_TO_R 0x0C72 +#define GL_PIXEL_MAP_I_TO_R_SIZE 0x0CB2 +#define GL_PIXEL_MAP_R_TO_R 0x0C76 +#define GL_PIXEL_MAP_R_TO_R_SIZE 0x0CB6 +#define GL_PIXEL_MAP_S_TO_S 0x0C71 +#define GL_PIXEL_MAP_S_TO_S_SIZE 0x0CB1 +#define GL_PIXEL_MODE_BIT 0x00000020 +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_POINT 0x1B00 +#define GL_POINTS 0x0000 +#define GL_POINT_BIT 0x00000002 +#define GL_POINT_DISTANCE_ATTENUATION 0x8129 +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_POINT_SIZE_MAX 0x8127 +#define GL_POINT_SIZE_MIN 0x8126 +#define GL_POINT_SIZE_RANGE 0x0B12 +#define GL_POINT_SMOOTH 0x0B10 +#define GL_POINT_SMOOTH_HINT 0x0C51 +#define GL_POINT_SPRITE 0x8861 +#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0 +#define GL_POINT_TOKEN 0x0701 +#define GL_POLYGON 0x0009 +#define GL_POLYGON_BIT 0x00000008 +#define GL_POLYGON_MODE 0x0B40 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_SMOOTH 0x0B41 +#define GL_POLYGON_SMOOTH_HINT 0x0C53 +#define GL_POLYGON_STIPPLE 0x0B42 +#define GL_POLYGON_STIPPLE_BIT 0x00000010 +#define GL_POLYGON_TOKEN 0x0703 +#define GL_POSITION 0x1203 +#define GL_PREVIOUS 0x8578 +#define GL_PRIMARY_COLOR 0x8577 +#define GL_PRIMITIVES_GENERATED 0x8C87 +#define GL_PRIMITIVE_RESTART 0x8F9D +#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E +#define GL_PROGRAM_POINT_SIZE 0x8642 +#define GL_PROJECTION 0x1701 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +#define GL_PROVOKING_VERTEX 0x8E4F +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103 +#define GL_PROXY_TEXTURE_3D 0x8070 +#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B +#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7 +#define GL_Q 0x2003 +#define GL_QUADRATIC_ATTENUATION 0x1209 +#define GL_QUADS 0x0007 +#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C +#define GL_QUAD_STRIP 0x0008 +#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16 +#define GL_QUERY_BY_REGION_WAIT 0x8E15 +#define GL_QUERY_COUNTER_BITS 0x8864 +#define GL_QUERY_NO_WAIT 0x8E14 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_QUERY_WAIT 0x8E13 +#define GL_R 0x2002 +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_R16 0x822A +#define GL_R16F 0x822D +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R16_SNORM 0x8F98 +#define GL_R32F 0x822E +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_R3_G3_B2 0x2A10 +#define GL_R8 0x8229 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R8_SNORM 0x8F94 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_READ_BUFFER 0x0C02 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_READ_ONLY 0x88B8 +#define GL_READ_WRITE 0x88BA +#define GL_RED 0x1903 +#define GL_RED_BIAS 0x0D15 +#define GL_RED_BITS 0x0D52 +#define GL_RED_INTEGER 0x8D94 +#define GL_RED_SCALE 0x0D14 +#define GL_REFLECTION_MAP 0x8512 +#define GL_RENDER 0x1C00 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERER 0x1F01 +#define GL_RENDER_MODE 0x0C40 +#define GL_REPEAT 0x2901 +#define GL_REPLACE 0x1E01 +#define GL_RESCALE_NORMAL 0x803A +#define GL_RETURN 0x0102 +#define GL_RG 0x8227 +#define GL_RG16 0x822C +#define GL_RG16F 0x822F +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG16_SNORM 0x8F99 +#define GL_RG32F 0x8230 +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_RG8 0x822B +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB 0x1907 +#define GL_RGB10 0x8052 +#define GL_RGB10_A2 0x8059 +#define GL_RGB10_A2UI 0x906F +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGB16F 0x881B +#define GL_RGB16I 0x8D89 +#define GL_RGB16UI 0x8D77 +#define GL_RGB16_SNORM 0x8F9A +#define GL_RGB32F 0x8815 +#define GL_RGB32I 0x8D83 +#define GL_RGB32UI 0x8D71 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB5_A1 0x8057 +#define GL_RGB8 0x8051 +#define GL_RGB8I 0x8D8F +#define GL_RGB8UI 0x8D7D +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGB9_E5 0x8C3D +#define GL_RGBA 0x1908 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_RGBA16F 0x881A +#define GL_RGBA16I 0x8D88 +#define GL_RGBA16UI 0x8D76 +#define GL_RGBA16_SNORM 0x8F9B +#define GL_RGBA2 0x8055 +#define GL_RGBA32F 0x8814 +#define GL_RGBA32I 0x8D82 +#define GL_RGBA32UI 0x8D70 +#define GL_RGBA4 0x8056 +#define GL_RGBA8 0x8058 +#define GL_RGBA8I 0x8D8E +#define GL_RGBA8UI 0x8D7C +#define GL_RGBA8_SNORM 0x8F97 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_RGBA_MODE 0x0C31 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGB_SCALE 0x8573 +#define GL_RG_INTEGER 0x8228 +#define GL_RIGHT 0x0407 +#define GL_S 0x2000 +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_BINDING 0x8919 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLES_PASSED 0x8914 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SCISSOR_BIT 0x00080000 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_SECONDARY_COLOR_ARRAY 0x845E +#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING 0x889C +#define GL_SECONDARY_COLOR_ARRAY_POINTER 0x845D +#define GL_SECONDARY_COLOR_ARRAY_SIZE 0x845A +#define GL_SECONDARY_COLOR_ARRAY_STRIDE 0x845C +#define GL_SECONDARY_COLOR_ARRAY_TYPE 0x845B +#define GL_SELECT 0x1C02 +#define GL_SELECTION_BUFFER_POINTER 0x0DF3 +#define GL_SELECTION_BUFFER_SIZE 0x0DF4 +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_SEPARATE_SPECULAR_COLOR 0x81FA +#define GL_SET 0x150F +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_TYPE 0x8B4F +#define GL_SHADE_MODEL 0x0B54 +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_SHININESS 0x1601 +#define GL_SHORT 0x1402 +#define GL_SIGNALED 0x9119 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_SINGLE_COLOR 0x81F9 +#define GL_SLUMINANCE 0x8C46 +#define GL_SLUMINANCE8 0x8C47 +#define GL_SLUMINANCE8_ALPHA8 0x8C45 +#define GL_SLUMINANCE_ALPHA 0x8C44 +#define GL_SMOOTH 0x1D01 +#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_SOURCE0_ALPHA 0x8588 +#define GL_SOURCE0_RGB 0x8580 +#define GL_SOURCE1_ALPHA 0x8589 +#define GL_SOURCE1_RGB 0x8581 +#define GL_SOURCE2_ALPHA 0x858A +#define GL_SOURCE2_RGB 0x8582 +#define GL_SPECULAR 0x1202 +#define GL_SPHERE_MAP 0x2402 +#define GL_SPOT_CUTOFF 0x1206 +#define GL_SPOT_DIRECTION 0x1204 +#define GL_SPOT_EXPONENT 0x1205 +#define GL_SRC0_ALPHA 0x8588 +#define GL_SRC0_RGB 0x8580 +#define GL_SRC1_ALPHA 0x8589 +#define GL_SRC1_COLOR 0x88F9 +#define GL_SRC1_RGB 0x8581 +#define GL_SRC2_ALPHA 0x858A +#define GL_SRC2_RGB 0x8582 +#define GL_SRC_ALPHA 0x0302 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_SRC_COLOR 0x0300 +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_SRGB_ALPHA 0x8C42 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_STATIC_COPY 0x88E6 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STATIC_READ 0x88E5 +#define GL_STENCIL 0x1802 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_STENCIL_BITS 0x0D57 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_INDEX 0x1901 +#define GL_STENCIL_INDEX1 0x8D46 +#define GL_STENCIL_INDEX16 0x8D49 +#define GL_STENCIL_INDEX4 0x8D47 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STEREO 0x0C33 +#define GL_STREAM_COPY 0x88E2 +#define GL_STREAM_DRAW 0x88E0 +#define GL_STREAM_READ 0x88E1 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_SUBTRACT 0x84E7 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_SYNC_STATUS 0x9114 +#define GL_T 0x2001 +#define GL_T2F_C3F_V3F 0x2A2A +#define GL_T2F_C4F_N3F_V3F 0x2A2C +#define GL_T2F_C4UB_V3F 0x2A29 +#define GL_T2F_N3F_V3F 0x2A2B +#define GL_T2F_V3F 0x2A27 +#define GL_T4F_C4F_N3F_V4F 0x2A2D +#define GL_T4F_V4F 0x2A28 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_1D_ARRAY 0x8C18 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 +#define GL_TEXTURE_3D 0x806F +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_TEXTURE_BINDING_BUFFER 0x8C2C +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6 +#define GL_TEXTURE_BIT 0x00040000 +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_BORDER 0x1005 +#define GL_TEXTURE_BORDER_COLOR 0x1004 +#define GL_TEXTURE_BUFFER 0x8C2A +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPONENTS 0x1003 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0 +#define GL_TEXTURE_COMPRESSION_HINT 0x84EF +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_TEXTURE_ENV 0x2300 +#define GL_TEXTURE_ENV_COLOR 0x2201 +#define GL_TEXTURE_ENV_MODE 0x2200 +#define GL_TEXTURE_FILTER_CONTROL 0x8500 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_TEXTURE_GEN_MODE 0x2500 +#define GL_TEXTURE_GEN_Q 0x0C63 +#define GL_TEXTURE_GEN_R 0x0C62 +#define GL_TEXTURE_GEN_S 0x0C60 +#define GL_TEXTURE_GEN_T 0x0C61 +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_INTENSITY_SIZE 0x8061 +#define GL_TEXTURE_INTENSITY_TYPE 0x8C15 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_LOD_BIAS 0x8501 +#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 +#define GL_TEXTURE_LUMINANCE_TYPE 0x8C14 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_PRIORITY 0x8066 +#define GL_TEXTURE_RECTANGLE 0x84F5 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_RESIDENT 0x8067 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFF +#define GL_TIMESTAMP 0x8E28 +#define GL_TIME_ELAPSED 0x88BF +#define GL_TRANSFORM_BIT 0x00001000 +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSPOSE_COLOR_MATRIX 0x84E6 +#define GL_TRANSPOSE_MODELVIEW_MATRIX 0x84E3 +#define GL_TRANSPOSE_PROJECTION_MATRIX 0x84E4 +#define GL_TRANSPOSE_TEXTURE_MATRIX 0x84E5 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLES_ADJACENCY 0x000C +#define GL_TRIANGLE_FAN 0x0006 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D +#define GL_TRUE 1 +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_UNPACK_LSB_FIRST 0x0CF1 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SWAP_BYTES 0x0CF0 +#define GL_UNSIGNALED 0x9118 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 +#define GL_UNSIGNED_BYTE_3_3_2 0x8032 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_UNSIGNED_INT_10_10_10_2 0x8036 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 +#define GL_UPPER_LEFT 0x8CA2 +#define GL_V2F 0x2A20 +#define GL_V3F 0x2A21 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_VENDOR 0x1F00 +#define GL_VERSION 0x1F02 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896 +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 +#define GL_VERTEX_PROGRAM_TWO_SIDE 0x8643 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_VIEWPORT 0x0BA2 +#define GL_VIEWPORT_BIT 0x00000800 +#define GL_WAIT_FAILED 0x911D +#define GL_WEIGHT_ARRAY_BUFFER_BINDING 0x889E +#define GL_WRITE_ONLY 0x88B9 +#define GL_XOR 0x1506 +#define GL_ZERO 0 +#define GL_ZOOM_X 0x0D16 +#define GL_ZOOM_Y 0x0D17 + + +#include +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef khronos_int8_t GLbyte; +typedef khronos_uint8_t GLubyte; +typedef khronos_int16_t GLshort; +typedef khronos_uint16_t GLushort; +typedef int GLint; +typedef unsigned int GLuint; +typedef khronos_int32_t GLclampx; +typedef int GLsizei; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void *GLeglClientBufferEXT; +typedef void *GLeglImageOES; +typedef char GLchar; +typedef char GLcharARB; +#ifdef __APPLE__ +typedef void *GLhandleARB; +#else +typedef unsigned int GLhandleARB; +#endif +typedef khronos_uint16_t GLhalf; +typedef khronos_uint16_t GLhalfARB; +typedef khronos_int32_t GLfixed; +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptr; +#else +typedef khronos_intptr_t GLintptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptrARB; +#else +typedef khronos_intptr_t GLintptrARB; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptr; +#else +typedef khronos_ssize_t GLsizeiptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptrARB; +#else +typedef khronos_ssize_t GLsizeiptrARB; +#endif +typedef khronos_int64_t GLint64; +typedef khronos_int64_t GLint64EXT; +typedef khronos_uint64_t GLuint64; +typedef khronos_uint64_t GLuint64EXT; +typedef struct __GLsync *GLsync; +struct _cl_context; +struct _cl_event; +typedef void (GLAD_API_PTR *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); +typedef unsigned short GLhalfNV; +typedef GLintptr GLvdpauSurfaceNV; +typedef void (GLAD_API_PTR *GLVULKANPROCNV)(void); + + +#define GL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_0; +#define GL_VERSION_1_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_1; +#define GL_VERSION_1_2 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_2; +#define GL_VERSION_1_3 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_3; +#define GL_VERSION_1_4 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_4; +#define GL_VERSION_1_5 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_5; +#define GL_VERSION_2_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_2_0; +#define GL_VERSION_2_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_2_1; +#define GL_VERSION_3_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_0; +#define GL_VERSION_3_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_1; +#define GL_VERSION_3_2 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_2; +#define GL_VERSION_3_3 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_3; + + +typedef void (GLAD_API_PTR *PFNGLACCUMPROC)(GLenum op, GLfloat value); +typedef void (GLAD_API_PTR *PFNGLACTIVETEXTUREPROC)(GLenum texture); +typedef void (GLAD_API_PTR *PFNGLALPHAFUNCPROC)(GLenum func, GLfloat ref); +typedef GLboolean (GLAD_API_PTR *PFNGLARETEXTURESRESIDENTPROC)(GLsizei n, const GLuint * textures, GLboolean * residences); +typedef void (GLAD_API_PTR *PFNGLARRAYELEMENTPROC)(GLint i); +typedef void (GLAD_API_PTR *PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLBEGINPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBEGINCONDITIONALRENDERPROC)(GLuint id, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBEGINQUERYPROC)(GLenum target, GLuint id); +typedef void (GLAD_API_PTR *PFNGLBEGINTRANSFORMFEEDBACKPROC)(GLenum primitiveMode); +typedef void (GLAD_API_PTR *PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERBASEPROC)(GLenum target, GLuint index, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERRANGEPROC)(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (GLAD_API_PTR *PFNGLBINDFRAGDATALOCATIONPROC)(GLuint program, GLuint color, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)(GLuint program, GLuint colorNumber, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer); +typedef void (GLAD_API_PTR *PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLBINDSAMPLERPROC)(GLuint unit, GLuint sampler); +typedef void (GLAD_API_PTR *PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture); +typedef void (GLAD_API_PTR *PFNGLBINDVERTEXARRAYPROC)(GLuint array); +typedef void (GLAD_API_PTR *PFNGLBITMAPPROC)(GLsizei width, GLsizei height, GLfloat xorig, GLfloat yorig, GLfloat xmove, GLfloat ymove, const GLubyte * bitmap); +typedef void (GLAD_API_PTR *PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (GLAD_API_PTR *PFNGLBLITFRAMEBUFFERPROC)(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef void (GLAD_API_PTR *PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void * data, GLenum usage); +typedef void (GLAD_API_PTR *PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void * data); +typedef void (GLAD_API_PTR *PFNGLCALLLISTPROC)(GLuint list); +typedef void (GLAD_API_PTR *PFNGLCALLLISTSPROC)(GLsizei n, GLenum type, const void * lists); +typedef GLenum (GLAD_API_PTR *PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLCLAMPCOLORPROC)(GLenum target, GLenum clamp); +typedef void (GLAD_API_PTR *PFNGLCLEARPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLCLEARACCUMPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERFIPROC)(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERFVPROC)(GLenum buffer, GLint drawbuffer, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERIVPROC)(GLenum buffer, GLint drawbuffer, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERUIVPROC)(GLenum buffer, GLint drawbuffer, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARDEPTHPROC)(GLdouble depth); +typedef void (GLAD_API_PTR *PFNGLCLEARINDEXPROC)(GLfloat c); +typedef void (GLAD_API_PTR *PFNGLCLEARSTENCILPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLCLIENTACTIVETEXTUREPROC)(GLenum texture); +typedef GLenum (GLAD_API_PTR *PFNGLCLIENTWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GLAD_API_PTR *PFNGLCLIPPLANEPROC)(GLenum plane, const GLdouble * equation); +typedef void (GLAD_API_PTR *PFNGLCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3IPROC)(GLint red, GLint green, GLint blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4BPROC)(GLbyte red, GLbyte green, GLbyte blue, GLbyte alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4DPROC)(GLdouble red, GLdouble green, GLdouble blue, GLdouble alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4FPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4IPROC)(GLint red, GLint green, GLint blue, GLint alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4SPROC)(GLshort red, GLshort green, GLshort blue, GLshort alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UBPROC)(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UIPROC)(GLuint red, GLuint green, GLuint blue, GLuint alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4USPROC)(GLushort red, GLushort green, GLushort blue, GLushort alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKIPROC)(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +typedef void (GLAD_API_PTR *PFNGLCOLORMATERIALPROC)(GLenum face, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLCOLORP3UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLCOLORP3UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLCOLORP4UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLCOLORP4UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLCOMPILESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE3DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOPYBUFFERSUBDATAPROC)(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +typedef void (GLAD_API_PTR *PFNGLCOPYPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum type); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef GLuint (GLAD_API_PTR *PFNGLCREATEPROGRAMPROC)(void); +typedef GLuint (GLAD_API_PTR *PFNGLCREATESHADERPROC)(GLenum type); +typedef void (GLAD_API_PTR *PFNGLCULLFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLDELETELISTSPROC)(GLuint list, GLsizei range); +typedef void (GLAD_API_PTR *PFNGLDELETEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint * ids); +typedef void (GLAD_API_PTR *PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLDELETESAMPLERSPROC)(GLsizei count, const GLuint * samplers); +typedef void (GLAD_API_PTR *PFNGLDELETESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDELETESYNCPROC)(GLsync sync); +typedef void (GLAD_API_PTR *PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLDELETEVERTEXARRAYSPROC)(GLsizei n, const GLuint * arrays); +typedef void (GLAD_API_PTR *PFNGLDEPTHFUNCPROC)(GLenum func); +typedef void (GLAD_API_PTR *PFNGLDEPTHMASKPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLDEPTHRANGEPROC)(GLdouble n, GLdouble f); +typedef void (GLAD_API_PTR *PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDISABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLDISABLECLIENTSTATEPROC)(GLenum array); +typedef void (GLAD_API_PTR *PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLDISABLEIPROC)(GLenum target, GLuint index); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSINSTANCEDPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +typedef void (GLAD_API_PTR *PFNGLDRAWBUFFERPROC)(GLenum buf); +typedef void (GLAD_API_PTR *PFNGLDRAWBUFFERSPROC)(GLsizei n, const GLenum * bufs); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSINSTANCEDPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLsizei instancecount); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLsizei instancecount, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLDRAWPIXELSPROC)(GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLDRAWRANGEELEMENTSPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void * indices, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGPOINTERPROC)(GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGVPROC)(const GLboolean * flag); +typedef void (GLAD_API_PTR *PFNGLENABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLENABLECLIENTSTATEPROC)(GLenum array); +typedef void (GLAD_API_PTR *PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLENABLEIPROC)(GLenum target, GLuint index); +typedef void (GLAD_API_PTR *PFNGLENDPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDCONDITIONALRENDERPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDLISTPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDQUERYPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLENDTRANSFORMFEEDBACKPROC)(void); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1DPROC)(GLdouble u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1DVPROC)(const GLdouble * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1FPROC)(GLfloat u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1FVPROC)(const GLfloat * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2DPROC)(GLdouble u, GLdouble v); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2DVPROC)(const GLdouble * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2FPROC)(GLfloat u, GLfloat v); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2FVPROC)(const GLfloat * u); +typedef void (GLAD_API_PTR *PFNGLEVALMESH1PROC)(GLenum mode, GLint i1, GLint i2); +typedef void (GLAD_API_PTR *PFNGLEVALMESH2PROC)(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2); +typedef void (GLAD_API_PTR *PFNGLEVALPOINT1PROC)(GLint i); +typedef void (GLAD_API_PTR *PFNGLEVALPOINT2PROC)(GLint i, GLint j); +typedef void (GLAD_API_PTR *PFNGLFEEDBACKBUFFERPROC)(GLsizei size, GLenum type, GLfloat * buffer); +typedef GLsync (GLAD_API_PTR *PFNGLFENCESYNCPROC)(GLenum condition, GLbitfield flags); +typedef void (GLAD_API_PTR *PFNGLFINISHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHMAPPEDBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDDPROC)(GLdouble coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDDVPROC)(const GLdouble * coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDFPROC)(GLfloat coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDFVPROC)(const GLfloat * coord); +typedef void (GLAD_API_PTR *PFNGLFOGFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLFOGFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLFOGIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLFOGIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTUREPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE1DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE3DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURELAYERPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (GLAD_API_PTR *PFNGLFRONTFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLFRUSTUMPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GLAD_API_PTR *PFNGLGENBUFFERSPROC)(GLsizei n, GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint * framebuffers); +typedef GLuint (GLAD_API_PTR *PFNGLGENLISTSPROC)(GLsizei range); +typedef void (GLAD_API_PTR *PFNGLGENQUERIESPROC)(GLsizei n, GLuint * ids); +typedef void (GLAD_API_PTR *PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLGENSAMPLERSPROC)(GLsizei count, GLuint * samplers); +typedef void (GLAD_API_PTR *PFNGLGENTEXTURESPROC)(GLsizei n, GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLGENVERTEXARRAYSPROC)(GLsizei n, GLuint * arrays); +typedef void (GLAD_API_PTR *PFNGLGENERATEMIPMAPPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei * length, GLchar * uniformBlockName); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMBLOCKIVPROC)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMNAMEPROC)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei * length, GLchar * uniformName); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMSIVPROC)(GLuint program, GLsizei uniformCount, const GLuint * uniformIndices, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei * count, GLuint * shaders); +typedef GLint (GLAD_API_PTR *PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANI_VPROC)(GLenum target, GLuint index, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERI64VPROC)(GLenum target, GLenum pname, GLint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, void * data); +typedef void (GLAD_API_PTR *PFNGLGETCLIPPLANEPROC)(GLenum plane, GLdouble * equation); +typedef void (GLAD_API_PTR *PFNGLGETCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint level, void * img); +typedef void (GLAD_API_PTR *PFNGLGETDOUBLEVPROC)(GLenum pname, GLdouble * data); +typedef GLenum (GLAD_API_PTR *PFNGLGETERRORPROC)(void); +typedef void (GLAD_API_PTR *PFNGLGETFLOATVPROC)(GLenum pname, GLfloat * data); +typedef GLint (GLAD_API_PTR *PFNGLGETFRAGDATAINDEXPROC)(GLuint program, const GLchar * name); +typedef GLint (GLAD_API_PTR *PFNGLGETFRAGDATALOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETINTEGER64I_VPROC)(GLenum target, GLuint index, GLint64 * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64 * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERI_VPROC)(GLenum target, GLuint index, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERVPROC)(GLenum pname, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETLIGHTFVPROC)(GLenum light, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETLIGHTIVPROC)(GLenum light, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETMAPDVPROC)(GLenum target, GLenum query, GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLGETMAPFVPROC)(GLenum target, GLenum query, GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLGETMAPIVPROC)(GLenum target, GLenum query, GLint * v); +typedef void (GLAD_API_PTR *PFNGLGETMATERIALFVPROC)(GLenum face, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETMATERIALIVPROC)(GLenum face, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETMULTISAMPLEFVPROC)(GLenum pname, GLuint index, GLfloat * val); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPFVPROC)(GLenum map, GLfloat * values); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPUIVPROC)(GLenum map, GLuint * values); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPUSVPROC)(GLenum map, GLushort * values); +typedef void (GLAD_API_PTR *PFNGLGETPOINTERVPROC)(GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETPOLYGONSTIPPLEPROC)(GLubyte * mask); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * source); +typedef void (GLAD_API_PTR *PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint * params); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGPROC)(GLenum name); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGIPROC)(GLenum name, GLuint index); +typedef void (GLAD_API_PTR *PFNGLGETSYNCIVPROC)(GLsync sync, GLenum pname, GLsizei count, GLsizei * length, GLint * values); +typedef void (GLAD_API_PTR *PFNGLGETTEXENVFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXENVIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENDVPROC)(GLenum coord, GLenum pname, GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENFVPROC)(GLenum coord, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENIVPROC)(GLenum coord, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLGETTEXLEVELPARAMETERFVPROC)(GLenum target, GLint level, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXLEVELPARAMETERIVPROC)(GLenum target, GLint level, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLsizei * size, GLenum * type, GLchar * name); +typedef GLuint (GLAD_API_PTR *PFNGLGETUNIFORMBLOCKINDEXPROC)(GLuint program, const GLchar * uniformBlockName); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMINDICESPROC)(GLuint program, GLsizei uniformCount, const GLchar *const* uniformNames, GLuint * uniformIndices); +typedef GLint (GLAD_API_PTR *PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMUIVPROC)(GLuint program, GLint location, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIUIVPROC)(GLuint index, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void ** pointer); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBDVPROC)(GLuint index, GLenum pname, GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLHINTPROC)(GLenum target, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLINDEXMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLINDEXPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLINDEXDPROC)(GLdouble c); +typedef void (GLAD_API_PTR *PFNGLINDEXDVPROC)(const GLdouble * c); +typedef void (GLAD_API_PTR *PFNGLINDEXFPROC)(GLfloat c); +typedef void (GLAD_API_PTR *PFNGLINDEXFVPROC)(const GLfloat * c); +typedef void (GLAD_API_PTR *PFNGLINDEXIPROC)(GLint c); +typedef void (GLAD_API_PTR *PFNGLINDEXIVPROC)(const GLint * c); +typedef void (GLAD_API_PTR *PFNGLINDEXSPROC)(GLshort c); +typedef void (GLAD_API_PTR *PFNGLINDEXSVPROC)(const GLshort * c); +typedef void (GLAD_API_PTR *PFNGLINDEXUBPROC)(GLubyte c); +typedef void (GLAD_API_PTR *PFNGLINDEXUBVPROC)(const GLubyte * c); +typedef void (GLAD_API_PTR *PFNGLINITNAMESPROC)(void); +typedef void (GLAD_API_PTR *PFNGLINTERLEAVEDARRAYSPROC)(GLenum format, GLsizei stride, const void * pointer); +typedef GLboolean (GLAD_API_PTR *PFNGLISBUFFERPROC)(GLuint buffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDPROC)(GLenum cap); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDIPROC)(GLenum target, GLuint index); +typedef GLboolean (GLAD_API_PTR *PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISLISTPROC)(GLuint list); +typedef GLboolean (GLAD_API_PTR *PFNGLISPROGRAMPROC)(GLuint program); +typedef GLboolean (GLAD_API_PTR *PFNGLISQUERYPROC)(GLuint id); +typedef GLboolean (GLAD_API_PTR *PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISSAMPLERPROC)(GLuint sampler); +typedef GLboolean (GLAD_API_PTR *PFNGLISSHADERPROC)(GLuint shader); +typedef GLboolean (GLAD_API_PTR *PFNGLISSYNCPROC)(GLsync sync); +typedef GLboolean (GLAD_API_PTR *PFNGLISTEXTUREPROC)(GLuint texture); +typedef GLboolean (GLAD_API_PTR *PFNGLISVERTEXARRAYPROC)(GLuint array); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTFPROC)(GLenum light, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLLIGHTFVPROC)(GLenum light, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTIPROC)(GLenum light, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLLIGHTIVPROC)(GLenum light, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLLINESTIPPLEPROC)(GLint factor, GLushort pattern); +typedef void (GLAD_API_PTR *PFNGLLINEWIDTHPROC)(GLfloat width); +typedef void (GLAD_API_PTR *PFNGLLINKPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLLISTBASEPROC)(GLuint base); +typedef void (GLAD_API_PTR *PFNGLLOADIDENTITYPROC)(void); +typedef void (GLAD_API_PTR *PFNGLLOADMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLLOADMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLLOADNAMEPROC)(GLuint name); +typedef void (GLAD_API_PTR *PFNGLLOADTRANSPOSEMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLLOADTRANSPOSEMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLLOGICOPPROC)(GLenum opcode); +typedef void (GLAD_API_PTR *PFNGLMAP1DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint stride, GLint order, const GLdouble * points); +typedef void (GLAD_API_PTR *PFNGLMAP1FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint stride, GLint order, const GLfloat * points); +typedef void (GLAD_API_PTR *PFNGLMAP2DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint ustride, GLint uorder, GLdouble v1, GLdouble v2, GLint vstride, GLint vorder, const GLdouble * points); +typedef void (GLAD_API_PTR *PFNGLMAP2FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint ustride, GLint uorder, GLfloat v1, GLfloat v2, GLint vstride, GLint vorder, const GLfloat * points); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERPROC)(GLenum target, GLenum access); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GLAD_API_PTR *PFNGLMAPGRID1DPROC)(GLint un, GLdouble u1, GLdouble u2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID1FPROC)(GLint un, GLfloat u1, GLfloat u2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID2DPROC)(GLint un, GLdouble u1, GLdouble u2, GLint vn, GLdouble v1, GLdouble v2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID2FPROC)(GLint un, GLfloat u1, GLfloat u2, GLint vn, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLMATERIALFPROC)(GLenum face, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLMATERIALFVPROC)(GLenum face, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLMATERIALIPROC)(GLenum face, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLMATERIALIVPROC)(GLenum face, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLMATRIXMODEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLMULTMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLMULTMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLMULTTRANSPOSEMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLMULTTRANSPOSEMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWARRAYSPROC)(GLenum mode, const GLint * first, const GLsizei * count, GLsizei drawcount); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWELEMENTSPROC)(GLenum mode, const GLsizei * count, GLenum type, const void *const* indices, GLsizei drawcount); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, const GLsizei * count, GLenum type, const void *const* indices, GLsizei drawcount, const GLint * basevertex); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1DPROC)(GLenum target, GLdouble s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1FPROC)(GLenum target, GLfloat s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1IPROC)(GLenum target, GLint s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1SPROC)(GLenum target, GLshort s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2DPROC)(GLenum target, GLdouble s, GLdouble t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2FPROC)(GLenum target, GLfloat s, GLfloat t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2IPROC)(GLenum target, GLint s, GLint t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2SPROC)(GLenum target, GLshort s, GLshort t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3IPROC)(GLenum target, GLint s, GLint t, GLint r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3SPROC)(GLenum target, GLshort s, GLshort t, GLshort r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4IPROC)(GLenum target, GLint s, GLint t, GLint r, GLint q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4SPROC)(GLenum target, GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP1UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP1UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP2UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP2UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLNEWLISTPROC)(GLuint list, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLNORMAL3BPROC)(GLbyte nx, GLbyte ny, GLbyte nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3DPROC)(GLdouble nx, GLdouble ny, GLdouble nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3FPROC)(GLfloat nx, GLfloat ny, GLfloat nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3IPROC)(GLint nx, GLint ny, GLint nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3SPROC)(GLshort nx, GLshort ny, GLshort nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLNORMALP3UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLNORMALP3UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLNORMALPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLORTHOPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GLAD_API_PTR *PFNGLPASSTHROUGHPROC)(GLfloat token); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPFVPROC)(GLenum map, GLsizei mapsize, const GLfloat * values); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPUIVPROC)(GLenum map, GLsizei mapsize, const GLuint * values); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPUSVPROC)(GLenum map, GLsizei mapsize, const GLushort * values); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPIXELTRANSFERFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPIXELTRANSFERIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPIXELZOOMPROC)(GLfloat xfactor, GLfloat yfactor); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLPOINTSIZEPROC)(GLfloat size); +typedef void (GLAD_API_PTR *PFNGLPOLYGONMODEPROC)(GLenum face, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units); +typedef void (GLAD_API_PTR *PFNGLPOLYGONSTIPPLEPROC)(const GLubyte * mask); +typedef void (GLAD_API_PTR *PFNGLPOPATTRIBPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPCLIENTATTRIBPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPMATRIXPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPNAMEPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPRIMITIVERESTARTINDEXPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLPRIORITIZETEXTURESPROC)(GLsizei n, const GLuint * textures, const GLfloat * priorities); +typedef void (GLAD_API_PTR *PFNGLPROVOKINGVERTEXPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLPUSHATTRIBPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLPUSHCLIENTATTRIBPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLPUSHMATRIXPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPUSHNAMEPROC)(GLuint name); +typedef void (GLAD_API_PTR *PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4IPROC)(GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLREADBUFFERPROC)(GLenum src); +typedef void (GLAD_API_PTR *PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLRECTDPROC)(GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2); +typedef void (GLAD_API_PTR *PFNGLRECTDVPROC)(const GLdouble * v1, const GLdouble * v2); +typedef void (GLAD_API_PTR *PFNGLRECTFPROC)(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2); +typedef void (GLAD_API_PTR *PFNGLRECTFVPROC)(const GLfloat * v1, const GLfloat * v2); +typedef void (GLAD_API_PTR *PFNGLRECTIPROC)(GLint x1, GLint y1, GLint x2, GLint y2); +typedef void (GLAD_API_PTR *PFNGLRECTIVPROC)(const GLint * v1, const GLint * v2); +typedef void (GLAD_API_PTR *PFNGLRECTSPROC)(GLshort x1, GLshort y1, GLshort x2, GLshort y2); +typedef void (GLAD_API_PTR *PFNGLRECTSVPROC)(const GLshort * v1, const GLshort * v2); +typedef GLint (GLAD_API_PTR *PFNGLRENDERMODEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLROTATEDPROC)(GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLROTATEFPROC)(GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert); +typedef void (GLAD_API_PTR *PFNGLSAMPLEMASKIPROC)(GLuint maskNumber, GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, const GLint * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, const GLuint * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERFPROC)(GLuint sampler, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, const GLfloat * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIPROC)(GLuint sampler, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, const GLint * param); +typedef void (GLAD_API_PTR *PFNGLSCALEDPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLSCALEFPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3IPROC)(GLint red, GLint green, GLint blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORP3UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORP3UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLSELECTBUFFERPROC)(GLsizei size, GLuint * buffer); +typedef void (GLAD_API_PTR *PFNGLSHADEMODELPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const* string, const GLint * length); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (GLAD_API_PTR *PFNGLTEXBUFFERPROC)(GLenum target, GLenum internalformat, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1DPROC)(GLdouble s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1FPROC)(GLfloat s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1IPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1SPROC)(GLshort s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2DPROC)(GLdouble s, GLdouble t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2FPROC)(GLfloat s, GLfloat t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2IPROC)(GLint s, GLint t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2SPROC)(GLshort s, GLshort t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3DPROC)(GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3FPROC)(GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3IPROC)(GLint s, GLint t, GLint r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3SPROC)(GLshort s, GLshort t, GLshort r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4DPROC)(GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4FPROC)(GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4IPROC)(GLint s, GLint t, GLint r, GLint q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4SPROC)(GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP1UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP1UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP2UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP2UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP3UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP3UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP4UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP4UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLTEXENVFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXENVFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXENVIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXENVIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENDPROC)(GLenum coord, GLenum pname, GLdouble param); +typedef void (GLAD_API_PTR *PFNGLTEXGENDVPROC)(GLenum coord, GLenum pname, const GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENFPROC)(GLenum coord, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXGENFVPROC)(GLenum coord, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENIPROC)(GLenum coord, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXGENIVPROC)(GLenum coord, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE1DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE3DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, const GLuint * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTRANSFORMFEEDBACKVARYINGSPROC)(GLuint program, GLsizei count, const GLchar *const* varyings, GLenum bufferMode); +typedef void (GLAD_API_PTR *PFNGLTRANSLATEDPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLTRANSLATEFPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IPROC)(GLint location, GLint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1UIPROC)(GLint location, GLuint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2UIPROC)(GLint location, GLuint v0, GLuint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMBLOCKBINDINGPROC)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFERPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLUSEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVALIDATEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVERTEX2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4IPROC)(GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1DPROC)(GLuint index, GLdouble x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1SPROC)(GLuint index, GLshort x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2DPROC)(GLuint index, GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2SPROC)(GLuint index, GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3SPROC)(GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NBVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NIVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NSVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUBPROC)(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUSVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4BVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4SPROC)(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4UBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4USVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBDIVISORPROC)(GLuint index, GLuint divisor); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1IPROC)(GLuint index, GLint x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1UIPROC)(GLuint index, GLuint x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2IPROC)(GLuint index, GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2UIPROC)(GLuint index, GLuint x, GLuint y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3IPROC)(GLuint index, GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4BVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4IPROC)(GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4USVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBIPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP1UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP1UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP2UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP2UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP3UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP3UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP4UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP4UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVERTEXP2UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP2UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP3UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP3UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP4UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP4UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SVPROC)(const GLshort * v); + +GLAD_API_CALL PFNGLACCUMPROC glad_glAccum; +#define glAccum glad_glAccum +GLAD_API_CALL PFNGLACTIVETEXTUREPROC glad_glActiveTexture; +#define glActiveTexture glad_glActiveTexture +GLAD_API_CALL PFNGLALPHAFUNCPROC glad_glAlphaFunc; +#define glAlphaFunc glad_glAlphaFunc +GLAD_API_CALL PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident; +#define glAreTexturesResident glad_glAreTexturesResident +GLAD_API_CALL PFNGLARRAYELEMENTPROC glad_glArrayElement; +#define glArrayElement glad_glArrayElement +GLAD_API_CALL PFNGLATTACHSHADERPROC glad_glAttachShader; +#define glAttachShader glad_glAttachShader +GLAD_API_CALL PFNGLBEGINPROC glad_glBegin; +#define glBegin glad_glBegin +GLAD_API_CALL PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender; +#define glBeginConditionalRender glad_glBeginConditionalRender +GLAD_API_CALL PFNGLBEGINQUERYPROC glad_glBeginQuery; +#define glBeginQuery glad_glBeginQuery +GLAD_API_CALL PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback; +#define glBeginTransformFeedback glad_glBeginTransformFeedback +GLAD_API_CALL PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation; +#define glBindAttribLocation glad_glBindAttribLocation +GLAD_API_CALL PFNGLBINDBUFFERPROC glad_glBindBuffer; +#define glBindBuffer glad_glBindBuffer +GLAD_API_CALL PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase; +#define glBindBufferBase glad_glBindBufferBase +GLAD_API_CALL PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange; +#define glBindBufferRange glad_glBindBufferRange +GLAD_API_CALL PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation; +#define glBindFragDataLocation glad_glBindFragDataLocation +GLAD_API_CALL PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed; +#define glBindFragDataLocationIndexed glad_glBindFragDataLocationIndexed +GLAD_API_CALL PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer; +#define glBindFramebuffer glad_glBindFramebuffer +GLAD_API_CALL PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer; +#define glBindRenderbuffer glad_glBindRenderbuffer +GLAD_API_CALL PFNGLBINDSAMPLERPROC glad_glBindSampler; +#define glBindSampler glad_glBindSampler +GLAD_API_CALL PFNGLBINDTEXTUREPROC glad_glBindTexture; +#define glBindTexture glad_glBindTexture +GLAD_API_CALL PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray; +#define glBindVertexArray glad_glBindVertexArray +GLAD_API_CALL PFNGLBITMAPPROC glad_glBitmap; +#define glBitmap glad_glBitmap +GLAD_API_CALL PFNGLBLENDCOLORPROC glad_glBlendColor; +#define glBlendColor glad_glBlendColor +GLAD_API_CALL PFNGLBLENDEQUATIONPROC glad_glBlendEquation; +#define glBlendEquation glad_glBlendEquation +GLAD_API_CALL PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate; +#define glBlendEquationSeparate glad_glBlendEquationSeparate +GLAD_API_CALL PFNGLBLENDFUNCPROC glad_glBlendFunc; +#define glBlendFunc glad_glBlendFunc +GLAD_API_CALL PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; +#define glBlendFuncSeparate glad_glBlendFuncSeparate +GLAD_API_CALL PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer; +#define glBlitFramebuffer glad_glBlitFramebuffer +GLAD_API_CALL PFNGLBUFFERDATAPROC glad_glBufferData; +#define glBufferData glad_glBufferData +GLAD_API_CALL PFNGLBUFFERSUBDATAPROC glad_glBufferSubData; +#define glBufferSubData glad_glBufferSubData +GLAD_API_CALL PFNGLCALLLISTPROC glad_glCallList; +#define glCallList glad_glCallList +GLAD_API_CALL PFNGLCALLLISTSPROC glad_glCallLists; +#define glCallLists glad_glCallLists +GLAD_API_CALL PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus; +#define glCheckFramebufferStatus glad_glCheckFramebufferStatus +GLAD_API_CALL PFNGLCLAMPCOLORPROC glad_glClampColor; +#define glClampColor glad_glClampColor +GLAD_API_CALL PFNGLCLEARPROC glad_glClear; +#define glClear glad_glClear +GLAD_API_CALL PFNGLCLEARACCUMPROC glad_glClearAccum; +#define glClearAccum glad_glClearAccum +GLAD_API_CALL PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi; +#define glClearBufferfi glad_glClearBufferfi +GLAD_API_CALL PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv; +#define glClearBufferfv glad_glClearBufferfv +GLAD_API_CALL PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv; +#define glClearBufferiv glad_glClearBufferiv +GLAD_API_CALL PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv; +#define glClearBufferuiv glad_glClearBufferuiv +GLAD_API_CALL PFNGLCLEARCOLORPROC glad_glClearColor; +#define glClearColor glad_glClearColor +GLAD_API_CALL PFNGLCLEARDEPTHPROC glad_glClearDepth; +#define glClearDepth glad_glClearDepth +GLAD_API_CALL PFNGLCLEARINDEXPROC glad_glClearIndex; +#define glClearIndex glad_glClearIndex +GLAD_API_CALL PFNGLCLEARSTENCILPROC glad_glClearStencil; +#define glClearStencil glad_glClearStencil +GLAD_API_CALL PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture; +#define glClientActiveTexture glad_glClientActiveTexture +GLAD_API_CALL PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync; +#define glClientWaitSync glad_glClientWaitSync +GLAD_API_CALL PFNGLCLIPPLANEPROC glad_glClipPlane; +#define glClipPlane glad_glClipPlane +GLAD_API_CALL PFNGLCOLOR3BPROC glad_glColor3b; +#define glColor3b glad_glColor3b +GLAD_API_CALL PFNGLCOLOR3BVPROC glad_glColor3bv; +#define glColor3bv glad_glColor3bv +GLAD_API_CALL PFNGLCOLOR3DPROC glad_glColor3d; +#define glColor3d glad_glColor3d +GLAD_API_CALL PFNGLCOLOR3DVPROC glad_glColor3dv; +#define glColor3dv glad_glColor3dv +GLAD_API_CALL PFNGLCOLOR3FPROC glad_glColor3f; +#define glColor3f glad_glColor3f +GLAD_API_CALL PFNGLCOLOR3FVPROC glad_glColor3fv; +#define glColor3fv glad_glColor3fv +GLAD_API_CALL PFNGLCOLOR3IPROC glad_glColor3i; +#define glColor3i glad_glColor3i +GLAD_API_CALL PFNGLCOLOR3IVPROC glad_glColor3iv; +#define glColor3iv glad_glColor3iv +GLAD_API_CALL PFNGLCOLOR3SPROC glad_glColor3s; +#define glColor3s glad_glColor3s +GLAD_API_CALL PFNGLCOLOR3SVPROC glad_glColor3sv; +#define glColor3sv glad_glColor3sv +GLAD_API_CALL PFNGLCOLOR3UBPROC glad_glColor3ub; +#define glColor3ub glad_glColor3ub +GLAD_API_CALL PFNGLCOLOR3UBVPROC glad_glColor3ubv; +#define glColor3ubv glad_glColor3ubv +GLAD_API_CALL PFNGLCOLOR3UIPROC glad_glColor3ui; +#define glColor3ui glad_glColor3ui +GLAD_API_CALL PFNGLCOLOR3UIVPROC glad_glColor3uiv; +#define glColor3uiv glad_glColor3uiv +GLAD_API_CALL PFNGLCOLOR3USPROC glad_glColor3us; +#define glColor3us glad_glColor3us +GLAD_API_CALL PFNGLCOLOR3USVPROC glad_glColor3usv; +#define glColor3usv glad_glColor3usv +GLAD_API_CALL PFNGLCOLOR4BPROC glad_glColor4b; +#define glColor4b glad_glColor4b +GLAD_API_CALL PFNGLCOLOR4BVPROC glad_glColor4bv; +#define glColor4bv glad_glColor4bv +GLAD_API_CALL PFNGLCOLOR4DPROC glad_glColor4d; +#define glColor4d glad_glColor4d +GLAD_API_CALL PFNGLCOLOR4DVPROC glad_glColor4dv; +#define glColor4dv glad_glColor4dv +GLAD_API_CALL PFNGLCOLOR4FPROC glad_glColor4f; +#define glColor4f glad_glColor4f +GLAD_API_CALL PFNGLCOLOR4FVPROC glad_glColor4fv; +#define glColor4fv glad_glColor4fv +GLAD_API_CALL PFNGLCOLOR4IPROC glad_glColor4i; +#define glColor4i glad_glColor4i +GLAD_API_CALL PFNGLCOLOR4IVPROC glad_glColor4iv; +#define glColor4iv glad_glColor4iv +GLAD_API_CALL PFNGLCOLOR4SPROC glad_glColor4s; +#define glColor4s glad_glColor4s +GLAD_API_CALL PFNGLCOLOR4SVPROC glad_glColor4sv; +#define glColor4sv glad_glColor4sv +GLAD_API_CALL PFNGLCOLOR4UBPROC glad_glColor4ub; +#define glColor4ub glad_glColor4ub +GLAD_API_CALL PFNGLCOLOR4UBVPROC glad_glColor4ubv; +#define glColor4ubv glad_glColor4ubv +GLAD_API_CALL PFNGLCOLOR4UIPROC glad_glColor4ui; +#define glColor4ui glad_glColor4ui +GLAD_API_CALL PFNGLCOLOR4UIVPROC glad_glColor4uiv; +#define glColor4uiv glad_glColor4uiv +GLAD_API_CALL PFNGLCOLOR4USPROC glad_glColor4us; +#define glColor4us glad_glColor4us +GLAD_API_CALL PFNGLCOLOR4USVPROC glad_glColor4usv; +#define glColor4usv glad_glColor4usv +GLAD_API_CALL PFNGLCOLORMASKPROC glad_glColorMask; +#define glColorMask glad_glColorMask +GLAD_API_CALL PFNGLCOLORMASKIPROC glad_glColorMaski; +#define glColorMaski glad_glColorMaski +GLAD_API_CALL PFNGLCOLORMATERIALPROC glad_glColorMaterial; +#define glColorMaterial glad_glColorMaterial +GLAD_API_CALL PFNGLCOLORP3UIPROC glad_glColorP3ui; +#define glColorP3ui glad_glColorP3ui +GLAD_API_CALL PFNGLCOLORP3UIVPROC glad_glColorP3uiv; +#define glColorP3uiv glad_glColorP3uiv +GLAD_API_CALL PFNGLCOLORP4UIPROC glad_glColorP4ui; +#define glColorP4ui glad_glColorP4ui +GLAD_API_CALL PFNGLCOLORP4UIVPROC glad_glColorP4uiv; +#define glColorP4uiv glad_glColorP4uiv +GLAD_API_CALL PFNGLCOLORPOINTERPROC glad_glColorPointer; +#define glColorPointer glad_glColorPointer +GLAD_API_CALL PFNGLCOMPILESHADERPROC glad_glCompileShader; +#define glCompileShader glad_glCompileShader +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D; +#define glCompressedTexImage1D glad_glCompressedTexImage1D +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D; +#define glCompressedTexImage2D glad_glCompressedTexImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D; +#define glCompressedTexImage3D glad_glCompressedTexImage3D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D; +#define glCompressedTexSubImage1D glad_glCompressedTexSubImage1D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D; +#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D; +#define glCompressedTexSubImage3D glad_glCompressedTexSubImage3D +GLAD_API_CALL PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData; +#define glCopyBufferSubData glad_glCopyBufferSubData +GLAD_API_CALL PFNGLCOPYPIXELSPROC glad_glCopyPixels; +#define glCopyPixels glad_glCopyPixels +GLAD_API_CALL PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D; +#define glCopyTexImage1D glad_glCopyTexImage1D +GLAD_API_CALL PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D; +#define glCopyTexImage2D glad_glCopyTexImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D; +#define glCopyTexSubImage1D glad_glCopyTexSubImage1D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D; +#define glCopyTexSubImage2D glad_glCopyTexSubImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D; +#define glCopyTexSubImage3D glad_glCopyTexSubImage3D +GLAD_API_CALL PFNGLCREATEPROGRAMPROC glad_glCreateProgram; +#define glCreateProgram glad_glCreateProgram +GLAD_API_CALL PFNGLCREATESHADERPROC glad_glCreateShader; +#define glCreateShader glad_glCreateShader +GLAD_API_CALL PFNGLCULLFACEPROC glad_glCullFace; +#define glCullFace glad_glCullFace +GLAD_API_CALL PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers; +#define glDeleteBuffers glad_glDeleteBuffers +GLAD_API_CALL PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers; +#define glDeleteFramebuffers glad_glDeleteFramebuffers +GLAD_API_CALL PFNGLDELETELISTSPROC glad_glDeleteLists; +#define glDeleteLists glad_glDeleteLists +GLAD_API_CALL PFNGLDELETEPROGRAMPROC glad_glDeleteProgram; +#define glDeleteProgram glad_glDeleteProgram +GLAD_API_CALL PFNGLDELETEQUERIESPROC glad_glDeleteQueries; +#define glDeleteQueries glad_glDeleteQueries +GLAD_API_CALL PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers; +#define glDeleteRenderbuffers glad_glDeleteRenderbuffers +GLAD_API_CALL PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers; +#define glDeleteSamplers glad_glDeleteSamplers +GLAD_API_CALL PFNGLDELETESHADERPROC glad_glDeleteShader; +#define glDeleteShader glad_glDeleteShader +GLAD_API_CALL PFNGLDELETESYNCPROC glad_glDeleteSync; +#define glDeleteSync glad_glDeleteSync +GLAD_API_CALL PFNGLDELETETEXTURESPROC glad_glDeleteTextures; +#define glDeleteTextures glad_glDeleteTextures +GLAD_API_CALL PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays; +#define glDeleteVertexArrays glad_glDeleteVertexArrays +GLAD_API_CALL PFNGLDEPTHFUNCPROC glad_glDepthFunc; +#define glDepthFunc glad_glDepthFunc +GLAD_API_CALL PFNGLDEPTHMASKPROC glad_glDepthMask; +#define glDepthMask glad_glDepthMask +GLAD_API_CALL PFNGLDEPTHRANGEPROC glad_glDepthRange; +#define glDepthRange glad_glDepthRange +GLAD_API_CALL PFNGLDETACHSHADERPROC glad_glDetachShader; +#define glDetachShader glad_glDetachShader +GLAD_API_CALL PFNGLDISABLEPROC glad_glDisable; +#define glDisable glad_glDisable +GLAD_API_CALL PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState; +#define glDisableClientState glad_glDisableClientState +GLAD_API_CALL PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray; +#define glDisableVertexAttribArray glad_glDisableVertexAttribArray +GLAD_API_CALL PFNGLDISABLEIPROC glad_glDisablei; +#define glDisablei glad_glDisablei +GLAD_API_CALL PFNGLDRAWARRAYSPROC glad_glDrawArrays; +#define glDrawArrays glad_glDrawArrays +GLAD_API_CALL PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced; +#define glDrawArraysInstanced glad_glDrawArraysInstanced +GLAD_API_CALL PFNGLDRAWBUFFERPROC glad_glDrawBuffer; +#define glDrawBuffer glad_glDrawBuffer +GLAD_API_CALL PFNGLDRAWBUFFERSPROC glad_glDrawBuffers; +#define glDrawBuffers glad_glDrawBuffers +GLAD_API_CALL PFNGLDRAWELEMENTSPROC glad_glDrawElements; +#define glDrawElements glad_glDrawElements +GLAD_API_CALL PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex; +#define glDrawElementsBaseVertex glad_glDrawElementsBaseVertex +GLAD_API_CALL PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced; +#define glDrawElementsInstanced glad_glDrawElementsInstanced +GLAD_API_CALL PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex; +#define glDrawElementsInstancedBaseVertex glad_glDrawElementsInstancedBaseVertex +GLAD_API_CALL PFNGLDRAWPIXELSPROC glad_glDrawPixels; +#define glDrawPixels glad_glDrawPixels +GLAD_API_CALL PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements; +#define glDrawRangeElements glad_glDrawRangeElements +GLAD_API_CALL PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex; +#define glDrawRangeElementsBaseVertex glad_glDrawRangeElementsBaseVertex +GLAD_API_CALL PFNGLEDGEFLAGPROC glad_glEdgeFlag; +#define glEdgeFlag glad_glEdgeFlag +GLAD_API_CALL PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer; +#define glEdgeFlagPointer glad_glEdgeFlagPointer +GLAD_API_CALL PFNGLEDGEFLAGVPROC glad_glEdgeFlagv; +#define glEdgeFlagv glad_glEdgeFlagv +GLAD_API_CALL PFNGLENABLEPROC glad_glEnable; +#define glEnable glad_glEnable +GLAD_API_CALL PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState; +#define glEnableClientState glad_glEnableClientState +GLAD_API_CALL PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray; +#define glEnableVertexAttribArray glad_glEnableVertexAttribArray +GLAD_API_CALL PFNGLENABLEIPROC glad_glEnablei; +#define glEnablei glad_glEnablei +GLAD_API_CALL PFNGLENDPROC glad_glEnd; +#define glEnd glad_glEnd +GLAD_API_CALL PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender; +#define glEndConditionalRender glad_glEndConditionalRender +GLAD_API_CALL PFNGLENDLISTPROC glad_glEndList; +#define glEndList glad_glEndList +GLAD_API_CALL PFNGLENDQUERYPROC glad_glEndQuery; +#define glEndQuery glad_glEndQuery +GLAD_API_CALL PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback; +#define glEndTransformFeedback glad_glEndTransformFeedback +GLAD_API_CALL PFNGLEVALCOORD1DPROC glad_glEvalCoord1d; +#define glEvalCoord1d glad_glEvalCoord1d +GLAD_API_CALL PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv; +#define glEvalCoord1dv glad_glEvalCoord1dv +GLAD_API_CALL PFNGLEVALCOORD1FPROC glad_glEvalCoord1f; +#define glEvalCoord1f glad_glEvalCoord1f +GLAD_API_CALL PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv; +#define glEvalCoord1fv glad_glEvalCoord1fv +GLAD_API_CALL PFNGLEVALCOORD2DPROC glad_glEvalCoord2d; +#define glEvalCoord2d glad_glEvalCoord2d +GLAD_API_CALL PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv; +#define glEvalCoord2dv glad_glEvalCoord2dv +GLAD_API_CALL PFNGLEVALCOORD2FPROC glad_glEvalCoord2f; +#define glEvalCoord2f glad_glEvalCoord2f +GLAD_API_CALL PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv; +#define glEvalCoord2fv glad_glEvalCoord2fv +GLAD_API_CALL PFNGLEVALMESH1PROC glad_glEvalMesh1; +#define glEvalMesh1 glad_glEvalMesh1 +GLAD_API_CALL PFNGLEVALMESH2PROC glad_glEvalMesh2; +#define glEvalMesh2 glad_glEvalMesh2 +GLAD_API_CALL PFNGLEVALPOINT1PROC glad_glEvalPoint1; +#define glEvalPoint1 glad_glEvalPoint1 +GLAD_API_CALL PFNGLEVALPOINT2PROC glad_glEvalPoint2; +#define glEvalPoint2 glad_glEvalPoint2 +GLAD_API_CALL PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer; +#define glFeedbackBuffer glad_glFeedbackBuffer +GLAD_API_CALL PFNGLFENCESYNCPROC glad_glFenceSync; +#define glFenceSync glad_glFenceSync +GLAD_API_CALL PFNGLFINISHPROC glad_glFinish; +#define glFinish glad_glFinish +GLAD_API_CALL PFNGLFLUSHPROC glad_glFlush; +#define glFlush glad_glFlush +GLAD_API_CALL PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange; +#define glFlushMappedBufferRange glad_glFlushMappedBufferRange +GLAD_API_CALL PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer; +#define glFogCoordPointer glad_glFogCoordPointer +GLAD_API_CALL PFNGLFOGCOORDDPROC glad_glFogCoordd; +#define glFogCoordd glad_glFogCoordd +GLAD_API_CALL PFNGLFOGCOORDDVPROC glad_glFogCoorddv; +#define glFogCoorddv glad_glFogCoorddv +GLAD_API_CALL PFNGLFOGCOORDFPROC glad_glFogCoordf; +#define glFogCoordf glad_glFogCoordf +GLAD_API_CALL PFNGLFOGCOORDFVPROC glad_glFogCoordfv; +#define glFogCoordfv glad_glFogCoordfv +GLAD_API_CALL PFNGLFOGFPROC glad_glFogf; +#define glFogf glad_glFogf +GLAD_API_CALL PFNGLFOGFVPROC glad_glFogfv; +#define glFogfv glad_glFogfv +GLAD_API_CALL PFNGLFOGIPROC glad_glFogi; +#define glFogi glad_glFogi +GLAD_API_CALL PFNGLFOGIVPROC glad_glFogiv; +#define glFogiv glad_glFogiv +GLAD_API_CALL PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer; +#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture; +#define glFramebufferTexture glad_glFramebufferTexture +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D; +#define glFramebufferTexture1D glad_glFramebufferTexture1D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D; +#define glFramebufferTexture2D glad_glFramebufferTexture2D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D; +#define glFramebufferTexture3D glad_glFramebufferTexture3D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer; +#define glFramebufferTextureLayer glad_glFramebufferTextureLayer +GLAD_API_CALL PFNGLFRONTFACEPROC glad_glFrontFace; +#define glFrontFace glad_glFrontFace +GLAD_API_CALL PFNGLFRUSTUMPROC glad_glFrustum; +#define glFrustum glad_glFrustum +GLAD_API_CALL PFNGLGENBUFFERSPROC glad_glGenBuffers; +#define glGenBuffers glad_glGenBuffers +GLAD_API_CALL PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers; +#define glGenFramebuffers glad_glGenFramebuffers +GLAD_API_CALL PFNGLGENLISTSPROC glad_glGenLists; +#define glGenLists glad_glGenLists +GLAD_API_CALL PFNGLGENQUERIESPROC glad_glGenQueries; +#define glGenQueries glad_glGenQueries +GLAD_API_CALL PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers; +#define glGenRenderbuffers glad_glGenRenderbuffers +GLAD_API_CALL PFNGLGENSAMPLERSPROC glad_glGenSamplers; +#define glGenSamplers glad_glGenSamplers +GLAD_API_CALL PFNGLGENTEXTURESPROC glad_glGenTextures; +#define glGenTextures glad_glGenTextures +GLAD_API_CALL PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays; +#define glGenVertexArrays glad_glGenVertexArrays +GLAD_API_CALL PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap; +#define glGenerateMipmap glad_glGenerateMipmap +GLAD_API_CALL PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib; +#define glGetActiveAttrib glad_glGetActiveAttrib +GLAD_API_CALL PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform; +#define glGetActiveUniform glad_glGetActiveUniform +GLAD_API_CALL PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName; +#define glGetActiveUniformBlockName glad_glGetActiveUniformBlockName +GLAD_API_CALL PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv; +#define glGetActiveUniformBlockiv glad_glGetActiveUniformBlockiv +GLAD_API_CALL PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName; +#define glGetActiveUniformName glad_glGetActiveUniformName +GLAD_API_CALL PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv; +#define glGetActiveUniformsiv glad_glGetActiveUniformsiv +GLAD_API_CALL PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders; +#define glGetAttachedShaders glad_glGetAttachedShaders +GLAD_API_CALL PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation; +#define glGetAttribLocation glad_glGetAttribLocation +GLAD_API_CALL PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v; +#define glGetBooleani_v glad_glGetBooleani_v +GLAD_API_CALL PFNGLGETBOOLEANVPROC glad_glGetBooleanv; +#define glGetBooleanv glad_glGetBooleanv +GLAD_API_CALL PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v; +#define glGetBufferParameteri64v glad_glGetBufferParameteri64v +GLAD_API_CALL PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv; +#define glGetBufferParameteriv glad_glGetBufferParameteriv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv; +#define glGetBufferPointerv glad_glGetBufferPointerv +GLAD_API_CALL PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData; +#define glGetBufferSubData glad_glGetBufferSubData +GLAD_API_CALL PFNGLGETCLIPPLANEPROC glad_glGetClipPlane; +#define glGetClipPlane glad_glGetClipPlane +GLAD_API_CALL PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage; +#define glGetCompressedTexImage glad_glGetCompressedTexImage +GLAD_API_CALL PFNGLGETDOUBLEVPROC glad_glGetDoublev; +#define glGetDoublev glad_glGetDoublev +GLAD_API_CALL PFNGLGETERRORPROC glad_glGetError; +#define glGetError glad_glGetError +GLAD_API_CALL PFNGLGETFLOATVPROC glad_glGetFloatv; +#define glGetFloatv glad_glGetFloatv +GLAD_API_CALL PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex; +#define glGetFragDataIndex glad_glGetFragDataIndex +GLAD_API_CALL PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation; +#define glGetFragDataLocation glad_glGetFragDataLocation +GLAD_API_CALL PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv; +#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv +GLAD_API_CALL PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v; +#define glGetInteger64i_v glad_glGetInteger64i_v +GLAD_API_CALL PFNGLGETINTEGER64VPROC glad_glGetInteger64v; +#define glGetInteger64v glad_glGetInteger64v +GLAD_API_CALL PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v; +#define glGetIntegeri_v glad_glGetIntegeri_v +GLAD_API_CALL PFNGLGETINTEGERVPROC glad_glGetIntegerv; +#define glGetIntegerv glad_glGetIntegerv +GLAD_API_CALL PFNGLGETLIGHTFVPROC glad_glGetLightfv; +#define glGetLightfv glad_glGetLightfv +GLAD_API_CALL PFNGLGETLIGHTIVPROC glad_glGetLightiv; +#define glGetLightiv glad_glGetLightiv +GLAD_API_CALL PFNGLGETMAPDVPROC glad_glGetMapdv; +#define glGetMapdv glad_glGetMapdv +GLAD_API_CALL PFNGLGETMAPFVPROC glad_glGetMapfv; +#define glGetMapfv glad_glGetMapfv +GLAD_API_CALL PFNGLGETMAPIVPROC glad_glGetMapiv; +#define glGetMapiv glad_glGetMapiv +GLAD_API_CALL PFNGLGETMATERIALFVPROC glad_glGetMaterialfv; +#define glGetMaterialfv glad_glGetMaterialfv +GLAD_API_CALL PFNGLGETMATERIALIVPROC glad_glGetMaterialiv; +#define glGetMaterialiv glad_glGetMaterialiv +GLAD_API_CALL PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv; +#define glGetMultisamplefv glad_glGetMultisamplefv +GLAD_API_CALL PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv; +#define glGetPixelMapfv glad_glGetPixelMapfv +GLAD_API_CALL PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv; +#define glGetPixelMapuiv glad_glGetPixelMapuiv +GLAD_API_CALL PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv; +#define glGetPixelMapusv glad_glGetPixelMapusv +GLAD_API_CALL PFNGLGETPOINTERVPROC glad_glGetPointerv; +#define glGetPointerv glad_glGetPointerv +GLAD_API_CALL PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple; +#define glGetPolygonStipple glad_glGetPolygonStipple +GLAD_API_CALL PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog; +#define glGetProgramInfoLog glad_glGetProgramInfoLog +GLAD_API_CALL PFNGLGETPROGRAMIVPROC glad_glGetProgramiv; +#define glGetProgramiv glad_glGetProgramiv +GLAD_API_CALL PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v; +#define glGetQueryObjecti64v glad_glGetQueryObjecti64v +GLAD_API_CALL PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv; +#define glGetQueryObjectiv glad_glGetQueryObjectiv +GLAD_API_CALL PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v; +#define glGetQueryObjectui64v glad_glGetQueryObjectui64v +GLAD_API_CALL PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv; +#define glGetQueryObjectuiv glad_glGetQueryObjectuiv +GLAD_API_CALL PFNGLGETQUERYIVPROC glad_glGetQueryiv; +#define glGetQueryiv glad_glGetQueryiv +GLAD_API_CALL PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv; +#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv; +#define glGetSamplerParameterIiv glad_glGetSamplerParameterIiv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv; +#define glGetSamplerParameterIuiv glad_glGetSamplerParameterIuiv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv; +#define glGetSamplerParameterfv glad_glGetSamplerParameterfv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv; +#define glGetSamplerParameteriv glad_glGetSamplerParameteriv +GLAD_API_CALL PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog; +#define glGetShaderInfoLog glad_glGetShaderInfoLog +GLAD_API_CALL PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource; +#define glGetShaderSource glad_glGetShaderSource +GLAD_API_CALL PFNGLGETSHADERIVPROC glad_glGetShaderiv; +#define glGetShaderiv glad_glGetShaderiv +GLAD_API_CALL PFNGLGETSTRINGPROC glad_glGetString; +#define glGetString glad_glGetString +GLAD_API_CALL PFNGLGETSTRINGIPROC glad_glGetStringi; +#define glGetStringi glad_glGetStringi +GLAD_API_CALL PFNGLGETSYNCIVPROC glad_glGetSynciv; +#define glGetSynciv glad_glGetSynciv +GLAD_API_CALL PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv; +#define glGetTexEnvfv glad_glGetTexEnvfv +GLAD_API_CALL PFNGLGETTEXENVIVPROC glad_glGetTexEnviv; +#define glGetTexEnviv glad_glGetTexEnviv +GLAD_API_CALL PFNGLGETTEXGENDVPROC glad_glGetTexGendv; +#define glGetTexGendv glad_glGetTexGendv +GLAD_API_CALL PFNGLGETTEXGENFVPROC glad_glGetTexGenfv; +#define glGetTexGenfv glad_glGetTexGenfv +GLAD_API_CALL PFNGLGETTEXGENIVPROC glad_glGetTexGeniv; +#define glGetTexGeniv glad_glGetTexGeniv +GLAD_API_CALL PFNGLGETTEXIMAGEPROC glad_glGetTexImage; +#define glGetTexImage glad_glGetTexImage +GLAD_API_CALL PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv; +#define glGetTexLevelParameterfv glad_glGetTexLevelParameterfv +GLAD_API_CALL PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv; +#define glGetTexLevelParameteriv glad_glGetTexLevelParameteriv +GLAD_API_CALL PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv; +#define glGetTexParameterIiv glad_glGetTexParameterIiv +GLAD_API_CALL PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv; +#define glGetTexParameterIuiv glad_glGetTexParameterIuiv +GLAD_API_CALL PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv; +#define glGetTexParameterfv glad_glGetTexParameterfv +GLAD_API_CALL PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv; +#define glGetTexParameteriv glad_glGetTexParameteriv +GLAD_API_CALL PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying; +#define glGetTransformFeedbackVarying glad_glGetTransformFeedbackVarying +GLAD_API_CALL PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex; +#define glGetUniformBlockIndex glad_glGetUniformBlockIndex +GLAD_API_CALL PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices; +#define glGetUniformIndices glad_glGetUniformIndices +GLAD_API_CALL PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation; +#define glGetUniformLocation glad_glGetUniformLocation +GLAD_API_CALL PFNGLGETUNIFORMFVPROC glad_glGetUniformfv; +#define glGetUniformfv glad_glGetUniformfv +GLAD_API_CALL PFNGLGETUNIFORMIVPROC glad_glGetUniformiv; +#define glGetUniformiv glad_glGetUniformiv +GLAD_API_CALL PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv; +#define glGetUniformuiv glad_glGetUniformuiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv; +#define glGetVertexAttribIiv glad_glGetVertexAttribIiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv; +#define glGetVertexAttribIuiv glad_glGetVertexAttribIuiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv; +#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv +GLAD_API_CALL PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv; +#define glGetVertexAttribdv glad_glGetVertexAttribdv +GLAD_API_CALL PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv; +#define glGetVertexAttribfv glad_glGetVertexAttribfv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv; +#define glGetVertexAttribiv glad_glGetVertexAttribiv +GLAD_API_CALL PFNGLHINTPROC glad_glHint; +#define glHint glad_glHint +GLAD_API_CALL PFNGLINDEXMASKPROC glad_glIndexMask; +#define glIndexMask glad_glIndexMask +GLAD_API_CALL PFNGLINDEXPOINTERPROC glad_glIndexPointer; +#define glIndexPointer glad_glIndexPointer +GLAD_API_CALL PFNGLINDEXDPROC glad_glIndexd; +#define glIndexd glad_glIndexd +GLAD_API_CALL PFNGLINDEXDVPROC glad_glIndexdv; +#define glIndexdv glad_glIndexdv +GLAD_API_CALL PFNGLINDEXFPROC glad_glIndexf; +#define glIndexf glad_glIndexf +GLAD_API_CALL PFNGLINDEXFVPROC glad_glIndexfv; +#define glIndexfv glad_glIndexfv +GLAD_API_CALL PFNGLINDEXIPROC glad_glIndexi; +#define glIndexi glad_glIndexi +GLAD_API_CALL PFNGLINDEXIVPROC glad_glIndexiv; +#define glIndexiv glad_glIndexiv +GLAD_API_CALL PFNGLINDEXSPROC glad_glIndexs; +#define glIndexs glad_glIndexs +GLAD_API_CALL PFNGLINDEXSVPROC glad_glIndexsv; +#define glIndexsv glad_glIndexsv +GLAD_API_CALL PFNGLINDEXUBPROC glad_glIndexub; +#define glIndexub glad_glIndexub +GLAD_API_CALL PFNGLINDEXUBVPROC glad_glIndexubv; +#define glIndexubv glad_glIndexubv +GLAD_API_CALL PFNGLINITNAMESPROC glad_glInitNames; +#define glInitNames glad_glInitNames +GLAD_API_CALL PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays; +#define glInterleavedArrays glad_glInterleavedArrays +GLAD_API_CALL PFNGLISBUFFERPROC glad_glIsBuffer; +#define glIsBuffer glad_glIsBuffer +GLAD_API_CALL PFNGLISENABLEDPROC glad_glIsEnabled; +#define glIsEnabled glad_glIsEnabled +GLAD_API_CALL PFNGLISENABLEDIPROC glad_glIsEnabledi; +#define glIsEnabledi glad_glIsEnabledi +GLAD_API_CALL PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer; +#define glIsFramebuffer glad_glIsFramebuffer +GLAD_API_CALL PFNGLISLISTPROC glad_glIsList; +#define glIsList glad_glIsList +GLAD_API_CALL PFNGLISPROGRAMPROC glad_glIsProgram; +#define glIsProgram glad_glIsProgram +GLAD_API_CALL PFNGLISQUERYPROC glad_glIsQuery; +#define glIsQuery glad_glIsQuery +GLAD_API_CALL PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer; +#define glIsRenderbuffer glad_glIsRenderbuffer +GLAD_API_CALL PFNGLISSAMPLERPROC glad_glIsSampler; +#define glIsSampler glad_glIsSampler +GLAD_API_CALL PFNGLISSHADERPROC glad_glIsShader; +#define glIsShader glad_glIsShader +GLAD_API_CALL PFNGLISSYNCPROC glad_glIsSync; +#define glIsSync glad_glIsSync +GLAD_API_CALL PFNGLISTEXTUREPROC glad_glIsTexture; +#define glIsTexture glad_glIsTexture +GLAD_API_CALL PFNGLISVERTEXARRAYPROC glad_glIsVertexArray; +#define glIsVertexArray glad_glIsVertexArray +GLAD_API_CALL PFNGLLIGHTMODELFPROC glad_glLightModelf; +#define glLightModelf glad_glLightModelf +GLAD_API_CALL PFNGLLIGHTMODELFVPROC glad_glLightModelfv; +#define glLightModelfv glad_glLightModelfv +GLAD_API_CALL PFNGLLIGHTMODELIPROC glad_glLightModeli; +#define glLightModeli glad_glLightModeli +GLAD_API_CALL PFNGLLIGHTMODELIVPROC glad_glLightModeliv; +#define glLightModeliv glad_glLightModeliv +GLAD_API_CALL PFNGLLIGHTFPROC glad_glLightf; +#define glLightf glad_glLightf +GLAD_API_CALL PFNGLLIGHTFVPROC glad_glLightfv; +#define glLightfv glad_glLightfv +GLAD_API_CALL PFNGLLIGHTIPROC glad_glLighti; +#define glLighti glad_glLighti +GLAD_API_CALL PFNGLLIGHTIVPROC glad_glLightiv; +#define glLightiv glad_glLightiv +GLAD_API_CALL PFNGLLINESTIPPLEPROC glad_glLineStipple; +#define glLineStipple glad_glLineStipple +GLAD_API_CALL PFNGLLINEWIDTHPROC glad_glLineWidth; +#define glLineWidth glad_glLineWidth +GLAD_API_CALL PFNGLLINKPROGRAMPROC glad_glLinkProgram; +#define glLinkProgram glad_glLinkProgram +GLAD_API_CALL PFNGLLISTBASEPROC glad_glListBase; +#define glListBase glad_glListBase +GLAD_API_CALL PFNGLLOADIDENTITYPROC glad_glLoadIdentity; +#define glLoadIdentity glad_glLoadIdentity +GLAD_API_CALL PFNGLLOADMATRIXDPROC glad_glLoadMatrixd; +#define glLoadMatrixd glad_glLoadMatrixd +GLAD_API_CALL PFNGLLOADMATRIXFPROC glad_glLoadMatrixf; +#define glLoadMatrixf glad_glLoadMatrixf +GLAD_API_CALL PFNGLLOADNAMEPROC glad_glLoadName; +#define glLoadName glad_glLoadName +GLAD_API_CALL PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd; +#define glLoadTransposeMatrixd glad_glLoadTransposeMatrixd +GLAD_API_CALL PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf; +#define glLoadTransposeMatrixf glad_glLoadTransposeMatrixf +GLAD_API_CALL PFNGLLOGICOPPROC glad_glLogicOp; +#define glLogicOp glad_glLogicOp +GLAD_API_CALL PFNGLMAP1DPROC glad_glMap1d; +#define glMap1d glad_glMap1d +GLAD_API_CALL PFNGLMAP1FPROC glad_glMap1f; +#define glMap1f glad_glMap1f +GLAD_API_CALL PFNGLMAP2DPROC glad_glMap2d; +#define glMap2d glad_glMap2d +GLAD_API_CALL PFNGLMAP2FPROC glad_glMap2f; +#define glMap2f glad_glMap2f +GLAD_API_CALL PFNGLMAPBUFFERPROC glad_glMapBuffer; +#define glMapBuffer glad_glMapBuffer +GLAD_API_CALL PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange; +#define glMapBufferRange glad_glMapBufferRange +GLAD_API_CALL PFNGLMAPGRID1DPROC glad_glMapGrid1d; +#define glMapGrid1d glad_glMapGrid1d +GLAD_API_CALL PFNGLMAPGRID1FPROC glad_glMapGrid1f; +#define glMapGrid1f glad_glMapGrid1f +GLAD_API_CALL PFNGLMAPGRID2DPROC glad_glMapGrid2d; +#define glMapGrid2d glad_glMapGrid2d +GLAD_API_CALL PFNGLMAPGRID2FPROC glad_glMapGrid2f; +#define glMapGrid2f glad_glMapGrid2f +GLAD_API_CALL PFNGLMATERIALFPROC glad_glMaterialf; +#define glMaterialf glad_glMaterialf +GLAD_API_CALL PFNGLMATERIALFVPROC glad_glMaterialfv; +#define glMaterialfv glad_glMaterialfv +GLAD_API_CALL PFNGLMATERIALIPROC glad_glMateriali; +#define glMateriali glad_glMateriali +GLAD_API_CALL PFNGLMATERIALIVPROC glad_glMaterialiv; +#define glMaterialiv glad_glMaterialiv +GLAD_API_CALL PFNGLMATRIXMODEPROC glad_glMatrixMode; +#define glMatrixMode glad_glMatrixMode +GLAD_API_CALL PFNGLMULTMATRIXDPROC glad_glMultMatrixd; +#define glMultMatrixd glad_glMultMatrixd +GLAD_API_CALL PFNGLMULTMATRIXFPROC glad_glMultMatrixf; +#define glMultMatrixf glad_glMultMatrixf +GLAD_API_CALL PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd; +#define glMultTransposeMatrixd glad_glMultTransposeMatrixd +GLAD_API_CALL PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf; +#define glMultTransposeMatrixf glad_glMultTransposeMatrixf +GLAD_API_CALL PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays; +#define glMultiDrawArrays glad_glMultiDrawArrays +GLAD_API_CALL PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements; +#define glMultiDrawElements glad_glMultiDrawElements +GLAD_API_CALL PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex; +#define glMultiDrawElementsBaseVertex glad_glMultiDrawElementsBaseVertex +GLAD_API_CALL PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d; +#define glMultiTexCoord1d glad_glMultiTexCoord1d +GLAD_API_CALL PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv; +#define glMultiTexCoord1dv glad_glMultiTexCoord1dv +GLAD_API_CALL PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f; +#define glMultiTexCoord1f glad_glMultiTexCoord1f +GLAD_API_CALL PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv; +#define glMultiTexCoord1fv glad_glMultiTexCoord1fv +GLAD_API_CALL PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i; +#define glMultiTexCoord1i glad_glMultiTexCoord1i +GLAD_API_CALL PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv; +#define glMultiTexCoord1iv glad_glMultiTexCoord1iv +GLAD_API_CALL PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s; +#define glMultiTexCoord1s glad_glMultiTexCoord1s +GLAD_API_CALL PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv; +#define glMultiTexCoord1sv glad_glMultiTexCoord1sv +GLAD_API_CALL PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d; +#define glMultiTexCoord2d glad_glMultiTexCoord2d +GLAD_API_CALL PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv; +#define glMultiTexCoord2dv glad_glMultiTexCoord2dv +GLAD_API_CALL PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f; +#define glMultiTexCoord2f glad_glMultiTexCoord2f +GLAD_API_CALL PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv; +#define glMultiTexCoord2fv glad_glMultiTexCoord2fv +GLAD_API_CALL PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i; +#define glMultiTexCoord2i glad_glMultiTexCoord2i +GLAD_API_CALL PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv; +#define glMultiTexCoord2iv glad_glMultiTexCoord2iv +GLAD_API_CALL PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s; +#define glMultiTexCoord2s glad_glMultiTexCoord2s +GLAD_API_CALL PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv; +#define glMultiTexCoord2sv glad_glMultiTexCoord2sv +GLAD_API_CALL PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d; +#define glMultiTexCoord3d glad_glMultiTexCoord3d +GLAD_API_CALL PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv; +#define glMultiTexCoord3dv glad_glMultiTexCoord3dv +GLAD_API_CALL PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f; +#define glMultiTexCoord3f glad_glMultiTexCoord3f +GLAD_API_CALL PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv; +#define glMultiTexCoord3fv glad_glMultiTexCoord3fv +GLAD_API_CALL PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i; +#define glMultiTexCoord3i glad_glMultiTexCoord3i +GLAD_API_CALL PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv; +#define glMultiTexCoord3iv glad_glMultiTexCoord3iv +GLAD_API_CALL PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s; +#define glMultiTexCoord3s glad_glMultiTexCoord3s +GLAD_API_CALL PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv; +#define glMultiTexCoord3sv glad_glMultiTexCoord3sv +GLAD_API_CALL PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d; +#define glMultiTexCoord4d glad_glMultiTexCoord4d +GLAD_API_CALL PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv; +#define glMultiTexCoord4dv glad_glMultiTexCoord4dv +GLAD_API_CALL PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f; +#define glMultiTexCoord4f glad_glMultiTexCoord4f +GLAD_API_CALL PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv; +#define glMultiTexCoord4fv glad_glMultiTexCoord4fv +GLAD_API_CALL PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i; +#define glMultiTexCoord4i glad_glMultiTexCoord4i +GLAD_API_CALL PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv; +#define glMultiTexCoord4iv glad_glMultiTexCoord4iv +GLAD_API_CALL PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s; +#define glMultiTexCoord4s glad_glMultiTexCoord4s +GLAD_API_CALL PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv; +#define glMultiTexCoord4sv glad_glMultiTexCoord4sv +GLAD_API_CALL PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui; +#define glMultiTexCoordP1ui glad_glMultiTexCoordP1ui +GLAD_API_CALL PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv; +#define glMultiTexCoordP1uiv glad_glMultiTexCoordP1uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui; +#define glMultiTexCoordP2ui glad_glMultiTexCoordP2ui +GLAD_API_CALL PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv; +#define glMultiTexCoordP2uiv glad_glMultiTexCoordP2uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui; +#define glMultiTexCoordP3ui glad_glMultiTexCoordP3ui +GLAD_API_CALL PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv; +#define glMultiTexCoordP3uiv glad_glMultiTexCoordP3uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui; +#define glMultiTexCoordP4ui glad_glMultiTexCoordP4ui +GLAD_API_CALL PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv; +#define glMultiTexCoordP4uiv glad_glMultiTexCoordP4uiv +GLAD_API_CALL PFNGLNEWLISTPROC glad_glNewList; +#define glNewList glad_glNewList +GLAD_API_CALL PFNGLNORMAL3BPROC glad_glNormal3b; +#define glNormal3b glad_glNormal3b +GLAD_API_CALL PFNGLNORMAL3BVPROC glad_glNormal3bv; +#define glNormal3bv glad_glNormal3bv +GLAD_API_CALL PFNGLNORMAL3DPROC glad_glNormal3d; +#define glNormal3d glad_glNormal3d +GLAD_API_CALL PFNGLNORMAL3DVPROC glad_glNormal3dv; +#define glNormal3dv glad_glNormal3dv +GLAD_API_CALL PFNGLNORMAL3FPROC glad_glNormal3f; +#define glNormal3f glad_glNormal3f +GLAD_API_CALL PFNGLNORMAL3FVPROC glad_glNormal3fv; +#define glNormal3fv glad_glNormal3fv +GLAD_API_CALL PFNGLNORMAL3IPROC glad_glNormal3i; +#define glNormal3i glad_glNormal3i +GLAD_API_CALL PFNGLNORMAL3IVPROC glad_glNormal3iv; +#define glNormal3iv glad_glNormal3iv +GLAD_API_CALL PFNGLNORMAL3SPROC glad_glNormal3s; +#define glNormal3s glad_glNormal3s +GLAD_API_CALL PFNGLNORMAL3SVPROC glad_glNormal3sv; +#define glNormal3sv glad_glNormal3sv +GLAD_API_CALL PFNGLNORMALP3UIPROC glad_glNormalP3ui; +#define glNormalP3ui glad_glNormalP3ui +GLAD_API_CALL PFNGLNORMALP3UIVPROC glad_glNormalP3uiv; +#define glNormalP3uiv glad_glNormalP3uiv +GLAD_API_CALL PFNGLNORMALPOINTERPROC glad_glNormalPointer; +#define glNormalPointer glad_glNormalPointer +GLAD_API_CALL PFNGLORTHOPROC glad_glOrtho; +#define glOrtho glad_glOrtho +GLAD_API_CALL PFNGLPASSTHROUGHPROC glad_glPassThrough; +#define glPassThrough glad_glPassThrough +GLAD_API_CALL PFNGLPIXELMAPFVPROC glad_glPixelMapfv; +#define glPixelMapfv glad_glPixelMapfv +GLAD_API_CALL PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv; +#define glPixelMapuiv glad_glPixelMapuiv +GLAD_API_CALL PFNGLPIXELMAPUSVPROC glad_glPixelMapusv; +#define glPixelMapusv glad_glPixelMapusv +GLAD_API_CALL PFNGLPIXELSTOREFPROC glad_glPixelStoref; +#define glPixelStoref glad_glPixelStoref +GLAD_API_CALL PFNGLPIXELSTOREIPROC glad_glPixelStorei; +#define glPixelStorei glad_glPixelStorei +GLAD_API_CALL PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf; +#define glPixelTransferf glad_glPixelTransferf +GLAD_API_CALL PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi; +#define glPixelTransferi glad_glPixelTransferi +GLAD_API_CALL PFNGLPIXELZOOMPROC glad_glPixelZoom; +#define glPixelZoom glad_glPixelZoom +GLAD_API_CALL PFNGLPOINTPARAMETERFPROC glad_glPointParameterf; +#define glPointParameterf glad_glPointParameterf +GLAD_API_CALL PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv; +#define glPointParameterfv glad_glPointParameterfv +GLAD_API_CALL PFNGLPOINTPARAMETERIPROC glad_glPointParameteri; +#define glPointParameteri glad_glPointParameteri +GLAD_API_CALL PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv; +#define glPointParameteriv glad_glPointParameteriv +GLAD_API_CALL PFNGLPOINTSIZEPROC glad_glPointSize; +#define glPointSize glad_glPointSize +GLAD_API_CALL PFNGLPOLYGONMODEPROC glad_glPolygonMode; +#define glPolygonMode glad_glPolygonMode +GLAD_API_CALL PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset; +#define glPolygonOffset glad_glPolygonOffset +GLAD_API_CALL PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple; +#define glPolygonStipple glad_glPolygonStipple +GLAD_API_CALL PFNGLPOPATTRIBPROC glad_glPopAttrib; +#define glPopAttrib glad_glPopAttrib +GLAD_API_CALL PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib; +#define glPopClientAttrib glad_glPopClientAttrib +GLAD_API_CALL PFNGLPOPMATRIXPROC glad_glPopMatrix; +#define glPopMatrix glad_glPopMatrix +GLAD_API_CALL PFNGLPOPNAMEPROC glad_glPopName; +#define glPopName glad_glPopName +GLAD_API_CALL PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex; +#define glPrimitiveRestartIndex glad_glPrimitiveRestartIndex +GLAD_API_CALL PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures; +#define glPrioritizeTextures glad_glPrioritizeTextures +GLAD_API_CALL PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex; +#define glProvokingVertex glad_glProvokingVertex +GLAD_API_CALL PFNGLPUSHATTRIBPROC glad_glPushAttrib; +#define glPushAttrib glad_glPushAttrib +GLAD_API_CALL PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib; +#define glPushClientAttrib glad_glPushClientAttrib +GLAD_API_CALL PFNGLPUSHMATRIXPROC glad_glPushMatrix; +#define glPushMatrix glad_glPushMatrix +GLAD_API_CALL PFNGLPUSHNAMEPROC glad_glPushName; +#define glPushName glad_glPushName +GLAD_API_CALL PFNGLQUERYCOUNTERPROC glad_glQueryCounter; +#define glQueryCounter glad_glQueryCounter +GLAD_API_CALL PFNGLRASTERPOS2DPROC glad_glRasterPos2d; +#define glRasterPos2d glad_glRasterPos2d +GLAD_API_CALL PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv; +#define glRasterPos2dv glad_glRasterPos2dv +GLAD_API_CALL PFNGLRASTERPOS2FPROC glad_glRasterPos2f; +#define glRasterPos2f glad_glRasterPos2f +GLAD_API_CALL PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv; +#define glRasterPos2fv glad_glRasterPos2fv +GLAD_API_CALL PFNGLRASTERPOS2IPROC glad_glRasterPos2i; +#define glRasterPos2i glad_glRasterPos2i +GLAD_API_CALL PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv; +#define glRasterPos2iv glad_glRasterPos2iv +GLAD_API_CALL PFNGLRASTERPOS2SPROC glad_glRasterPos2s; +#define glRasterPos2s glad_glRasterPos2s +GLAD_API_CALL PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv; +#define glRasterPos2sv glad_glRasterPos2sv +GLAD_API_CALL PFNGLRASTERPOS3DPROC glad_glRasterPos3d; +#define glRasterPos3d glad_glRasterPos3d +GLAD_API_CALL PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv; +#define glRasterPos3dv glad_glRasterPos3dv +GLAD_API_CALL PFNGLRASTERPOS3FPROC glad_glRasterPos3f; +#define glRasterPos3f glad_glRasterPos3f +GLAD_API_CALL PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv; +#define glRasterPos3fv glad_glRasterPos3fv +GLAD_API_CALL PFNGLRASTERPOS3IPROC glad_glRasterPos3i; +#define glRasterPos3i glad_glRasterPos3i +GLAD_API_CALL PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv; +#define glRasterPos3iv glad_glRasterPos3iv +GLAD_API_CALL PFNGLRASTERPOS3SPROC glad_glRasterPos3s; +#define glRasterPos3s glad_glRasterPos3s +GLAD_API_CALL PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv; +#define glRasterPos3sv glad_glRasterPos3sv +GLAD_API_CALL PFNGLRASTERPOS4DPROC glad_glRasterPos4d; +#define glRasterPos4d glad_glRasterPos4d +GLAD_API_CALL PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv; +#define glRasterPos4dv glad_glRasterPos4dv +GLAD_API_CALL PFNGLRASTERPOS4FPROC glad_glRasterPos4f; +#define glRasterPos4f glad_glRasterPos4f +GLAD_API_CALL PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv; +#define glRasterPos4fv glad_glRasterPos4fv +GLAD_API_CALL PFNGLRASTERPOS4IPROC glad_glRasterPos4i; +#define glRasterPos4i glad_glRasterPos4i +GLAD_API_CALL PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv; +#define glRasterPos4iv glad_glRasterPos4iv +GLAD_API_CALL PFNGLRASTERPOS4SPROC glad_glRasterPos4s; +#define glRasterPos4s glad_glRasterPos4s +GLAD_API_CALL PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv; +#define glRasterPos4sv glad_glRasterPos4sv +GLAD_API_CALL PFNGLREADBUFFERPROC glad_glReadBuffer; +#define glReadBuffer glad_glReadBuffer +GLAD_API_CALL PFNGLREADPIXELSPROC glad_glReadPixels; +#define glReadPixels glad_glReadPixels +GLAD_API_CALL PFNGLRECTDPROC glad_glRectd; +#define glRectd glad_glRectd +GLAD_API_CALL PFNGLRECTDVPROC glad_glRectdv; +#define glRectdv glad_glRectdv +GLAD_API_CALL PFNGLRECTFPROC glad_glRectf; +#define glRectf glad_glRectf +GLAD_API_CALL PFNGLRECTFVPROC glad_glRectfv; +#define glRectfv glad_glRectfv +GLAD_API_CALL PFNGLRECTIPROC glad_glRecti; +#define glRecti glad_glRecti +GLAD_API_CALL PFNGLRECTIVPROC glad_glRectiv; +#define glRectiv glad_glRectiv +GLAD_API_CALL PFNGLRECTSPROC glad_glRects; +#define glRects glad_glRects +GLAD_API_CALL PFNGLRECTSVPROC glad_glRectsv; +#define glRectsv glad_glRectsv +GLAD_API_CALL PFNGLRENDERMODEPROC glad_glRenderMode; +#define glRenderMode glad_glRenderMode +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage; +#define glRenderbufferStorage glad_glRenderbufferStorage +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample; +#define glRenderbufferStorageMultisample glad_glRenderbufferStorageMultisample +GLAD_API_CALL PFNGLROTATEDPROC glad_glRotated; +#define glRotated glad_glRotated +GLAD_API_CALL PFNGLROTATEFPROC glad_glRotatef; +#define glRotatef glad_glRotatef +GLAD_API_CALL PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage; +#define glSampleCoverage glad_glSampleCoverage +GLAD_API_CALL PFNGLSAMPLEMASKIPROC glad_glSampleMaski; +#define glSampleMaski glad_glSampleMaski +GLAD_API_CALL PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv; +#define glSamplerParameterIiv glad_glSamplerParameterIiv +GLAD_API_CALL PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv; +#define glSamplerParameterIuiv glad_glSamplerParameterIuiv +GLAD_API_CALL PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf; +#define glSamplerParameterf glad_glSamplerParameterf +GLAD_API_CALL PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv; +#define glSamplerParameterfv glad_glSamplerParameterfv +GLAD_API_CALL PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri; +#define glSamplerParameteri glad_glSamplerParameteri +GLAD_API_CALL PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv; +#define glSamplerParameteriv glad_glSamplerParameteriv +GLAD_API_CALL PFNGLSCALEDPROC glad_glScaled; +#define glScaled glad_glScaled +GLAD_API_CALL PFNGLSCALEFPROC glad_glScalef; +#define glScalef glad_glScalef +GLAD_API_CALL PFNGLSCISSORPROC glad_glScissor; +#define glScissor glad_glScissor +GLAD_API_CALL PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b; +#define glSecondaryColor3b glad_glSecondaryColor3b +GLAD_API_CALL PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv; +#define glSecondaryColor3bv glad_glSecondaryColor3bv +GLAD_API_CALL PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d; +#define glSecondaryColor3d glad_glSecondaryColor3d +GLAD_API_CALL PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv; +#define glSecondaryColor3dv glad_glSecondaryColor3dv +GLAD_API_CALL PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f; +#define glSecondaryColor3f glad_glSecondaryColor3f +GLAD_API_CALL PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv; +#define glSecondaryColor3fv glad_glSecondaryColor3fv +GLAD_API_CALL PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i; +#define glSecondaryColor3i glad_glSecondaryColor3i +GLAD_API_CALL PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv; +#define glSecondaryColor3iv glad_glSecondaryColor3iv +GLAD_API_CALL PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s; +#define glSecondaryColor3s glad_glSecondaryColor3s +GLAD_API_CALL PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv; +#define glSecondaryColor3sv glad_glSecondaryColor3sv +GLAD_API_CALL PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub; +#define glSecondaryColor3ub glad_glSecondaryColor3ub +GLAD_API_CALL PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv; +#define glSecondaryColor3ubv glad_glSecondaryColor3ubv +GLAD_API_CALL PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui; +#define glSecondaryColor3ui glad_glSecondaryColor3ui +GLAD_API_CALL PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv; +#define glSecondaryColor3uiv glad_glSecondaryColor3uiv +GLAD_API_CALL PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us; +#define glSecondaryColor3us glad_glSecondaryColor3us +GLAD_API_CALL PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv; +#define glSecondaryColor3usv glad_glSecondaryColor3usv +GLAD_API_CALL PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui; +#define glSecondaryColorP3ui glad_glSecondaryColorP3ui +GLAD_API_CALL PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv; +#define glSecondaryColorP3uiv glad_glSecondaryColorP3uiv +GLAD_API_CALL PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer; +#define glSecondaryColorPointer glad_glSecondaryColorPointer +GLAD_API_CALL PFNGLSELECTBUFFERPROC glad_glSelectBuffer; +#define glSelectBuffer glad_glSelectBuffer +GLAD_API_CALL PFNGLSHADEMODELPROC glad_glShadeModel; +#define glShadeModel glad_glShadeModel +GLAD_API_CALL PFNGLSHADERSOURCEPROC glad_glShaderSource; +#define glShaderSource glad_glShaderSource +GLAD_API_CALL PFNGLSTENCILFUNCPROC glad_glStencilFunc; +#define glStencilFunc glad_glStencilFunc +GLAD_API_CALL PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate; +#define glStencilFuncSeparate glad_glStencilFuncSeparate +GLAD_API_CALL PFNGLSTENCILMASKPROC glad_glStencilMask; +#define glStencilMask glad_glStencilMask +GLAD_API_CALL PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate; +#define glStencilMaskSeparate glad_glStencilMaskSeparate +GLAD_API_CALL PFNGLSTENCILOPPROC glad_glStencilOp; +#define glStencilOp glad_glStencilOp +GLAD_API_CALL PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate; +#define glStencilOpSeparate glad_glStencilOpSeparate +GLAD_API_CALL PFNGLTEXBUFFERPROC glad_glTexBuffer; +#define glTexBuffer glad_glTexBuffer +GLAD_API_CALL PFNGLTEXCOORD1DPROC glad_glTexCoord1d; +#define glTexCoord1d glad_glTexCoord1d +GLAD_API_CALL PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv; +#define glTexCoord1dv glad_glTexCoord1dv +GLAD_API_CALL PFNGLTEXCOORD1FPROC glad_glTexCoord1f; +#define glTexCoord1f glad_glTexCoord1f +GLAD_API_CALL PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv; +#define glTexCoord1fv glad_glTexCoord1fv +GLAD_API_CALL PFNGLTEXCOORD1IPROC glad_glTexCoord1i; +#define glTexCoord1i glad_glTexCoord1i +GLAD_API_CALL PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv; +#define glTexCoord1iv glad_glTexCoord1iv +GLAD_API_CALL PFNGLTEXCOORD1SPROC glad_glTexCoord1s; +#define glTexCoord1s glad_glTexCoord1s +GLAD_API_CALL PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv; +#define glTexCoord1sv glad_glTexCoord1sv +GLAD_API_CALL PFNGLTEXCOORD2DPROC glad_glTexCoord2d; +#define glTexCoord2d glad_glTexCoord2d +GLAD_API_CALL PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv; +#define glTexCoord2dv glad_glTexCoord2dv +GLAD_API_CALL PFNGLTEXCOORD2FPROC glad_glTexCoord2f; +#define glTexCoord2f glad_glTexCoord2f +GLAD_API_CALL PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv; +#define glTexCoord2fv glad_glTexCoord2fv +GLAD_API_CALL PFNGLTEXCOORD2IPROC glad_glTexCoord2i; +#define glTexCoord2i glad_glTexCoord2i +GLAD_API_CALL PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv; +#define glTexCoord2iv glad_glTexCoord2iv +GLAD_API_CALL PFNGLTEXCOORD2SPROC glad_glTexCoord2s; +#define glTexCoord2s glad_glTexCoord2s +GLAD_API_CALL PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv; +#define glTexCoord2sv glad_glTexCoord2sv +GLAD_API_CALL PFNGLTEXCOORD3DPROC glad_glTexCoord3d; +#define glTexCoord3d glad_glTexCoord3d +GLAD_API_CALL PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv; +#define glTexCoord3dv glad_glTexCoord3dv +GLAD_API_CALL PFNGLTEXCOORD3FPROC glad_glTexCoord3f; +#define glTexCoord3f glad_glTexCoord3f +GLAD_API_CALL PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv; +#define glTexCoord3fv glad_glTexCoord3fv +GLAD_API_CALL PFNGLTEXCOORD3IPROC glad_glTexCoord3i; +#define glTexCoord3i glad_glTexCoord3i +GLAD_API_CALL PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv; +#define glTexCoord3iv glad_glTexCoord3iv +GLAD_API_CALL PFNGLTEXCOORD3SPROC glad_glTexCoord3s; +#define glTexCoord3s glad_glTexCoord3s +GLAD_API_CALL PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv; +#define glTexCoord3sv glad_glTexCoord3sv +GLAD_API_CALL PFNGLTEXCOORD4DPROC glad_glTexCoord4d; +#define glTexCoord4d glad_glTexCoord4d +GLAD_API_CALL PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv; +#define glTexCoord4dv glad_glTexCoord4dv +GLAD_API_CALL PFNGLTEXCOORD4FPROC glad_glTexCoord4f; +#define glTexCoord4f glad_glTexCoord4f +GLAD_API_CALL PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv; +#define glTexCoord4fv glad_glTexCoord4fv +GLAD_API_CALL PFNGLTEXCOORD4IPROC glad_glTexCoord4i; +#define glTexCoord4i glad_glTexCoord4i +GLAD_API_CALL PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv; +#define glTexCoord4iv glad_glTexCoord4iv +GLAD_API_CALL PFNGLTEXCOORD4SPROC glad_glTexCoord4s; +#define glTexCoord4s glad_glTexCoord4s +GLAD_API_CALL PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv; +#define glTexCoord4sv glad_glTexCoord4sv +GLAD_API_CALL PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui; +#define glTexCoordP1ui glad_glTexCoordP1ui +GLAD_API_CALL PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv; +#define glTexCoordP1uiv glad_glTexCoordP1uiv +GLAD_API_CALL PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui; +#define glTexCoordP2ui glad_glTexCoordP2ui +GLAD_API_CALL PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv; +#define glTexCoordP2uiv glad_glTexCoordP2uiv +GLAD_API_CALL PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui; +#define glTexCoordP3ui glad_glTexCoordP3ui +GLAD_API_CALL PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv; +#define glTexCoordP3uiv glad_glTexCoordP3uiv +GLAD_API_CALL PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui; +#define glTexCoordP4ui glad_glTexCoordP4ui +GLAD_API_CALL PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv; +#define glTexCoordP4uiv glad_glTexCoordP4uiv +GLAD_API_CALL PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer; +#define glTexCoordPointer glad_glTexCoordPointer +GLAD_API_CALL PFNGLTEXENVFPROC glad_glTexEnvf; +#define glTexEnvf glad_glTexEnvf +GLAD_API_CALL PFNGLTEXENVFVPROC glad_glTexEnvfv; +#define glTexEnvfv glad_glTexEnvfv +GLAD_API_CALL PFNGLTEXENVIPROC glad_glTexEnvi; +#define glTexEnvi glad_glTexEnvi +GLAD_API_CALL PFNGLTEXENVIVPROC glad_glTexEnviv; +#define glTexEnviv glad_glTexEnviv +GLAD_API_CALL PFNGLTEXGENDPROC glad_glTexGend; +#define glTexGend glad_glTexGend +GLAD_API_CALL PFNGLTEXGENDVPROC glad_glTexGendv; +#define glTexGendv glad_glTexGendv +GLAD_API_CALL PFNGLTEXGENFPROC glad_glTexGenf; +#define glTexGenf glad_glTexGenf +GLAD_API_CALL PFNGLTEXGENFVPROC glad_glTexGenfv; +#define glTexGenfv glad_glTexGenfv +GLAD_API_CALL PFNGLTEXGENIPROC glad_glTexGeni; +#define glTexGeni glad_glTexGeni +GLAD_API_CALL PFNGLTEXGENIVPROC glad_glTexGeniv; +#define glTexGeniv glad_glTexGeniv +GLAD_API_CALL PFNGLTEXIMAGE1DPROC glad_glTexImage1D; +#define glTexImage1D glad_glTexImage1D +GLAD_API_CALL PFNGLTEXIMAGE2DPROC glad_glTexImage2D; +#define glTexImage2D glad_glTexImage2D +GLAD_API_CALL PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample; +#define glTexImage2DMultisample glad_glTexImage2DMultisample +GLAD_API_CALL PFNGLTEXIMAGE3DPROC glad_glTexImage3D; +#define glTexImage3D glad_glTexImage3D +GLAD_API_CALL PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample; +#define glTexImage3DMultisample glad_glTexImage3DMultisample +GLAD_API_CALL PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv; +#define glTexParameterIiv glad_glTexParameterIiv +GLAD_API_CALL PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv; +#define glTexParameterIuiv glad_glTexParameterIuiv +GLAD_API_CALL PFNGLTEXPARAMETERFPROC glad_glTexParameterf; +#define glTexParameterf glad_glTexParameterf +GLAD_API_CALL PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv; +#define glTexParameterfv glad_glTexParameterfv +GLAD_API_CALL PFNGLTEXPARAMETERIPROC glad_glTexParameteri; +#define glTexParameteri glad_glTexParameteri +GLAD_API_CALL PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv; +#define glTexParameteriv glad_glTexParameteriv +GLAD_API_CALL PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D; +#define glTexSubImage1D glad_glTexSubImage1D +GLAD_API_CALL PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D; +#define glTexSubImage2D glad_glTexSubImage2D +GLAD_API_CALL PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D; +#define glTexSubImage3D glad_glTexSubImage3D +GLAD_API_CALL PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings; +#define glTransformFeedbackVaryings glad_glTransformFeedbackVaryings +GLAD_API_CALL PFNGLTRANSLATEDPROC glad_glTranslated; +#define glTranslated glad_glTranslated +GLAD_API_CALL PFNGLTRANSLATEFPROC glad_glTranslatef; +#define glTranslatef glad_glTranslatef +GLAD_API_CALL PFNGLUNIFORM1FPROC glad_glUniform1f; +#define glUniform1f glad_glUniform1f +GLAD_API_CALL PFNGLUNIFORM1FVPROC glad_glUniform1fv; +#define glUniform1fv glad_glUniform1fv +GLAD_API_CALL PFNGLUNIFORM1IPROC glad_glUniform1i; +#define glUniform1i glad_glUniform1i +GLAD_API_CALL PFNGLUNIFORM1IVPROC glad_glUniform1iv; +#define glUniform1iv glad_glUniform1iv +GLAD_API_CALL PFNGLUNIFORM1UIPROC glad_glUniform1ui; +#define glUniform1ui glad_glUniform1ui +GLAD_API_CALL PFNGLUNIFORM1UIVPROC glad_glUniform1uiv; +#define glUniform1uiv glad_glUniform1uiv +GLAD_API_CALL PFNGLUNIFORM2FPROC glad_glUniform2f; +#define glUniform2f glad_glUniform2f +GLAD_API_CALL PFNGLUNIFORM2FVPROC glad_glUniform2fv; +#define glUniform2fv glad_glUniform2fv +GLAD_API_CALL PFNGLUNIFORM2IPROC glad_glUniform2i; +#define glUniform2i glad_glUniform2i +GLAD_API_CALL PFNGLUNIFORM2IVPROC glad_glUniform2iv; +#define glUniform2iv glad_glUniform2iv +GLAD_API_CALL PFNGLUNIFORM2UIPROC glad_glUniform2ui; +#define glUniform2ui glad_glUniform2ui +GLAD_API_CALL PFNGLUNIFORM2UIVPROC glad_glUniform2uiv; +#define glUniform2uiv glad_glUniform2uiv +GLAD_API_CALL PFNGLUNIFORM3FPROC glad_glUniform3f; +#define glUniform3f glad_glUniform3f +GLAD_API_CALL PFNGLUNIFORM3FVPROC glad_glUniform3fv; +#define glUniform3fv glad_glUniform3fv +GLAD_API_CALL PFNGLUNIFORM3IPROC glad_glUniform3i; +#define glUniform3i glad_glUniform3i +GLAD_API_CALL PFNGLUNIFORM3IVPROC glad_glUniform3iv; +#define glUniform3iv glad_glUniform3iv +GLAD_API_CALL PFNGLUNIFORM3UIPROC glad_glUniform3ui; +#define glUniform3ui glad_glUniform3ui +GLAD_API_CALL PFNGLUNIFORM3UIVPROC glad_glUniform3uiv; +#define glUniform3uiv glad_glUniform3uiv +GLAD_API_CALL PFNGLUNIFORM4FPROC glad_glUniform4f; +#define glUniform4f glad_glUniform4f +GLAD_API_CALL PFNGLUNIFORM4FVPROC glad_glUniform4fv; +#define glUniform4fv glad_glUniform4fv +GLAD_API_CALL PFNGLUNIFORM4IPROC glad_glUniform4i; +#define glUniform4i glad_glUniform4i +GLAD_API_CALL PFNGLUNIFORM4IVPROC glad_glUniform4iv; +#define glUniform4iv glad_glUniform4iv +GLAD_API_CALL PFNGLUNIFORM4UIPROC glad_glUniform4ui; +#define glUniform4ui glad_glUniform4ui +GLAD_API_CALL PFNGLUNIFORM4UIVPROC glad_glUniform4uiv; +#define glUniform4uiv glad_glUniform4uiv +GLAD_API_CALL PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding; +#define glUniformBlockBinding glad_glUniformBlockBinding +GLAD_API_CALL PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv; +#define glUniformMatrix2fv glad_glUniformMatrix2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv; +#define glUniformMatrix2x3fv glad_glUniformMatrix2x3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv; +#define glUniformMatrix2x4fv glad_glUniformMatrix2x4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv; +#define glUniformMatrix3fv glad_glUniformMatrix3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv; +#define glUniformMatrix3x2fv glad_glUniformMatrix3x2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv; +#define glUniformMatrix3x4fv glad_glUniformMatrix3x4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv; +#define glUniformMatrix4fv glad_glUniformMatrix4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv; +#define glUniformMatrix4x2fv glad_glUniformMatrix4x2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv; +#define glUniformMatrix4x3fv glad_glUniformMatrix4x3fv +GLAD_API_CALL PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer; +#define glUnmapBuffer glad_glUnmapBuffer +GLAD_API_CALL PFNGLUSEPROGRAMPROC glad_glUseProgram; +#define glUseProgram glad_glUseProgram +GLAD_API_CALL PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram; +#define glValidateProgram glad_glValidateProgram +GLAD_API_CALL PFNGLVERTEX2DPROC glad_glVertex2d; +#define glVertex2d glad_glVertex2d +GLAD_API_CALL PFNGLVERTEX2DVPROC glad_glVertex2dv; +#define glVertex2dv glad_glVertex2dv +GLAD_API_CALL PFNGLVERTEX2FPROC glad_glVertex2f; +#define glVertex2f glad_glVertex2f +GLAD_API_CALL PFNGLVERTEX2FVPROC glad_glVertex2fv; +#define glVertex2fv glad_glVertex2fv +GLAD_API_CALL PFNGLVERTEX2IPROC glad_glVertex2i; +#define glVertex2i glad_glVertex2i +GLAD_API_CALL PFNGLVERTEX2IVPROC glad_glVertex2iv; +#define glVertex2iv glad_glVertex2iv +GLAD_API_CALL PFNGLVERTEX2SPROC glad_glVertex2s; +#define glVertex2s glad_glVertex2s +GLAD_API_CALL PFNGLVERTEX2SVPROC glad_glVertex2sv; +#define glVertex2sv glad_glVertex2sv +GLAD_API_CALL PFNGLVERTEX3DPROC glad_glVertex3d; +#define glVertex3d glad_glVertex3d +GLAD_API_CALL PFNGLVERTEX3DVPROC glad_glVertex3dv; +#define glVertex3dv glad_glVertex3dv +GLAD_API_CALL PFNGLVERTEX3FPROC glad_glVertex3f; +#define glVertex3f glad_glVertex3f +GLAD_API_CALL PFNGLVERTEX3FVPROC glad_glVertex3fv; +#define glVertex3fv glad_glVertex3fv +GLAD_API_CALL PFNGLVERTEX3IPROC glad_glVertex3i; +#define glVertex3i glad_glVertex3i +GLAD_API_CALL PFNGLVERTEX3IVPROC glad_glVertex3iv; +#define glVertex3iv glad_glVertex3iv +GLAD_API_CALL PFNGLVERTEX3SPROC glad_glVertex3s; +#define glVertex3s glad_glVertex3s +GLAD_API_CALL PFNGLVERTEX3SVPROC glad_glVertex3sv; +#define glVertex3sv glad_glVertex3sv +GLAD_API_CALL PFNGLVERTEX4DPROC glad_glVertex4d; +#define glVertex4d glad_glVertex4d +GLAD_API_CALL PFNGLVERTEX4DVPROC glad_glVertex4dv; +#define glVertex4dv glad_glVertex4dv +GLAD_API_CALL PFNGLVERTEX4FPROC glad_glVertex4f; +#define glVertex4f glad_glVertex4f +GLAD_API_CALL PFNGLVERTEX4FVPROC glad_glVertex4fv; +#define glVertex4fv glad_glVertex4fv +GLAD_API_CALL PFNGLVERTEX4IPROC glad_glVertex4i; +#define glVertex4i glad_glVertex4i +GLAD_API_CALL PFNGLVERTEX4IVPROC glad_glVertex4iv; +#define glVertex4iv glad_glVertex4iv +GLAD_API_CALL PFNGLVERTEX4SPROC glad_glVertex4s; +#define glVertex4s glad_glVertex4s +GLAD_API_CALL PFNGLVERTEX4SVPROC glad_glVertex4sv; +#define glVertex4sv glad_glVertex4sv +GLAD_API_CALL PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d; +#define glVertexAttrib1d glad_glVertexAttrib1d +GLAD_API_CALL PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv; +#define glVertexAttrib1dv glad_glVertexAttrib1dv +GLAD_API_CALL PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f; +#define glVertexAttrib1f glad_glVertexAttrib1f +GLAD_API_CALL PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv; +#define glVertexAttrib1fv glad_glVertexAttrib1fv +GLAD_API_CALL PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s; +#define glVertexAttrib1s glad_glVertexAttrib1s +GLAD_API_CALL PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv; +#define glVertexAttrib1sv glad_glVertexAttrib1sv +GLAD_API_CALL PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d; +#define glVertexAttrib2d glad_glVertexAttrib2d +GLAD_API_CALL PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv; +#define glVertexAttrib2dv glad_glVertexAttrib2dv +GLAD_API_CALL PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f; +#define glVertexAttrib2f glad_glVertexAttrib2f +GLAD_API_CALL PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv; +#define glVertexAttrib2fv glad_glVertexAttrib2fv +GLAD_API_CALL PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s; +#define glVertexAttrib2s glad_glVertexAttrib2s +GLAD_API_CALL PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv; +#define glVertexAttrib2sv glad_glVertexAttrib2sv +GLAD_API_CALL PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d; +#define glVertexAttrib3d glad_glVertexAttrib3d +GLAD_API_CALL PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv; +#define glVertexAttrib3dv glad_glVertexAttrib3dv +GLAD_API_CALL PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f; +#define glVertexAttrib3f glad_glVertexAttrib3f +GLAD_API_CALL PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv; +#define glVertexAttrib3fv glad_glVertexAttrib3fv +GLAD_API_CALL PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s; +#define glVertexAttrib3s glad_glVertexAttrib3s +GLAD_API_CALL PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv; +#define glVertexAttrib3sv glad_glVertexAttrib3sv +GLAD_API_CALL PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv; +#define glVertexAttrib4Nbv glad_glVertexAttrib4Nbv +GLAD_API_CALL PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv; +#define glVertexAttrib4Niv glad_glVertexAttrib4Niv +GLAD_API_CALL PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv; +#define glVertexAttrib4Nsv glad_glVertexAttrib4Nsv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub; +#define glVertexAttrib4Nub glad_glVertexAttrib4Nub +GLAD_API_CALL PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv; +#define glVertexAttrib4Nubv glad_glVertexAttrib4Nubv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv; +#define glVertexAttrib4Nuiv glad_glVertexAttrib4Nuiv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv; +#define glVertexAttrib4Nusv glad_glVertexAttrib4Nusv +GLAD_API_CALL PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv; +#define glVertexAttrib4bv glad_glVertexAttrib4bv +GLAD_API_CALL PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d; +#define glVertexAttrib4d glad_glVertexAttrib4d +GLAD_API_CALL PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv; +#define glVertexAttrib4dv glad_glVertexAttrib4dv +GLAD_API_CALL PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f; +#define glVertexAttrib4f glad_glVertexAttrib4f +GLAD_API_CALL PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv; +#define glVertexAttrib4fv glad_glVertexAttrib4fv +GLAD_API_CALL PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv; +#define glVertexAttrib4iv glad_glVertexAttrib4iv +GLAD_API_CALL PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s; +#define glVertexAttrib4s glad_glVertexAttrib4s +GLAD_API_CALL PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv; +#define glVertexAttrib4sv glad_glVertexAttrib4sv +GLAD_API_CALL PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv; +#define glVertexAttrib4ubv glad_glVertexAttrib4ubv +GLAD_API_CALL PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv; +#define glVertexAttrib4uiv glad_glVertexAttrib4uiv +GLAD_API_CALL PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv; +#define glVertexAttrib4usv glad_glVertexAttrib4usv +GLAD_API_CALL PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor; +#define glVertexAttribDivisor glad_glVertexAttribDivisor +GLAD_API_CALL PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i; +#define glVertexAttribI1i glad_glVertexAttribI1i +GLAD_API_CALL PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv; +#define glVertexAttribI1iv glad_glVertexAttribI1iv +GLAD_API_CALL PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui; +#define glVertexAttribI1ui glad_glVertexAttribI1ui +GLAD_API_CALL PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv; +#define glVertexAttribI1uiv glad_glVertexAttribI1uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i; +#define glVertexAttribI2i glad_glVertexAttribI2i +GLAD_API_CALL PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv; +#define glVertexAttribI2iv glad_glVertexAttribI2iv +GLAD_API_CALL PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui; +#define glVertexAttribI2ui glad_glVertexAttribI2ui +GLAD_API_CALL PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv; +#define glVertexAttribI2uiv glad_glVertexAttribI2uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i; +#define glVertexAttribI3i glad_glVertexAttribI3i +GLAD_API_CALL PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv; +#define glVertexAttribI3iv glad_glVertexAttribI3iv +GLAD_API_CALL PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui; +#define glVertexAttribI3ui glad_glVertexAttribI3ui +GLAD_API_CALL PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv; +#define glVertexAttribI3uiv glad_glVertexAttribI3uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv; +#define glVertexAttribI4bv glad_glVertexAttribI4bv +GLAD_API_CALL PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i; +#define glVertexAttribI4i glad_glVertexAttribI4i +GLAD_API_CALL PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv; +#define glVertexAttribI4iv glad_glVertexAttribI4iv +GLAD_API_CALL PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv; +#define glVertexAttribI4sv glad_glVertexAttribI4sv +GLAD_API_CALL PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv; +#define glVertexAttribI4ubv glad_glVertexAttribI4ubv +GLAD_API_CALL PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui; +#define glVertexAttribI4ui glad_glVertexAttribI4ui +GLAD_API_CALL PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv; +#define glVertexAttribI4uiv glad_glVertexAttribI4uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv; +#define glVertexAttribI4usv glad_glVertexAttribI4usv +GLAD_API_CALL PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer; +#define glVertexAttribIPointer glad_glVertexAttribIPointer +GLAD_API_CALL PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui; +#define glVertexAttribP1ui glad_glVertexAttribP1ui +GLAD_API_CALL PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv; +#define glVertexAttribP1uiv glad_glVertexAttribP1uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui; +#define glVertexAttribP2ui glad_glVertexAttribP2ui +GLAD_API_CALL PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv; +#define glVertexAttribP2uiv glad_glVertexAttribP2uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui; +#define glVertexAttribP3ui glad_glVertexAttribP3ui +GLAD_API_CALL PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv; +#define glVertexAttribP3uiv glad_glVertexAttribP3uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui; +#define glVertexAttribP4ui glad_glVertexAttribP4ui +GLAD_API_CALL PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv; +#define glVertexAttribP4uiv glad_glVertexAttribP4uiv +GLAD_API_CALL PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer; +#define glVertexAttribPointer glad_glVertexAttribPointer +GLAD_API_CALL PFNGLVERTEXP2UIPROC glad_glVertexP2ui; +#define glVertexP2ui glad_glVertexP2ui +GLAD_API_CALL PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv; +#define glVertexP2uiv glad_glVertexP2uiv +GLAD_API_CALL PFNGLVERTEXP3UIPROC glad_glVertexP3ui; +#define glVertexP3ui glad_glVertexP3ui +GLAD_API_CALL PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv; +#define glVertexP3uiv glad_glVertexP3uiv +GLAD_API_CALL PFNGLVERTEXP4UIPROC glad_glVertexP4ui; +#define glVertexP4ui glad_glVertexP4ui +GLAD_API_CALL PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv; +#define glVertexP4uiv glad_glVertexP4uiv +GLAD_API_CALL PFNGLVERTEXPOINTERPROC glad_glVertexPointer; +#define glVertexPointer glad_glVertexPointer +GLAD_API_CALL PFNGLVIEWPORTPROC glad_glViewport; +#define glViewport glad_glViewport +GLAD_API_CALL PFNGLWAITSYNCPROC glad_glWaitSync; +#define glWaitSync glad_glWaitSync +GLAD_API_CALL PFNGLWINDOWPOS2DPROC glad_glWindowPos2d; +#define glWindowPos2d glad_glWindowPos2d +GLAD_API_CALL PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv; +#define glWindowPos2dv glad_glWindowPos2dv +GLAD_API_CALL PFNGLWINDOWPOS2FPROC glad_glWindowPos2f; +#define glWindowPos2f glad_glWindowPos2f +GLAD_API_CALL PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv; +#define glWindowPos2fv glad_glWindowPos2fv +GLAD_API_CALL PFNGLWINDOWPOS2IPROC glad_glWindowPos2i; +#define glWindowPos2i glad_glWindowPos2i +GLAD_API_CALL PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv; +#define glWindowPos2iv glad_glWindowPos2iv +GLAD_API_CALL PFNGLWINDOWPOS2SPROC glad_glWindowPos2s; +#define glWindowPos2s glad_glWindowPos2s +GLAD_API_CALL PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv; +#define glWindowPos2sv glad_glWindowPos2sv +GLAD_API_CALL PFNGLWINDOWPOS3DPROC glad_glWindowPos3d; +#define glWindowPos3d glad_glWindowPos3d +GLAD_API_CALL PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv; +#define glWindowPos3dv glad_glWindowPos3dv +GLAD_API_CALL PFNGLWINDOWPOS3FPROC glad_glWindowPos3f; +#define glWindowPos3f glad_glWindowPos3f +GLAD_API_CALL PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv; +#define glWindowPos3fv glad_glWindowPos3fv +GLAD_API_CALL PFNGLWINDOWPOS3IPROC glad_glWindowPos3i; +#define glWindowPos3i glad_glWindowPos3i +GLAD_API_CALL PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv; +#define glWindowPos3iv glad_glWindowPos3iv +GLAD_API_CALL PFNGLWINDOWPOS3SPROC glad_glWindowPos3s; +#define glWindowPos3s glad_glWindowPos3s +GLAD_API_CALL PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv; +#define glWindowPos3sv glad_glWindowPos3sv + + + + + +GLAD_API_CALL int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGL( GLADloadfunc load); + + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/glx.h glmark2-2021.02/android/jni/src/glad/include/glad/glx.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/glx.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/glad/glx.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,570 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:13 2019 + * + * Generator: C/C++ + * Specification: glx + * Extensions: 2 + * + * APIs: + * - glx=1.4 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='glx=1.4' --extensions='GLX_EXT_swap_control,GLX_MESA_swap_control' c + * + * Online: + * http://glad.sh/#api=glx%3D1.4&extensions=GLX_EXT_swap_control%2CGLX_MESA_swap_control&generator=c&options= + * + */ + +#ifndef GLAD_GLX_H_ +#define GLAD_GLX_H_ + +#ifdef GLX_H + #error GLX header already included (API: glx), remove previous include! +#endif +#define GLX_H 1 + + +#include +#include +#include + +#include + +#define GLAD_GLX + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GLX_ACCUM_ALPHA_SIZE 17 +#define GLX_ACCUM_BLUE_SIZE 16 +#define GLX_ACCUM_BUFFER_BIT 0x00000080 +#define GLX_ACCUM_GREEN_SIZE 15 +#define GLX_ACCUM_RED_SIZE 14 +#define GLX_ALPHA_SIZE 11 +#define GLX_AUX_BUFFERS 7 +#define GLX_AUX_BUFFERS_BIT 0x00000010 +#define GLX_BACK_LEFT_BUFFER_BIT 0x00000004 +#define GLX_BACK_RIGHT_BUFFER_BIT 0x00000008 +#define GLX_BAD_ATTRIBUTE 2 +#define GLX_BAD_CONTEXT 5 +#define GLX_BAD_ENUM 7 +#define GLX_BAD_SCREEN 1 +#define GLX_BAD_VALUE 6 +#define GLX_BAD_VISUAL 4 +#define GLX_BLUE_SIZE 10 +#define GLX_BUFFER_SIZE 2 +#define GLX_BufferSwapComplete 1 +#define GLX_COLOR_INDEX_BIT 0x00000002 +#define GLX_COLOR_INDEX_TYPE 0x8015 +#define GLX_CONFIG_CAVEAT 0x20 +#define GLX_DAMAGED 0x8020 +#define GLX_DEPTH_BUFFER_BIT 0x00000020 +#define GLX_DEPTH_SIZE 12 +#define GLX_DIRECT_COLOR 0x8003 +#define GLX_DONT_CARE 0xFFFFFFFF +#define GLX_DOUBLEBUFFER 5 +#define GLX_DRAWABLE_TYPE 0x8010 +#define GLX_EVENT_MASK 0x801F +#define GLX_EXTENSIONS 0x3 +#define GLX_EXTENSION_NAME "GLX" +#define GLX_FBCONFIG_ID 0x8013 +#define GLX_FRONT_LEFT_BUFFER_BIT 0x00000001 +#define GLX_FRONT_RIGHT_BUFFER_BIT 0x00000002 +#define GLX_GRAY_SCALE 0x8006 +#define GLX_GREEN_SIZE 9 +#define GLX_HEIGHT 0x801E +#define GLX_LARGEST_PBUFFER 0x801C +#define GLX_LEVEL 3 +#define GLX_MAX_PBUFFER_HEIGHT 0x8017 +#define GLX_MAX_PBUFFER_PIXELS 0x8018 +#define GLX_MAX_PBUFFER_WIDTH 0x8016 +#define GLX_MAX_SWAP_INTERVAL_EXT 0x20F2 +#define GLX_NONE 0x8000 +#define GLX_NON_CONFORMANT_CONFIG 0x800D +#define GLX_NO_EXTENSION 3 +#define GLX_PBUFFER 0x8023 +#define GLX_PBUFFER_BIT 0x00000004 +#define GLX_PBUFFER_CLOBBER_MASK 0x08000000 +#define GLX_PBUFFER_HEIGHT 0x8040 +#define GLX_PBUFFER_WIDTH 0x8041 +#define GLX_PIXMAP_BIT 0x00000002 +#define GLX_PRESERVED_CONTENTS 0x801B +#define GLX_PSEUDO_COLOR 0x8004 +#define GLX_PbufferClobber 0 +#define GLX_RED_SIZE 8 +#define GLX_RENDER_TYPE 0x8011 +#define GLX_RGBA 4 +#define GLX_RGBA_BIT 0x00000001 +#define GLX_RGBA_TYPE 0x8014 +#define GLX_SAMPLES 100001 +#define GLX_SAMPLE_BUFFERS 100000 +#define GLX_SAVED 0x8021 +#define GLX_SCREEN 0x800C +#define GLX_SLOW_CONFIG 0x8001 +#define GLX_STATIC_COLOR 0x8005 +#define GLX_STATIC_GRAY 0x8007 +#define GLX_STENCIL_BUFFER_BIT 0x00000040 +#define GLX_STENCIL_SIZE 13 +#define GLX_STEREO 6 +#define GLX_SWAP_INTERVAL_EXT 0x20F1 +#define GLX_TRANSPARENT_ALPHA_VALUE 0x28 +#define GLX_TRANSPARENT_BLUE_VALUE 0x27 +#define GLX_TRANSPARENT_GREEN_VALUE 0x26 +#define GLX_TRANSPARENT_INDEX 0x8009 +#define GLX_TRANSPARENT_INDEX_VALUE 0x24 +#define GLX_TRANSPARENT_RED_VALUE 0x25 +#define GLX_TRANSPARENT_RGB 0x8008 +#define GLX_TRANSPARENT_TYPE 0x23 +#define GLX_TRUE_COLOR 0x8002 +#define GLX_USE_GL 1 +#define GLX_VENDOR 0x1 +#define GLX_VERSION 0x2 +#define GLX_VISUAL_ID 0x800B +#define GLX_WIDTH 0x801D +#define GLX_WINDOW 0x8022 +#define GLX_WINDOW_BIT 0x00000001 +#define GLX_X_RENDERABLE 0x8012 +#define GLX_X_VISUAL_TYPE 0x22 +#define __GLX_NUMBER_EVENTS 17 + + +#ifndef GLEXT_64_TYPES_DEFINED +/* This code block is duplicated in glext.h, so must be protected */ +#define GLEXT_64_TYPES_DEFINED +/* Define int32_t, int64_t, and uint64_t types for UST/MSC */ +/* (as used in the GLX_OML_sync_control extension). */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#include +#elif defined(__sun__) || defined(__digital__) +#include +#if defined(__STDC__) +#if defined(__arch64__) || defined(_LP64) +typedef long int int64_t; +typedef unsigned long int uint64_t; +#else +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#endif /* __arch64__ */ +#endif /* __STDC__ */ +#elif defined( __VMS ) || defined(__sgi) +#include +#elif defined(__SCO__) || defined(__USLC__) +#include +#elif defined(__UNIXOS2__) || defined(__SOL64__) +typedef long int int32_t; +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#elif defined(_WIN32) && defined(__GNUC__) +#include +#elif defined(_WIN32) +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +/* Fallback if nothing above works */ +#include +#endif +#endif + + + + + + + + + + + + + + + + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) + +#else + +#endif + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) + +#else + +#endif + + + + + + + +typedef XID GLXFBConfigID; +typedef struct __GLXFBConfigRec *GLXFBConfig; +typedef XID GLXContextID; +typedef struct __GLXcontextRec *GLXContext; +typedef XID GLXPixmap; +typedef XID GLXDrawable; +typedef XID GLXWindow; +typedef XID GLXPbuffer; +typedef void (GLAD_API_PTR *__GLXextFuncPtr)(void); +typedef XID GLXVideoCaptureDeviceNV; +typedef unsigned int GLXVideoDeviceNV; +typedef XID GLXVideoSourceSGIX; +typedef XID GLXFBConfigIDSGIX; +typedef struct __GLXFBConfigRec *GLXFBConfigSGIX; +typedef XID GLXPbufferSGIX; +typedef struct { + int event_type; /* GLX_DAMAGED or GLX_SAVED */ + int draw_type; /* GLX_WINDOW or GLX_PBUFFER */ + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came for SendEvent request */ + Display *display; /* display the event was read from */ + GLXDrawable drawable; /* XID of Drawable */ + unsigned int buffer_mask; /* mask indicating which buffers are affected */ + unsigned int aux_buffer; /* which aux buffer was affected */ + int x, y; + int width, height; + int count; /* if nonzero, at least this many more */ +} GLXPbufferClobberEvent; +typedef struct { + int type; + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came from a SendEvent request */ + Display *display; /* Display the event was read from */ + GLXDrawable drawable; /* drawable on which event was requested in event mask */ + int event_type; + int64_t ust; + int64_t msc; + int64_t sbc; +} GLXBufferSwapComplete; +typedef union __GLXEvent { + GLXPbufferClobberEvent glxpbufferclobber; + GLXBufferSwapComplete glxbufferswapcomplete; + long pad[24]; +} GLXEvent; +typedef struct { + int type; + unsigned long serial; + Bool send_event; + Display *display; + int extension; + int evtype; + GLXDrawable window; + Bool stereo_tree; +} GLXStereoNotifyEventEXT; +typedef struct { + int type; + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came for SendEvent request */ + Display *display; /* display the event was read from */ + GLXDrawable drawable; /* i.d. of Drawable */ + int event_type; /* GLX_DAMAGED_SGIX or GLX_SAVED_SGIX */ + int draw_type; /* GLX_WINDOW_SGIX or GLX_PBUFFER_SGIX */ + unsigned int mask; /* mask indicating which buffers are affected*/ + int x, y; + int width, height; + int count; /* if nonzero, at least this many more */ +} GLXBufferClobberEventSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int networkId; +} GLXHyperpipeNetworkSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int channel; + unsigned int participationType; + int timeSlice; +} GLXHyperpipeConfigSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int srcXOrigin, srcYOrigin, srcWidth, srcHeight; + int destXOrigin, destYOrigin, destWidth, destHeight; +} GLXPipeRect; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int XOrigin, YOrigin, maxHeight, maxWidth; +} GLXPipeRectLimits; + + +#define GLX_VERSION_1_0 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_0; +#define GLX_VERSION_1_1 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_1; +#define GLX_VERSION_1_2 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_2; +#define GLX_VERSION_1_3 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_3; +#define GLX_VERSION_1_4 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_4; +#define GLX_EXT_swap_control 1 +GLAD_API_CALL int GLAD_GLX_EXT_swap_control; +#define GLX_MESA_swap_control 1 +GLAD_API_CALL int GLAD_GLX_MESA_swap_control; + + +typedef GLXFBConfig * (GLAD_API_PTR *PFNGLXCHOOSEFBCONFIGPROC)(Display * dpy, int screen, const int * attrib_list, int * nelements); +typedef XVisualInfo * (GLAD_API_PTR *PFNGLXCHOOSEVISUALPROC)(Display * dpy, int screen, int * attribList); +typedef void (GLAD_API_PTR *PFNGLXCOPYCONTEXTPROC)(Display * dpy, GLXContext src, GLXContext dst, unsigned long mask); +typedef GLXContext (GLAD_API_PTR *PFNGLXCREATECONTEXTPROC)(Display * dpy, XVisualInfo * vis, GLXContext shareList, Bool direct); +typedef GLXPixmap (GLAD_API_PTR *PFNGLXCREATEGLXPIXMAPPROC)(Display * dpy, XVisualInfo * visual, Pixmap pixmap); +typedef GLXContext (GLAD_API_PTR *PFNGLXCREATENEWCONTEXTPROC)(Display * dpy, GLXFBConfig config, int render_type, GLXContext share_list, Bool direct); +typedef GLXPbuffer (GLAD_API_PTR *PFNGLXCREATEPBUFFERPROC)(Display * dpy, GLXFBConfig config, const int * attrib_list); +typedef GLXPixmap (GLAD_API_PTR *PFNGLXCREATEPIXMAPPROC)(Display * dpy, GLXFBConfig config, Pixmap pixmap, const int * attrib_list); +typedef GLXWindow (GLAD_API_PTR *PFNGLXCREATEWINDOWPROC)(Display * dpy, GLXFBConfig config, Window win, const int * attrib_list); +typedef void (GLAD_API_PTR *PFNGLXDESTROYCONTEXTPROC)(Display * dpy, GLXContext ctx); +typedef void (GLAD_API_PTR *PFNGLXDESTROYGLXPIXMAPPROC)(Display * dpy, GLXPixmap pixmap); +typedef void (GLAD_API_PTR *PFNGLXDESTROYPBUFFERPROC)(Display * dpy, GLXPbuffer pbuf); +typedef void (GLAD_API_PTR *PFNGLXDESTROYPIXMAPPROC)(Display * dpy, GLXPixmap pixmap); +typedef void (GLAD_API_PTR *PFNGLXDESTROYWINDOWPROC)(Display * dpy, GLXWindow win); +typedef const char * (GLAD_API_PTR *PFNGLXGETCLIENTSTRINGPROC)(Display * dpy, int name); +typedef int (GLAD_API_PTR *PFNGLXGETCONFIGPROC)(Display * dpy, XVisualInfo * visual, int attrib, int * value); +typedef GLXContext (GLAD_API_PTR *PFNGLXGETCURRENTCONTEXTPROC)(void); +typedef Display * (GLAD_API_PTR *PFNGLXGETCURRENTDISPLAYPROC)(void); +typedef GLXDrawable (GLAD_API_PTR *PFNGLXGETCURRENTDRAWABLEPROC)(void); +typedef GLXDrawable (GLAD_API_PTR *PFNGLXGETCURRENTREADDRAWABLEPROC)(void); +typedef int (GLAD_API_PTR *PFNGLXGETFBCONFIGATTRIBPROC)(Display * dpy, GLXFBConfig config, int attribute, int * value); +typedef GLXFBConfig * (GLAD_API_PTR *PFNGLXGETFBCONFIGSPROC)(Display * dpy, int screen, int * nelements); +typedef __GLXextFuncPtr (GLAD_API_PTR *PFNGLXGETPROCADDRESSPROC)(const GLubyte * procName); +typedef void (GLAD_API_PTR *PFNGLXGETSELECTEDEVENTPROC)(Display * dpy, GLXDrawable draw, unsigned long * event_mask); +typedef int (GLAD_API_PTR *PFNGLXGETSWAPINTERVALMESAPROC)(void); +typedef XVisualInfo * (GLAD_API_PTR *PFNGLXGETVISUALFROMFBCONFIGPROC)(Display * dpy, GLXFBConfig config); +typedef Bool (GLAD_API_PTR *PFNGLXISDIRECTPROC)(Display * dpy, GLXContext ctx); +typedef Bool (GLAD_API_PTR *PFNGLXMAKECONTEXTCURRENTPROC)(Display * dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); +typedef Bool (GLAD_API_PTR *PFNGLXMAKECURRENTPROC)(Display * dpy, GLXDrawable drawable, GLXContext ctx); +typedef int (GLAD_API_PTR *PFNGLXQUERYCONTEXTPROC)(Display * dpy, GLXContext ctx, int attribute, int * value); +typedef void (GLAD_API_PTR *PFNGLXQUERYDRAWABLEPROC)(Display * dpy, GLXDrawable draw, int attribute, unsigned int * value); +typedef Bool (GLAD_API_PTR *PFNGLXQUERYEXTENSIONPROC)(Display * dpy, int * errorb, int * event); +typedef const char * (GLAD_API_PTR *PFNGLXQUERYEXTENSIONSSTRINGPROC)(Display * dpy, int screen); +typedef const char * (GLAD_API_PTR *PFNGLXQUERYSERVERSTRINGPROC)(Display * dpy, int screen, int name); +typedef Bool (GLAD_API_PTR *PFNGLXQUERYVERSIONPROC)(Display * dpy, int * maj, int * min); +typedef void (GLAD_API_PTR *PFNGLXSELECTEVENTPROC)(Display * dpy, GLXDrawable draw, unsigned long event_mask); +typedef void (GLAD_API_PTR *PFNGLXSWAPBUFFERSPROC)(Display * dpy, GLXDrawable drawable); +typedef void (GLAD_API_PTR *PFNGLXSWAPINTERVALEXTPROC)(Display * dpy, GLXDrawable drawable, int interval); +typedef int (GLAD_API_PTR *PFNGLXSWAPINTERVALMESAPROC)(unsigned int interval); +typedef void (GLAD_API_PTR *PFNGLXUSEXFONTPROC)(Font font, int first, int count, int list); +typedef void (GLAD_API_PTR *PFNGLXWAITGLPROC)(void); +typedef void (GLAD_API_PTR *PFNGLXWAITXPROC)(void); + +GLAD_API_CALL PFNGLXCHOOSEFBCONFIGPROC glad_glXChooseFBConfig; +#define glXChooseFBConfig glad_glXChooseFBConfig +GLAD_API_CALL PFNGLXCHOOSEVISUALPROC glad_glXChooseVisual; +#define glXChooseVisual glad_glXChooseVisual +GLAD_API_CALL PFNGLXCOPYCONTEXTPROC glad_glXCopyContext; +#define glXCopyContext glad_glXCopyContext +GLAD_API_CALL PFNGLXCREATECONTEXTPROC glad_glXCreateContext; +#define glXCreateContext glad_glXCreateContext +GLAD_API_CALL PFNGLXCREATEGLXPIXMAPPROC glad_glXCreateGLXPixmap; +#define glXCreateGLXPixmap glad_glXCreateGLXPixmap +GLAD_API_CALL PFNGLXCREATENEWCONTEXTPROC glad_glXCreateNewContext; +#define glXCreateNewContext glad_glXCreateNewContext +GLAD_API_CALL PFNGLXCREATEPBUFFERPROC glad_glXCreatePbuffer; +#define glXCreatePbuffer glad_glXCreatePbuffer +GLAD_API_CALL PFNGLXCREATEPIXMAPPROC glad_glXCreatePixmap; +#define glXCreatePixmap glad_glXCreatePixmap +GLAD_API_CALL PFNGLXCREATEWINDOWPROC glad_glXCreateWindow; +#define glXCreateWindow glad_glXCreateWindow +GLAD_API_CALL PFNGLXDESTROYCONTEXTPROC glad_glXDestroyContext; +#define glXDestroyContext glad_glXDestroyContext +GLAD_API_CALL PFNGLXDESTROYGLXPIXMAPPROC glad_glXDestroyGLXPixmap; +#define glXDestroyGLXPixmap glad_glXDestroyGLXPixmap +GLAD_API_CALL PFNGLXDESTROYPBUFFERPROC glad_glXDestroyPbuffer; +#define glXDestroyPbuffer glad_glXDestroyPbuffer +GLAD_API_CALL PFNGLXDESTROYPIXMAPPROC glad_glXDestroyPixmap; +#define glXDestroyPixmap glad_glXDestroyPixmap +GLAD_API_CALL PFNGLXDESTROYWINDOWPROC glad_glXDestroyWindow; +#define glXDestroyWindow glad_glXDestroyWindow +GLAD_API_CALL PFNGLXGETCLIENTSTRINGPROC glad_glXGetClientString; +#define glXGetClientString glad_glXGetClientString +GLAD_API_CALL PFNGLXGETCONFIGPROC glad_glXGetConfig; +#define glXGetConfig glad_glXGetConfig +GLAD_API_CALL PFNGLXGETCURRENTCONTEXTPROC glad_glXGetCurrentContext; +#define glXGetCurrentContext glad_glXGetCurrentContext +GLAD_API_CALL PFNGLXGETCURRENTDISPLAYPROC glad_glXGetCurrentDisplay; +#define glXGetCurrentDisplay glad_glXGetCurrentDisplay +GLAD_API_CALL PFNGLXGETCURRENTDRAWABLEPROC glad_glXGetCurrentDrawable; +#define glXGetCurrentDrawable glad_glXGetCurrentDrawable +GLAD_API_CALL PFNGLXGETCURRENTREADDRAWABLEPROC glad_glXGetCurrentReadDrawable; +#define glXGetCurrentReadDrawable glad_glXGetCurrentReadDrawable +GLAD_API_CALL PFNGLXGETFBCONFIGATTRIBPROC glad_glXGetFBConfigAttrib; +#define glXGetFBConfigAttrib glad_glXGetFBConfigAttrib +GLAD_API_CALL PFNGLXGETFBCONFIGSPROC glad_glXGetFBConfigs; +#define glXGetFBConfigs glad_glXGetFBConfigs +GLAD_API_CALL PFNGLXGETPROCADDRESSPROC glad_glXGetProcAddress; +#define glXGetProcAddress glad_glXGetProcAddress +GLAD_API_CALL PFNGLXGETSELECTEDEVENTPROC glad_glXGetSelectedEvent; +#define glXGetSelectedEvent glad_glXGetSelectedEvent +GLAD_API_CALL PFNGLXGETSWAPINTERVALMESAPROC glad_glXGetSwapIntervalMESA; +#define glXGetSwapIntervalMESA glad_glXGetSwapIntervalMESA +GLAD_API_CALL PFNGLXGETVISUALFROMFBCONFIGPROC glad_glXGetVisualFromFBConfig; +#define glXGetVisualFromFBConfig glad_glXGetVisualFromFBConfig +GLAD_API_CALL PFNGLXISDIRECTPROC glad_glXIsDirect; +#define glXIsDirect glad_glXIsDirect +GLAD_API_CALL PFNGLXMAKECONTEXTCURRENTPROC glad_glXMakeContextCurrent; +#define glXMakeContextCurrent glad_glXMakeContextCurrent +GLAD_API_CALL PFNGLXMAKECURRENTPROC glad_glXMakeCurrent; +#define glXMakeCurrent glad_glXMakeCurrent +GLAD_API_CALL PFNGLXQUERYCONTEXTPROC glad_glXQueryContext; +#define glXQueryContext glad_glXQueryContext +GLAD_API_CALL PFNGLXQUERYDRAWABLEPROC glad_glXQueryDrawable; +#define glXQueryDrawable glad_glXQueryDrawable +GLAD_API_CALL PFNGLXQUERYEXTENSIONPROC glad_glXQueryExtension; +#define glXQueryExtension glad_glXQueryExtension +GLAD_API_CALL PFNGLXQUERYEXTENSIONSSTRINGPROC glad_glXQueryExtensionsString; +#define glXQueryExtensionsString glad_glXQueryExtensionsString +GLAD_API_CALL PFNGLXQUERYSERVERSTRINGPROC glad_glXQueryServerString; +#define glXQueryServerString glad_glXQueryServerString +GLAD_API_CALL PFNGLXQUERYVERSIONPROC glad_glXQueryVersion; +#define glXQueryVersion glad_glXQueryVersion +GLAD_API_CALL PFNGLXSELECTEVENTPROC glad_glXSelectEvent; +#define glXSelectEvent glad_glXSelectEvent +GLAD_API_CALL PFNGLXSWAPBUFFERSPROC glad_glXSwapBuffers; +#define glXSwapBuffers glad_glXSwapBuffers +GLAD_API_CALL PFNGLXSWAPINTERVALEXTPROC glad_glXSwapIntervalEXT; +#define glXSwapIntervalEXT glad_glXSwapIntervalEXT +GLAD_API_CALL PFNGLXSWAPINTERVALMESAPROC glad_glXSwapIntervalMESA; +#define glXSwapIntervalMESA glad_glXSwapIntervalMESA +GLAD_API_CALL PFNGLXUSEXFONTPROC glad_glXUseXFont; +#define glXUseXFont glad_glXUseXFont +GLAD_API_CALL PFNGLXWAITGLPROC glad_glXWaitGL; +#define glXWaitGL glad_glXWaitGL +GLAD_API_CALL PFNGLXWAITXPROC glad_glXWaitX; +#define glXWaitX glad_glXWaitX + + + + + +GLAD_API_CALL int gladLoadGLXUserPtr(Display *display, int screen, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGLX(Display *display, int screen, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/wgl.h glmark2-2021.02/android/jni/src/glad/include/glad/wgl.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/glad/wgl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/glad/wgl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,298 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:13 2019 + * + * Generator: C/C++ + * Specification: wgl + * Extensions: 3 + * + * APIs: + * - wgl=1.0 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='wgl=1.0' --extensions='WGL_ARB_extensions_string,WGL_EXT_extensions_string,WGL_EXT_swap_control' c + * + * Online: + * http://glad.sh/#api=wgl%3D1.0&extensions=WGL_ARB_extensions_string%2CWGL_EXT_extensions_string%2CWGL_EXT_swap_control&generator=c&options= + * + */ + +#ifndef GLAD_WGL_H_ +#define GLAD_WGL_H_ + +#include +#include + +#define GLAD_WGL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define WGL_FONT_LINES 0 +#define WGL_FONT_POLYGONS 1 +#define WGL_SWAP_MAIN_PLANE 0x00000001 +#define WGL_SWAP_OVERLAY1 0x00000002 +#define WGL_SWAP_OVERLAY10 0x00000400 +#define WGL_SWAP_OVERLAY11 0x00000800 +#define WGL_SWAP_OVERLAY12 0x00001000 +#define WGL_SWAP_OVERLAY13 0x00002000 +#define WGL_SWAP_OVERLAY14 0x00004000 +#define WGL_SWAP_OVERLAY15 0x00008000 +#define WGL_SWAP_OVERLAY2 0x00000004 +#define WGL_SWAP_OVERLAY3 0x00000008 +#define WGL_SWAP_OVERLAY4 0x00000010 +#define WGL_SWAP_OVERLAY5 0x00000020 +#define WGL_SWAP_OVERLAY6 0x00000040 +#define WGL_SWAP_OVERLAY7 0x00000080 +#define WGL_SWAP_OVERLAY8 0x00000100 +#define WGL_SWAP_OVERLAY9 0x00000200 +#define WGL_SWAP_UNDERLAY1 0x00010000 +#define WGL_SWAP_UNDERLAY10 0x02000000 +#define WGL_SWAP_UNDERLAY11 0x04000000 +#define WGL_SWAP_UNDERLAY12 0x08000000 +#define WGL_SWAP_UNDERLAY13 0x10000000 +#define WGL_SWAP_UNDERLAY14 0x20000000 +#define WGL_SWAP_UNDERLAY15 0x40000000 +#define WGL_SWAP_UNDERLAY2 0x00020000 +#define WGL_SWAP_UNDERLAY3 0x00040000 +#define WGL_SWAP_UNDERLAY4 0x00080000 +#define WGL_SWAP_UNDERLAY5 0x00100000 +#define WGL_SWAP_UNDERLAY6 0x00200000 +#define WGL_SWAP_UNDERLAY7 0x00400000 +#define WGL_SWAP_UNDERLAY8 0x00800000 +#define WGL_SWAP_UNDERLAY9 0x01000000 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +struct _GPU_DEVICE { + DWORD cb; + CHAR DeviceName[32]; + CHAR DeviceString[128]; + DWORD Flags; + RECT rcVirtualScreen; +}; +DECLARE_HANDLE(HPBUFFERARB); +DECLARE_HANDLE(HPBUFFEREXT); +DECLARE_HANDLE(HVIDEOOUTPUTDEVICENV); +DECLARE_HANDLE(HPVIDEODEV); +DECLARE_HANDLE(HPGPUNV); +DECLARE_HANDLE(HGPUNV); +DECLARE_HANDLE(HVIDEOINPUTDEVICENV); +typedef struct _GPU_DEVICE GPU_DEVICE; +typedef struct _GPU_DEVICE *PGPU_DEVICE; + + +#define WGL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_WGL_VERSION_1_0; +#define WGL_ARB_extensions_string 1 +GLAD_API_CALL int GLAD_WGL_ARB_extensions_string; +#define WGL_EXT_extensions_string 1 +GLAD_API_CALL int GLAD_WGL_EXT_extensions_string; +#define WGL_EXT_swap_control 1 +GLAD_API_CALL int GLAD_WGL_EXT_swap_control; + + +typedef int (GLAD_API_PTR *PFNCHOOSEPIXELFORMATPROC)(HDC hDc, const PIXELFORMATDESCRIPTOR * pPfd); +typedef int (GLAD_API_PTR *PFNDESCRIBEPIXELFORMATPROC)(HDC hdc, int ipfd, UINT cjpfd, const PIXELFORMATDESCRIPTOR * ppfd); +typedef UINT (GLAD_API_PTR *PFNGETENHMETAFILEPIXELFORMATPROC)(HENHMETAFILE hemf, const PIXELFORMATDESCRIPTOR * ppfd); +typedef int (GLAD_API_PTR *PFNGETPIXELFORMATPROC)(HDC hdc); +typedef BOOL (GLAD_API_PTR *PFNSETPIXELFORMATPROC)(HDC hdc, int ipfd, const PIXELFORMATDESCRIPTOR * ppfd); +typedef BOOL (GLAD_API_PTR *PFNSWAPBUFFERSPROC)(HDC hdc); +typedef BOOL (GLAD_API_PTR *PFNWGLCOPYCONTEXTPROC)(HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask); +typedef HGLRC (GLAD_API_PTR *PFNWGLCREATECONTEXTPROC)(HDC hDc); +typedef HGLRC (GLAD_API_PTR *PFNWGLCREATELAYERCONTEXTPROC)(HDC hDc, int level); +typedef BOOL (GLAD_API_PTR *PFNWGLDELETECONTEXTPROC)(HGLRC oldContext); +typedef BOOL (GLAD_API_PTR *PFNWGLDESCRIBELAYERPLANEPROC)(HDC hDc, int pixelFormat, int layerPlane, UINT nBytes, const LAYERPLANEDESCRIPTOR * plpd); +typedef HGLRC (GLAD_API_PTR *PFNWGLGETCURRENTCONTEXTPROC)(void); +typedef HDC (GLAD_API_PTR *PFNWGLGETCURRENTDCPROC)(void); +typedef const char * (GLAD_API_PTR *PFNWGLGETEXTENSIONSSTRINGARBPROC)(HDC hdc); +typedef const char * (GLAD_API_PTR *PFNWGLGETEXTENSIONSSTRINGEXTPROC)(void); +typedef int (GLAD_API_PTR *PFNWGLGETLAYERPALETTEENTRIESPROC)(HDC hdc, int iLayerPlane, int iStart, int cEntries, const COLORREF * pcr); +typedef PROC (GLAD_API_PTR *PFNWGLGETPROCADDRESSPROC)(LPCSTR lpszProc); +typedef int (GLAD_API_PTR *PFNWGLGETSWAPINTERVALEXTPROC)(void); +typedef BOOL (GLAD_API_PTR *PFNWGLMAKECURRENTPROC)(HDC hDc, HGLRC newContext); +typedef BOOL (GLAD_API_PTR *PFNWGLREALIZELAYERPALETTEPROC)(HDC hdc, int iLayerPlane, BOOL bRealize); +typedef int (GLAD_API_PTR *PFNWGLSETLAYERPALETTEENTRIESPROC)(HDC hdc, int iLayerPlane, int iStart, int cEntries, const COLORREF * pcr); +typedef BOOL (GLAD_API_PTR *PFNWGLSHARELISTSPROC)(HGLRC hrcSrvShare, HGLRC hrcSrvSource); +typedef BOOL (GLAD_API_PTR *PFNWGLSWAPINTERVALEXTPROC)(int interval); +typedef BOOL (GLAD_API_PTR *PFNWGLSWAPLAYERBUFFERSPROC)(HDC hdc, UINT fuFlags); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSAPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSWPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESAPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESWPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); + +GLAD_API_CALL PFNWGLGETEXTENSIONSSTRINGARBPROC glad_wglGetExtensionsStringARB; +#define wglGetExtensionsStringARB glad_wglGetExtensionsStringARB +GLAD_API_CALL PFNWGLGETEXTENSIONSSTRINGEXTPROC glad_wglGetExtensionsStringEXT; +#define wglGetExtensionsStringEXT glad_wglGetExtensionsStringEXT +GLAD_API_CALL PFNWGLGETSWAPINTERVALEXTPROC glad_wglGetSwapIntervalEXT; +#define wglGetSwapIntervalEXT glad_wglGetSwapIntervalEXT +GLAD_API_CALL PFNWGLSWAPINTERVALEXTPROC glad_wglSwapIntervalEXT; +#define wglSwapIntervalEXT glad_wglSwapIntervalEXT + + + + + +GLAD_API_CALL int gladLoadWGLUserPtr(HDC hdc, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadWGL(HDC hdc, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/KHR/khrplatform.h glmark2-2021.02/android/jni/src/glad/include/KHR/khrplatform.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/include/KHR/khrplatform.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/include/KHR/khrplatform.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,290 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2018 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * The master copy of khrplatform.h is maintained in the Khronos EGL + * Registry repository at https://github.com/KhronosGroup/EGL-Registry + * The last semantic modification to khrplatform.h was at commit ID: + * 67a3e0864c2d75ea5287b9f3d2eb74a745936692 + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by filing pull requests or issues on + * the EGL Registry repository linked above. + * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. + * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC) +# define KHRONOS_STATIC 1 +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(KHRONOS_STATIC) + /* If the preprocessor constant KHRONOS_STATIC is defined, make the + * header compatible with static linking. */ +# define KHRONOS_APICALL +#elif defined(_WIN32) +# define KHRONOS_APICALL __declspec(dllimport) +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#elif defined(__ANDROID__) +# define KHRONOS_APICALL __attribute__((visibility("default"))) +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. + */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. + */ +#ifdef _WIN64 +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; +#endif + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. Therefore + * comparisons should not be made against KHRONOS_TRUE. + */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/README.md glmark2-2021.02/android/jni/src/glad/README.md --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/README.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/README.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,7 @@ +This directory contains a copy of the +[glad2](https://github.com/Dav1dde/glad/tree/glad2) library. + +The contents of this directory can be recreated (at the latest available +upstream version) with [this][permalink] permalink. + +[permalink]: http://gen.glad.sh/#profile=gl%3Dcompatibility%2Cgles1%3Dcommon&api=egl%3D1.5%2Cgl%3D3.3%2Cgles2%3D2.0%2Cglx%3D1.4%2Cwgl%3D1.0&extensions=EGL_EXT_platform_base%2CEGL_KHR_platform_gbm%2CEGL_KHR_platform_wayland%2CEGL_KHR_platform_x11%2CGLX_EXT_swap_control%2CGLX_MESA_swap_control%2CGL_OES_mapbuffer%2CGL_OES_required_internalformat%2CWGL_ARB_extensions_string%2CWGL_EXT_extensions_string%2CWGL_EXT_swap_control&generator=c diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/egl.c glmark2-2021.02/android/jni/src/glad/src/egl.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/egl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/src/egl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,258 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_EGL_VERSION_1_0 = 0; +int GLAD_EGL_VERSION_1_1 = 0; +int GLAD_EGL_VERSION_1_2 = 0; +int GLAD_EGL_VERSION_1_3 = 0; +int GLAD_EGL_VERSION_1_4 = 0; +int GLAD_EGL_VERSION_1_5 = 0; +int GLAD_EGL_EXT_platform_base = 0; +int GLAD_EGL_KHR_platform_gbm = 0; +int GLAD_EGL_KHR_platform_wayland = 0; +int GLAD_EGL_KHR_platform_x11 = 0; + + + +PFNEGLBINDAPIPROC glad_eglBindAPI = NULL; +PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage = NULL; +PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig = NULL; +PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync = NULL; +PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers = NULL; +PFNEGLCREATECONTEXTPROC glad_eglCreateContext = NULL; +PFNEGLCREATEIMAGEPROC glad_eglCreateImage = NULL; +PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer = NULL; +PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface = NULL; +PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface = NULL; +PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface = NULL; +PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC glad_eglCreatePlatformPixmapSurfaceEXT = NULL; +PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface = NULL; +PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC glad_eglCreatePlatformWindowSurfaceEXT = NULL; +PFNEGLCREATESYNCPROC glad_eglCreateSync = NULL; +PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface = NULL; +PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext = NULL; +PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage = NULL; +PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface = NULL; +PFNEGLDESTROYSYNCPROC glad_eglDestroySync = NULL; +PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib = NULL; +PFNEGLGETCONFIGSPROC glad_eglGetConfigs = NULL; +PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext = NULL; +PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay = NULL; +PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface = NULL; +PFNEGLGETDISPLAYPROC glad_eglGetDisplay = NULL; +PFNEGLGETERRORPROC glad_eglGetError = NULL; +PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay = NULL; +PFNEGLGETPLATFORMDISPLAYEXTPROC glad_eglGetPlatformDisplayEXT = NULL; +PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress = NULL; +PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib = NULL; +PFNEGLINITIALIZEPROC glad_eglInitialize = NULL; +PFNEGLMAKECURRENTPROC glad_eglMakeCurrent = NULL; +PFNEGLQUERYAPIPROC glad_eglQueryAPI = NULL; +PFNEGLQUERYCONTEXTPROC glad_eglQueryContext = NULL; +PFNEGLQUERYSTRINGPROC glad_eglQueryString = NULL; +PFNEGLQUERYSURFACEPROC glad_eglQuerySurface = NULL; +PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage = NULL; +PFNEGLRELEASETHREADPROC glad_eglReleaseThread = NULL; +PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib = NULL; +PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers = NULL; +PFNEGLSWAPINTERVALPROC glad_eglSwapInterval = NULL; +PFNEGLTERMINATEPROC glad_eglTerminate = NULL; +PFNEGLWAITCLIENTPROC glad_eglWaitClient = NULL; +PFNEGLWAITGLPROC glad_eglWaitGL = NULL; +PFNEGLWAITNATIVEPROC glad_eglWaitNative = NULL; +PFNEGLWAITSYNCPROC glad_eglWaitSync = NULL; + + +static void glad_egl_load_EGL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_0) return; + glad_eglChooseConfig = (PFNEGLCHOOSECONFIGPROC) load(userptr, "eglChooseConfig"); + glad_eglCopyBuffers = (PFNEGLCOPYBUFFERSPROC) load(userptr, "eglCopyBuffers"); + glad_eglCreateContext = (PFNEGLCREATECONTEXTPROC) load(userptr, "eglCreateContext"); + glad_eglCreatePbufferSurface = (PFNEGLCREATEPBUFFERSURFACEPROC) load(userptr, "eglCreatePbufferSurface"); + glad_eglCreatePixmapSurface = (PFNEGLCREATEPIXMAPSURFACEPROC) load(userptr, "eglCreatePixmapSurface"); + glad_eglCreateWindowSurface = (PFNEGLCREATEWINDOWSURFACEPROC) load(userptr, "eglCreateWindowSurface"); + glad_eglDestroyContext = (PFNEGLDESTROYCONTEXTPROC) load(userptr, "eglDestroyContext"); + glad_eglDestroySurface = (PFNEGLDESTROYSURFACEPROC) load(userptr, "eglDestroySurface"); + glad_eglGetConfigAttrib = (PFNEGLGETCONFIGATTRIBPROC) load(userptr, "eglGetConfigAttrib"); + glad_eglGetConfigs = (PFNEGLGETCONFIGSPROC) load(userptr, "eglGetConfigs"); + glad_eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay"); + glad_eglGetCurrentSurface = (PFNEGLGETCURRENTSURFACEPROC) load(userptr, "eglGetCurrentSurface"); + glad_eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay"); + glad_eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError"); + glad_eglGetProcAddress = (PFNEGLGETPROCADDRESSPROC) load(userptr, "eglGetProcAddress"); + glad_eglInitialize = (PFNEGLINITIALIZEPROC) load(userptr, "eglInitialize"); + glad_eglMakeCurrent = (PFNEGLMAKECURRENTPROC) load(userptr, "eglMakeCurrent"); + glad_eglQueryContext = (PFNEGLQUERYCONTEXTPROC) load(userptr, "eglQueryContext"); + glad_eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString"); + glad_eglQuerySurface = (PFNEGLQUERYSURFACEPROC) load(userptr, "eglQuerySurface"); + glad_eglSwapBuffers = (PFNEGLSWAPBUFFERSPROC) load(userptr, "eglSwapBuffers"); + glad_eglTerminate = (PFNEGLTERMINATEPROC) load(userptr, "eglTerminate"); + glad_eglWaitGL = (PFNEGLWAITGLPROC) load(userptr, "eglWaitGL"); + glad_eglWaitNative = (PFNEGLWAITNATIVEPROC) load(userptr, "eglWaitNative"); +} +static void glad_egl_load_EGL_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_1) return; + glad_eglBindTexImage = (PFNEGLBINDTEXIMAGEPROC) load(userptr, "eglBindTexImage"); + glad_eglReleaseTexImage = (PFNEGLRELEASETEXIMAGEPROC) load(userptr, "eglReleaseTexImage"); + glad_eglSurfaceAttrib = (PFNEGLSURFACEATTRIBPROC) load(userptr, "eglSurfaceAttrib"); + glad_eglSwapInterval = (PFNEGLSWAPINTERVALPROC) load(userptr, "eglSwapInterval"); +} +static void glad_egl_load_EGL_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_2) return; + glad_eglBindAPI = (PFNEGLBINDAPIPROC) load(userptr, "eglBindAPI"); + glad_eglCreatePbufferFromClientBuffer = (PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC) load(userptr, "eglCreatePbufferFromClientBuffer"); + glad_eglQueryAPI = (PFNEGLQUERYAPIPROC) load(userptr, "eglQueryAPI"); + glad_eglReleaseThread = (PFNEGLRELEASETHREADPROC) load(userptr, "eglReleaseThread"); + glad_eglWaitClient = (PFNEGLWAITCLIENTPROC) load(userptr, "eglWaitClient"); +} +static void glad_egl_load_EGL_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_4) return; + glad_eglGetCurrentContext = (PFNEGLGETCURRENTCONTEXTPROC) load(userptr, "eglGetCurrentContext"); +} +static void glad_egl_load_EGL_VERSION_1_5( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_5) return; + glad_eglClientWaitSync = (PFNEGLCLIENTWAITSYNCPROC) load(userptr, "eglClientWaitSync"); + glad_eglCreateImage = (PFNEGLCREATEIMAGEPROC) load(userptr, "eglCreateImage"); + glad_eglCreatePlatformPixmapSurface = (PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC) load(userptr, "eglCreatePlatformPixmapSurface"); + glad_eglCreatePlatformWindowSurface = (PFNEGLCREATEPLATFORMWINDOWSURFACEPROC) load(userptr, "eglCreatePlatformWindowSurface"); + glad_eglCreateSync = (PFNEGLCREATESYNCPROC) load(userptr, "eglCreateSync"); + glad_eglDestroyImage = (PFNEGLDESTROYIMAGEPROC) load(userptr, "eglDestroyImage"); + glad_eglDestroySync = (PFNEGLDESTROYSYNCPROC) load(userptr, "eglDestroySync"); + glad_eglGetPlatformDisplay = (PFNEGLGETPLATFORMDISPLAYPROC) load(userptr, "eglGetPlatformDisplay"); + glad_eglGetSyncAttrib = (PFNEGLGETSYNCATTRIBPROC) load(userptr, "eglGetSyncAttrib"); + glad_eglWaitSync = (PFNEGLWAITSYNCPROC) load(userptr, "eglWaitSync"); +} +static void glad_egl_load_EGL_EXT_platform_base( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_EXT_platform_base) return; + glad_eglCreatePlatformPixmapSurfaceEXT = (PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) load(userptr, "eglCreatePlatformPixmapSurfaceEXT"); + glad_eglCreatePlatformWindowSurfaceEXT = (PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) load(userptr, "eglCreatePlatformWindowSurfaceEXT"); + glad_eglGetPlatformDisplayEXT = (PFNEGLGETPLATFORMDISPLAYEXTPROC) load(userptr, "eglGetPlatformDisplayEXT"); +} + + + +static int glad_egl_get_extensions(EGLDisplay display, const char **extensions) { + *extensions = eglQueryString(display, EGL_EXTENSIONS); + + return extensions != NULL; +} + +static int glad_egl_has_extension(const char *extensions, const char *ext) { + const char *loc; + const char *terminator; + if(extensions == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } +} + +static GLADapiproc glad_egl_get_proc_from_userptr(void *userptr, const char *name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_egl_find_extensions_egl(EGLDisplay display) { + const char *extensions; + if (!glad_egl_get_extensions(display, &extensions)) return 0; + + GLAD_EGL_EXT_platform_base = glad_egl_has_extension(extensions, "EGL_EXT_platform_base"); + GLAD_EGL_KHR_platform_gbm = glad_egl_has_extension(extensions, "EGL_KHR_platform_gbm"); + GLAD_EGL_KHR_platform_wayland = glad_egl_has_extension(extensions, "EGL_KHR_platform_wayland"); + GLAD_EGL_KHR_platform_x11 = glad_egl_has_extension(extensions, "EGL_KHR_platform_x11"); + + return 1; +} + +static int glad_egl_find_core_egl(EGLDisplay display) { + int major, minor; + const char *version; + + if (display == NULL) { + display = EGL_NO_DISPLAY; /* this is usually NULL, better safe than sorry */ + } + if (display == EGL_NO_DISPLAY) { + display = eglGetCurrentDisplay(); + } +#ifdef EGL_VERSION_1_4 + if (display == EGL_NO_DISPLAY) { + display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + } +#endif +#ifndef EGL_VERSION_1_5 + if (display == EGL_NO_DISPLAY) { + return 0; + } +#endif + + version = eglQueryString(display, EGL_VERSION); + (void) eglGetError(); + + if (version == NULL) { + major = 1; + minor = 0; + } else { + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + } + + GLAD_EGL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_EGL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_EGL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_EGL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_EGL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + GLAD_EGL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void* userptr) { + int version; + eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay"); + eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay"); + eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString"); + eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError"); + if (eglGetDisplay == NULL || eglGetCurrentDisplay == NULL || eglQueryString == NULL || eglGetError == NULL) return 0; + + version = glad_egl_find_core_egl(display); + if (!version) return 0; + glad_egl_load_EGL_VERSION_1_0(load, userptr); + glad_egl_load_EGL_VERSION_1_1(load, userptr); + glad_egl_load_EGL_VERSION_1_2(load, userptr); + glad_egl_load_EGL_VERSION_1_4(load, userptr); + glad_egl_load_EGL_VERSION_1_5(load, userptr); + + if (!glad_egl_find_extensions_egl(display)) return 0; + glad_egl_load_EGL_EXT_platform_base(load, userptr); + + return version; +} + +int gladLoadEGL(EGLDisplay display, GLADloadfunc load) { + return gladLoadEGLUserPtr(display, glad_egl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/gl.c glmark2-2021.02/android/jni/src/glad/src/gl.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/gl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/src/gl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1709 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GL_VERSION_1_0 = 0; +int GLAD_GL_VERSION_1_1 = 0; +int GLAD_GL_VERSION_1_2 = 0; +int GLAD_GL_VERSION_1_3 = 0; +int GLAD_GL_VERSION_1_4 = 0; +int GLAD_GL_VERSION_1_5 = 0; +int GLAD_GL_VERSION_2_0 = 0; +int GLAD_GL_VERSION_2_1 = 0; +int GLAD_GL_VERSION_3_0 = 0; +int GLAD_GL_VERSION_3_1 = 0; +int GLAD_GL_VERSION_3_2 = 0; +int GLAD_GL_VERSION_3_3 = 0; + + + +PFNGLACCUMPROC glad_glAccum = NULL; +PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL; +PFNGLALPHAFUNCPROC glad_glAlphaFunc = NULL; +PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident = NULL; +PFNGLARRAYELEMENTPROC glad_glArrayElement = NULL; +PFNGLATTACHSHADERPROC glad_glAttachShader = NULL; +PFNGLBEGINPROC glad_glBegin = NULL; +PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender = NULL; +PFNGLBEGINQUERYPROC glad_glBeginQuery = NULL; +PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback = NULL; +PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL; +PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL; +PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase = NULL; +PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange = NULL; +PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation = NULL; +PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed = NULL; +PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL; +PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; +PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL; +PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL; +PFNGLBITMAPPROC glad_glBitmap = NULL; +PFNGLBLENDCOLORPROC glad_glBlendColor = NULL; +PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL; +PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL; +PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL; +PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; +PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL; +PFNGLBUFFERDATAPROC glad_glBufferData = NULL; +PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; +PFNGLCALLLISTPROC glad_glCallList = NULL; +PFNGLCALLLISTSPROC glad_glCallLists = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCLAMPCOLORPROC glad_glClampColor = NULL; +PFNGLCLEARPROC glad_glClear = NULL; +PFNGLCLEARACCUMPROC glad_glClearAccum = NULL; +PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi = NULL; +PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv = NULL; +PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv = NULL; +PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL; +PFNGLCLEARCOLORPROC glad_glClearColor = NULL; +PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL; +PFNGLCLEARINDEXPROC glad_glClearIndex = NULL; +PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; +PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture = NULL; +PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL; +PFNGLCLIPPLANEPROC glad_glClipPlane = NULL; +PFNGLCOLOR3BPROC glad_glColor3b = NULL; +PFNGLCOLOR3BVPROC glad_glColor3bv = NULL; +PFNGLCOLOR3DPROC glad_glColor3d = NULL; +PFNGLCOLOR3DVPROC glad_glColor3dv = NULL; +PFNGLCOLOR3FPROC glad_glColor3f = NULL; +PFNGLCOLOR3FVPROC glad_glColor3fv = NULL; +PFNGLCOLOR3IPROC glad_glColor3i = NULL; +PFNGLCOLOR3IVPROC glad_glColor3iv = NULL; +PFNGLCOLOR3SPROC glad_glColor3s = NULL; +PFNGLCOLOR3SVPROC glad_glColor3sv = NULL; +PFNGLCOLOR3UBPROC glad_glColor3ub = NULL; +PFNGLCOLOR3UBVPROC glad_glColor3ubv = NULL; +PFNGLCOLOR3UIPROC glad_glColor3ui = NULL; +PFNGLCOLOR3UIVPROC glad_glColor3uiv = NULL; +PFNGLCOLOR3USPROC glad_glColor3us = NULL; +PFNGLCOLOR3USVPROC glad_glColor3usv = NULL; +PFNGLCOLOR4BPROC glad_glColor4b = NULL; +PFNGLCOLOR4BVPROC glad_glColor4bv = NULL; +PFNGLCOLOR4DPROC glad_glColor4d = NULL; +PFNGLCOLOR4DVPROC glad_glColor4dv = NULL; +PFNGLCOLOR4FPROC glad_glColor4f = NULL; +PFNGLCOLOR4FVPROC glad_glColor4fv = NULL; +PFNGLCOLOR4IPROC glad_glColor4i = NULL; +PFNGLCOLOR4IVPROC glad_glColor4iv = NULL; +PFNGLCOLOR4SPROC glad_glColor4s = NULL; +PFNGLCOLOR4SVPROC glad_glColor4sv = NULL; +PFNGLCOLOR4UBPROC glad_glColor4ub = NULL; +PFNGLCOLOR4UBVPROC glad_glColor4ubv = NULL; +PFNGLCOLOR4UIPROC glad_glColor4ui = NULL; +PFNGLCOLOR4UIVPROC glad_glColor4uiv = NULL; +PFNGLCOLOR4USPROC glad_glColor4us = NULL; +PFNGLCOLOR4USVPROC glad_glColor4usv = NULL; +PFNGLCOLORMASKPROC glad_glColorMask = NULL; +PFNGLCOLORMASKIPROC glad_glColorMaski = NULL; +PFNGLCOLORMATERIALPROC glad_glColorMaterial = NULL; +PFNGLCOLORP3UIPROC glad_glColorP3ui = NULL; +PFNGLCOLORP3UIVPROC glad_glColorP3uiv = NULL; +PFNGLCOLORP4UIPROC glad_glColorP4ui = NULL; +PFNGLCOLORP4UIVPROC glad_glColorP4uiv = NULL; +PFNGLCOLORPOINTERPROC glad_glColorPointer = NULL; +PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL; +PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL; +PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL; +PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL; +PFNGLCOPYPIXELSPROC glad_glCopyPixels = NULL; +PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL; +PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL; +PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL; +PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATESHADERPROC glad_glCreateShader = NULL; +PFNGLCULLFACEPROC glad_glCullFace = NULL; +PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL; +PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL; +PFNGLDELETELISTSPROC glad_glDeleteLists = NULL; +PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL; +PFNGLDELETEQUERIESPROC glad_glDeleteQueries = NULL; +PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL; +PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers = NULL; +PFNGLDELETESHADERPROC glad_glDeleteShader = NULL; +PFNGLDELETESYNCPROC glad_glDeleteSync = NULL; +PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL; +PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays = NULL; +PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL; +PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; +PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL; +PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; +PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; +PFNGLDISABLEIPROC glad_glDisablei = NULL; +PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL; +PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced = NULL; +PFNGLDRAWBUFFERPROC glad_glDrawBuffer = NULL; +PFNGLDRAWBUFFERSPROC glad_glDrawBuffers = NULL; +PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL; +PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex = NULL; +PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex = NULL; +PFNGLDRAWPIXELSPROC glad_glDrawPixels = NULL; +PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements = NULL; +PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex = NULL; +PFNGLEDGEFLAGPROC glad_glEdgeFlag = NULL; +PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer = NULL; +PFNGLEDGEFLAGVPROC glad_glEdgeFlagv = NULL; +PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState = NULL; +PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL; +PFNGLENABLEIPROC glad_glEnablei = NULL; +PFNGLENDPROC glad_glEnd = NULL; +PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL; +PFNGLENDLISTPROC glad_glEndList = NULL; +PFNGLENDQUERYPROC glad_glEndQuery = NULL; +PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback = NULL; +PFNGLEVALCOORD1DPROC glad_glEvalCoord1d = NULL; +PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv = NULL; +PFNGLEVALCOORD1FPROC glad_glEvalCoord1f = NULL; +PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv = NULL; +PFNGLEVALCOORD2DPROC glad_glEvalCoord2d = NULL; +PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv = NULL; +PFNGLEVALCOORD2FPROC glad_glEvalCoord2f = NULL; +PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv = NULL; +PFNGLEVALMESH1PROC glad_glEvalMesh1 = NULL; +PFNGLEVALMESH2PROC glad_glEvalMesh2 = NULL; +PFNGLEVALPOINT1PROC glad_glEvalPoint1 = NULL; +PFNGLEVALPOINT2PROC glad_glEvalPoint2 = NULL; +PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer = NULL; +PFNGLFENCESYNCPROC glad_glFenceSync = NULL; +PFNGLFINISHPROC glad_glFinish = NULL; +PFNGLFLUSHPROC glad_glFlush = NULL; +PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL; +PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer = NULL; +PFNGLFOGCOORDDPROC glad_glFogCoordd = NULL; +PFNGLFOGCOORDDVPROC glad_glFogCoorddv = NULL; +PFNGLFOGCOORDFPROC glad_glFogCoordf = NULL; +PFNGLFOGCOORDFVPROC glad_glFogCoordfv = NULL; +PFNGLFOGFPROC glad_glFogf = NULL; +PFNGLFOGFVPROC glad_glFogfv = NULL; +PFNGLFOGIPROC glad_glFogi = NULL; +PFNGLFOGIVPROC glad_glFogiv = NULL; +PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; +PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL; +PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D = NULL; +PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL; +PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D = NULL; +PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer = NULL; +PFNGLFRONTFACEPROC glad_glFrontFace = NULL; +PFNGLFRUSTUMPROC glad_glFrustum = NULL; +PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL; +PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL; +PFNGLGENLISTSPROC glad_glGenLists = NULL; +PFNGLGENQUERIESPROC glad_glGenQueries = NULL; +PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL; +PFNGLGENSAMPLERSPROC glad_glGenSamplers = NULL; +PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; +PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL; +PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; +PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL; +PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName = NULL; +PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv = NULL; +PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName = NULL; +PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv = NULL; +PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL; +PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL; +PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v = NULL; +PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; +PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v = NULL; +PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; +PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; +PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL; +PFNGLGETCLIPPLANEPROC glad_glGetClipPlane = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL; +PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL; +PFNGLGETERRORPROC glad_glGetError = NULL; +PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; +PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex = NULL; +PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL; +PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v = NULL; +PFNGLGETINTEGER64VPROC glad_glGetInteger64v = NULL; +PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v = NULL; +PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; +PFNGLGETLIGHTFVPROC glad_glGetLightfv = NULL; +PFNGLGETLIGHTIVPROC glad_glGetLightiv = NULL; +PFNGLGETMAPDVPROC glad_glGetMapdv = NULL; +PFNGLGETMAPFVPROC glad_glGetMapfv = NULL; +PFNGLGETMAPIVPROC glad_glGetMapiv = NULL; +PFNGLGETMATERIALFVPROC glad_glGetMaterialfv = NULL; +PFNGLGETMATERIALIVPROC glad_glGetMaterialiv = NULL; +PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL; +PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv = NULL; +PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv = NULL; +PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv = NULL; +PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL; +PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple = NULL; +PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL; +PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL; +PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL; +PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v = NULL; +PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv = NULL; +PFNGLGETQUERYIVPROC glad_glGetQueryiv = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL; +PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv = NULL; +PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv = NULL; +PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv = NULL; +PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv = NULL; +PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL; +PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL; +PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL; +PFNGLGETSTRINGPROC glad_glGetString = NULL; +PFNGLGETSTRINGIPROC glad_glGetStringi = NULL; +PFNGLGETSYNCIVPROC glad_glGetSynciv = NULL; +PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv = NULL; +PFNGLGETTEXENVIVPROC glad_glGetTexEnviv = NULL; +PFNGLGETTEXGENDVPROC glad_glGetTexGendv = NULL; +PFNGLGETTEXGENFVPROC glad_glGetTexGenfv = NULL; +PFNGLGETTEXGENIVPROC glad_glGetTexGeniv = NULL; +PFNGLGETTEXIMAGEPROC glad_glGetTexImage = NULL; +PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv = NULL; +PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv = NULL; +PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL; +PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL; +PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; +PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL; +PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL; +PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL; +PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; +PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; +PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; +PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL; +PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL; +PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL; +PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL; +PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; +PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLHINTPROC glad_glHint = NULL; +PFNGLINDEXMASKPROC glad_glIndexMask = NULL; +PFNGLINDEXPOINTERPROC glad_glIndexPointer = NULL; +PFNGLINDEXDPROC glad_glIndexd = NULL; +PFNGLINDEXDVPROC glad_glIndexdv = NULL; +PFNGLINDEXFPROC glad_glIndexf = NULL; +PFNGLINDEXFVPROC glad_glIndexfv = NULL; +PFNGLINDEXIPROC glad_glIndexi = NULL; +PFNGLINDEXIVPROC glad_glIndexiv = NULL; +PFNGLINDEXSPROC glad_glIndexs = NULL; +PFNGLINDEXSVPROC glad_glIndexsv = NULL; +PFNGLINDEXUBPROC glad_glIndexub = NULL; +PFNGLINDEXUBVPROC glad_glIndexubv = NULL; +PFNGLINITNAMESPROC glad_glInitNames = NULL; +PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays = NULL; +PFNGLISBUFFERPROC glad_glIsBuffer = NULL; +PFNGLISENABLEDPROC glad_glIsEnabled = NULL; +PFNGLISENABLEDIPROC glad_glIsEnabledi = NULL; +PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL; +PFNGLISLISTPROC glad_glIsList = NULL; +PFNGLISPROGRAMPROC glad_glIsProgram = NULL; +PFNGLISQUERYPROC glad_glIsQuery = NULL; +PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL; +PFNGLISSAMPLERPROC glad_glIsSampler = NULL; +PFNGLISSHADERPROC glad_glIsShader = NULL; +PFNGLISSYNCPROC glad_glIsSync = NULL; +PFNGLISTEXTUREPROC glad_glIsTexture = NULL; +PFNGLISVERTEXARRAYPROC glad_glIsVertexArray = NULL; +PFNGLLIGHTMODELFPROC glad_glLightModelf = NULL; +PFNGLLIGHTMODELFVPROC glad_glLightModelfv = NULL; +PFNGLLIGHTMODELIPROC glad_glLightModeli = NULL; +PFNGLLIGHTMODELIVPROC glad_glLightModeliv = NULL; +PFNGLLIGHTFPROC glad_glLightf = NULL; +PFNGLLIGHTFVPROC glad_glLightfv = NULL; +PFNGLLIGHTIPROC glad_glLighti = NULL; +PFNGLLIGHTIVPROC glad_glLightiv = NULL; +PFNGLLINESTIPPLEPROC glad_glLineStipple = NULL; +PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; +PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; +PFNGLLISTBASEPROC glad_glListBase = NULL; +PFNGLLOADIDENTITYPROC glad_glLoadIdentity = NULL; +PFNGLLOADMATRIXDPROC glad_glLoadMatrixd = NULL; +PFNGLLOADMATRIXFPROC glad_glLoadMatrixf = NULL; +PFNGLLOADNAMEPROC glad_glLoadName = NULL; +PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd = NULL; +PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf = NULL; +PFNGLLOGICOPPROC glad_glLogicOp = NULL; +PFNGLMAP1DPROC glad_glMap1d = NULL; +PFNGLMAP1FPROC glad_glMap1f = NULL; +PFNGLMAP2DPROC glad_glMap2d = NULL; +PFNGLMAP2FPROC glad_glMap2f = NULL; +PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; +PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL; +PFNGLMAPGRID1DPROC glad_glMapGrid1d = NULL; +PFNGLMAPGRID1FPROC glad_glMapGrid1f = NULL; +PFNGLMAPGRID2DPROC glad_glMapGrid2d = NULL; +PFNGLMAPGRID2FPROC glad_glMapGrid2f = NULL; +PFNGLMATERIALFPROC glad_glMaterialf = NULL; +PFNGLMATERIALFVPROC glad_glMaterialfv = NULL; +PFNGLMATERIALIPROC glad_glMateriali = NULL; +PFNGLMATERIALIVPROC glad_glMaterialiv = NULL; +PFNGLMATRIXMODEPROC glad_glMatrixMode = NULL; +PFNGLMULTMATRIXDPROC glad_glMultMatrixd = NULL; +PFNGLMULTMATRIXFPROC glad_glMultMatrixf = NULL; +PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd = NULL; +PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf = NULL; +PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL; +PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL; +PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL; +PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d = NULL; +PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv = NULL; +PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f = NULL; +PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv = NULL; +PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i = NULL; +PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv = NULL; +PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s = NULL; +PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv = NULL; +PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d = NULL; +PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv = NULL; +PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f = NULL; +PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv = NULL; +PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i = NULL; +PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv = NULL; +PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s = NULL; +PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv = NULL; +PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d = NULL; +PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv = NULL; +PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f = NULL; +PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv = NULL; +PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i = NULL; +PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv = NULL; +PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s = NULL; +PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv = NULL; +PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d = NULL; +PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv = NULL; +PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f = NULL; +PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv = NULL; +PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i = NULL; +PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv = NULL; +PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s = NULL; +PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv = NULL; +PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL; +PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL; +PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL; +PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv = NULL; +PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL; +PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL; +PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL; +PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL; +PFNGLNEWLISTPROC glad_glNewList = NULL; +PFNGLNORMAL3BPROC glad_glNormal3b = NULL; +PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL; +PFNGLNORMAL3DPROC glad_glNormal3d = NULL; +PFNGLNORMAL3DVPROC glad_glNormal3dv = NULL; +PFNGLNORMAL3FPROC glad_glNormal3f = NULL; +PFNGLNORMAL3FVPROC glad_glNormal3fv = NULL; +PFNGLNORMAL3IPROC glad_glNormal3i = NULL; +PFNGLNORMAL3IVPROC glad_glNormal3iv = NULL; +PFNGLNORMAL3SPROC glad_glNormal3s = NULL; +PFNGLNORMAL3SVPROC glad_glNormal3sv = NULL; +PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL; +PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL; +PFNGLNORMALPOINTERPROC glad_glNormalPointer = NULL; +PFNGLORTHOPROC glad_glOrtho = NULL; +PFNGLPASSTHROUGHPROC glad_glPassThrough = NULL; +PFNGLPIXELMAPFVPROC glad_glPixelMapfv = NULL; +PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv = NULL; +PFNGLPIXELMAPUSVPROC glad_glPixelMapusv = NULL; +PFNGLPIXELSTOREFPROC glad_glPixelStoref = NULL; +PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; +PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf = NULL; +PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi = NULL; +PFNGLPIXELZOOMPROC glad_glPixelZoom = NULL; +PFNGLPOINTPARAMETERFPROC glad_glPointParameterf = NULL; +PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv = NULL; +PFNGLPOINTPARAMETERIPROC glad_glPointParameteri = NULL; +PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL; +PFNGLPOINTSIZEPROC glad_glPointSize = NULL; +PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL; +PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple = NULL; +PFNGLPOPATTRIBPROC glad_glPopAttrib = NULL; +PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib = NULL; +PFNGLPOPMATRIXPROC glad_glPopMatrix = NULL; +PFNGLPOPNAMEPROC glad_glPopName = NULL; +PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL; +PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures = NULL; +PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex = NULL; +PFNGLPUSHATTRIBPROC glad_glPushAttrib = NULL; +PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib = NULL; +PFNGLPUSHMATRIXPROC glad_glPushMatrix = NULL; +PFNGLPUSHNAMEPROC glad_glPushName = NULL; +PFNGLQUERYCOUNTERPROC glad_glQueryCounter = NULL; +PFNGLRASTERPOS2DPROC glad_glRasterPos2d = NULL; +PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv = NULL; +PFNGLRASTERPOS2FPROC glad_glRasterPos2f = NULL; +PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv = NULL; +PFNGLRASTERPOS2IPROC glad_glRasterPos2i = NULL; +PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv = NULL; +PFNGLRASTERPOS2SPROC glad_glRasterPos2s = NULL; +PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv = NULL; +PFNGLRASTERPOS3DPROC glad_glRasterPos3d = NULL; +PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv = NULL; +PFNGLRASTERPOS3FPROC glad_glRasterPos3f = NULL; +PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv = NULL; +PFNGLRASTERPOS3IPROC glad_glRasterPos3i = NULL; +PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv = NULL; +PFNGLRASTERPOS3SPROC glad_glRasterPos3s = NULL; +PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv = NULL; +PFNGLRASTERPOS4DPROC glad_glRasterPos4d = NULL; +PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv = NULL; +PFNGLRASTERPOS4FPROC glad_glRasterPos4f = NULL; +PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv = NULL; +PFNGLRASTERPOS4IPROC glad_glRasterPos4i = NULL; +PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv = NULL; +PFNGLRASTERPOS4SPROC glad_glRasterPos4s = NULL; +PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv = NULL; +PFNGLREADBUFFERPROC glad_glReadBuffer = NULL; +PFNGLREADPIXELSPROC glad_glReadPixels = NULL; +PFNGLRECTDPROC glad_glRectd = NULL; +PFNGLRECTDVPROC glad_glRectdv = NULL; +PFNGLRECTFPROC glad_glRectf = NULL; +PFNGLRECTFVPROC glad_glRectfv = NULL; +PFNGLRECTIPROC glad_glRecti = NULL; +PFNGLRECTIVPROC glad_glRectiv = NULL; +PFNGLRECTSPROC glad_glRects = NULL; +PFNGLRECTSVPROC glad_glRectsv = NULL; +PFNGLRENDERMODEPROC glad_glRenderMode = NULL; +PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample = NULL; +PFNGLROTATEDPROC glad_glRotated = NULL; +PFNGLROTATEFPROC glad_glRotatef = NULL; +PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL; +PFNGLSAMPLEMASKIPROC glad_glSampleMaski = NULL; +PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv = NULL; +PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv = NULL; +PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf = NULL; +PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv = NULL; +PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri = NULL; +PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv = NULL; +PFNGLSCALEDPROC glad_glScaled = NULL; +PFNGLSCALEFPROC glad_glScalef = NULL; +PFNGLSCISSORPROC glad_glScissor = NULL; +PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b = NULL; +PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv = NULL; +PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d = NULL; +PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv = NULL; +PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f = NULL; +PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv = NULL; +PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i = NULL; +PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv = NULL; +PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s = NULL; +PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv = NULL; +PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub = NULL; +PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv = NULL; +PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui = NULL; +PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv = NULL; +PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us = NULL; +PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv = NULL; +PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui = NULL; +PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL; +PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer = NULL; +PFNGLSELECTBUFFERPROC glad_glSelectBuffer = NULL; +PFNGLSHADEMODELPROC glad_glShadeModel = NULL; +PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; +PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; +PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; +PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; +PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL; +PFNGLSTENCILOPPROC glad_glStencilOp = NULL; +PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL; +PFNGLTEXBUFFERPROC glad_glTexBuffer = NULL; +PFNGLTEXCOORD1DPROC glad_glTexCoord1d = NULL; +PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv = NULL; +PFNGLTEXCOORD1FPROC glad_glTexCoord1f = NULL; +PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv = NULL; +PFNGLTEXCOORD1IPROC glad_glTexCoord1i = NULL; +PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv = NULL; +PFNGLTEXCOORD1SPROC glad_glTexCoord1s = NULL; +PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv = NULL; +PFNGLTEXCOORD2DPROC glad_glTexCoord2d = NULL; +PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv = NULL; +PFNGLTEXCOORD2FPROC glad_glTexCoord2f = NULL; +PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv = NULL; +PFNGLTEXCOORD2IPROC glad_glTexCoord2i = NULL; +PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv = NULL; +PFNGLTEXCOORD2SPROC glad_glTexCoord2s = NULL; +PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv = NULL; +PFNGLTEXCOORD3DPROC glad_glTexCoord3d = NULL; +PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv = NULL; +PFNGLTEXCOORD3FPROC glad_glTexCoord3f = NULL; +PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv = NULL; +PFNGLTEXCOORD3IPROC glad_glTexCoord3i = NULL; +PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv = NULL; +PFNGLTEXCOORD3SPROC glad_glTexCoord3s = NULL; +PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv = NULL; +PFNGLTEXCOORD4DPROC glad_glTexCoord4d = NULL; +PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv = NULL; +PFNGLTEXCOORD4FPROC glad_glTexCoord4f = NULL; +PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv = NULL; +PFNGLTEXCOORD4IPROC glad_glTexCoord4i = NULL; +PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv = NULL; +PFNGLTEXCOORD4SPROC glad_glTexCoord4s = NULL; +PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv = NULL; +PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui = NULL; +PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv = NULL; +PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui = NULL; +PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv = NULL; +PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui = NULL; +PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv = NULL; +PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui = NULL; +PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv = NULL; +PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer = NULL; +PFNGLTEXENVFPROC glad_glTexEnvf = NULL; +PFNGLTEXENVFVPROC glad_glTexEnvfv = NULL; +PFNGLTEXENVIPROC glad_glTexEnvi = NULL; +PFNGLTEXENVIVPROC glad_glTexEnviv = NULL; +PFNGLTEXGENDPROC glad_glTexGend = NULL; +PFNGLTEXGENDVPROC glad_glTexGendv = NULL; +PFNGLTEXGENFPROC glad_glTexGenf = NULL; +PFNGLTEXGENFVPROC glad_glTexGenfv = NULL; +PFNGLTEXGENIPROC glad_glTexGeni = NULL; +PFNGLTEXGENIVPROC glad_glTexGeniv = NULL; +PFNGLTEXIMAGE1DPROC glad_glTexImage1D = NULL; +PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; +PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample = NULL; +PFNGLTEXIMAGE3DPROC glad_glTexImage3D = NULL; +PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample = NULL; +PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv = NULL; +PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv = NULL; +PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; +PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; +PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; +PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; +PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL; +PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; +PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL; +PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL; +PFNGLTRANSLATEDPROC glad_glTranslated = NULL; +PFNGLTRANSLATEFPROC glad_glTranslatef = NULL; +PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; +PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL; +PFNGLUNIFORM1IPROC glad_glUniform1i = NULL; +PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL; +PFNGLUNIFORM1UIPROC glad_glUniform1ui = NULL; +PFNGLUNIFORM1UIVPROC glad_glUniform1uiv = NULL; +PFNGLUNIFORM2FPROC glad_glUniform2f = NULL; +PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL; +PFNGLUNIFORM2IPROC glad_glUniform2i = NULL; +PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL; +PFNGLUNIFORM2UIPROC glad_glUniform2ui = NULL; +PFNGLUNIFORM2UIVPROC glad_glUniform2uiv = NULL; +PFNGLUNIFORM3FPROC glad_glUniform3f = NULL; +PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL; +PFNGLUNIFORM3IPROC glad_glUniform3i = NULL; +PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL; +PFNGLUNIFORM3UIPROC glad_glUniform3ui = NULL; +PFNGLUNIFORM3UIVPROC glad_glUniform3uiv = NULL; +PFNGLUNIFORM4FPROC glad_glUniform4f = NULL; +PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL; +PFNGLUNIFORM4IPROC glad_glUniform4i = NULL; +PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL; +PFNGLUNIFORM4UIPROC glad_glUniform4ui = NULL; +PFNGLUNIFORM4UIVPROC glad_glUniform4uiv = NULL; +PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding = NULL; +PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv = NULL; +PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv = NULL; +PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv = NULL; +PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv = NULL; +PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL; +PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv = NULL; +PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL; +PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; +PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; +PFNGLVERTEX2DPROC glad_glVertex2d = NULL; +PFNGLVERTEX2DVPROC glad_glVertex2dv = NULL; +PFNGLVERTEX2FPROC glad_glVertex2f = NULL; +PFNGLVERTEX2FVPROC glad_glVertex2fv = NULL; +PFNGLVERTEX2IPROC glad_glVertex2i = NULL; +PFNGLVERTEX2IVPROC glad_glVertex2iv = NULL; +PFNGLVERTEX2SPROC glad_glVertex2s = NULL; +PFNGLVERTEX2SVPROC glad_glVertex2sv = NULL; +PFNGLVERTEX3DPROC glad_glVertex3d = NULL; +PFNGLVERTEX3DVPROC glad_glVertex3dv = NULL; +PFNGLVERTEX3FPROC glad_glVertex3f = NULL; +PFNGLVERTEX3FVPROC glad_glVertex3fv = NULL; +PFNGLVERTEX3IPROC glad_glVertex3i = NULL; +PFNGLVERTEX3IVPROC glad_glVertex3iv = NULL; +PFNGLVERTEX3SPROC glad_glVertex3s = NULL; +PFNGLVERTEX3SVPROC glad_glVertex3sv = NULL; +PFNGLVERTEX4DPROC glad_glVertex4d = NULL; +PFNGLVERTEX4DVPROC glad_glVertex4dv = NULL; +PFNGLVERTEX4FPROC glad_glVertex4f = NULL; +PFNGLVERTEX4FVPROC glad_glVertex4fv = NULL; +PFNGLVERTEX4IPROC glad_glVertex4i = NULL; +PFNGLVERTEX4IVPROC glad_glVertex4iv = NULL; +PFNGLVERTEX4SPROC glad_glVertex4s = NULL; +PFNGLVERTEX4SVPROC glad_glVertex4sv = NULL; +PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL; +PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL; +PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; +PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s = NULL; +PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv = NULL; +PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d = NULL; +PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv = NULL; +PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL; +PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s = NULL; +PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv = NULL; +PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d = NULL; +PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv = NULL; +PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL; +PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s = NULL; +PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv = NULL; +PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv = NULL; +PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv = NULL; +PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv = NULL; +PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub = NULL; +PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv = NULL; +PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv = NULL; +PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv = NULL; +PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv = NULL; +PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d = NULL; +PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv = NULL; +PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL; +PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv = NULL; +PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s = NULL; +PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv = NULL; +PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv = NULL; +PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv = NULL; +PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv = NULL; +PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor = NULL; +PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i = NULL; +PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv = NULL; +PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui = NULL; +PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv = NULL; +PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i = NULL; +PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv = NULL; +PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui = NULL; +PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv = NULL; +PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i = NULL; +PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv = NULL; +PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui = NULL; +PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv = NULL; +PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv = NULL; +PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i = NULL; +PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv = NULL; +PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv = NULL; +PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv = NULL; +PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui = NULL; +PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv = NULL; +PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv = NULL; +PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer = NULL; +PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui = NULL; +PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv = NULL; +PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui = NULL; +PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv = NULL; +PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui = NULL; +PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv = NULL; +PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui = NULL; +PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv = NULL; +PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL; +PFNGLVERTEXP2UIPROC glad_glVertexP2ui = NULL; +PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv = NULL; +PFNGLVERTEXP3UIPROC glad_glVertexP3ui = NULL; +PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv = NULL; +PFNGLVERTEXP4UIPROC glad_glVertexP4ui = NULL; +PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv = NULL; +PFNGLVERTEXPOINTERPROC glad_glVertexPointer = NULL; +PFNGLVIEWPORTPROC glad_glViewport = NULL; +PFNGLWAITSYNCPROC glad_glWaitSync = NULL; +PFNGLWINDOWPOS2DPROC glad_glWindowPos2d = NULL; +PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv = NULL; +PFNGLWINDOWPOS2FPROC glad_glWindowPos2f = NULL; +PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv = NULL; +PFNGLWINDOWPOS2IPROC glad_glWindowPos2i = NULL; +PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv = NULL; +PFNGLWINDOWPOS2SPROC glad_glWindowPos2s = NULL; +PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv = NULL; +PFNGLWINDOWPOS3DPROC glad_glWindowPos3d = NULL; +PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv = NULL; +PFNGLWINDOWPOS3FPROC glad_glWindowPos3f = NULL; +PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv = NULL; +PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL; +PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL; +PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL; +PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL; + + +static void glad_gl_load_GL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_0) return; + glad_glAccum = (PFNGLACCUMPROC) load(userptr, "glAccum"); + glad_glAlphaFunc = (PFNGLALPHAFUNCPROC) load(userptr, "glAlphaFunc"); + glad_glBegin = (PFNGLBEGINPROC) load(userptr, "glBegin"); + glad_glBitmap = (PFNGLBITMAPPROC) load(userptr, "glBitmap"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC) load(userptr, "glBlendFunc"); + glad_glCallList = (PFNGLCALLLISTPROC) load(userptr, "glCallList"); + glad_glCallLists = (PFNGLCALLLISTSPROC) load(userptr, "glCallLists"); + glad_glClear = (PFNGLCLEARPROC) load(userptr, "glClear"); + glad_glClearAccum = (PFNGLCLEARACCUMPROC) load(userptr, "glClearAccum"); + glad_glClearColor = (PFNGLCLEARCOLORPROC) load(userptr, "glClearColor"); + glad_glClearDepth = (PFNGLCLEARDEPTHPROC) load(userptr, "glClearDepth"); + glad_glClearIndex = (PFNGLCLEARINDEXPROC) load(userptr, "glClearIndex"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC) load(userptr, "glClearStencil"); + glad_glClipPlane = (PFNGLCLIPPLANEPROC) load(userptr, "glClipPlane"); + glad_glColor3b = (PFNGLCOLOR3BPROC) load(userptr, "glColor3b"); + glad_glColor3bv = (PFNGLCOLOR3BVPROC) load(userptr, "glColor3bv"); + glad_glColor3d = (PFNGLCOLOR3DPROC) load(userptr, "glColor3d"); + glad_glColor3dv = (PFNGLCOLOR3DVPROC) load(userptr, "glColor3dv"); + glad_glColor3f = (PFNGLCOLOR3FPROC) load(userptr, "glColor3f"); + glad_glColor3fv = (PFNGLCOLOR3FVPROC) load(userptr, "glColor3fv"); + glad_glColor3i = (PFNGLCOLOR3IPROC) load(userptr, "glColor3i"); + glad_glColor3iv = (PFNGLCOLOR3IVPROC) load(userptr, "glColor3iv"); + glad_glColor3s = (PFNGLCOLOR3SPROC) load(userptr, "glColor3s"); + glad_glColor3sv = (PFNGLCOLOR3SVPROC) load(userptr, "glColor3sv"); + glad_glColor3ub = (PFNGLCOLOR3UBPROC) load(userptr, "glColor3ub"); + glad_glColor3ubv = (PFNGLCOLOR3UBVPROC) load(userptr, "glColor3ubv"); + glad_glColor3ui = (PFNGLCOLOR3UIPROC) load(userptr, "glColor3ui"); + glad_glColor3uiv = (PFNGLCOLOR3UIVPROC) load(userptr, "glColor3uiv"); + glad_glColor3us = (PFNGLCOLOR3USPROC) load(userptr, "glColor3us"); + glad_glColor3usv = (PFNGLCOLOR3USVPROC) load(userptr, "glColor3usv"); + glad_glColor4b = (PFNGLCOLOR4BPROC) load(userptr, "glColor4b"); + glad_glColor4bv = (PFNGLCOLOR4BVPROC) load(userptr, "glColor4bv"); + glad_glColor4d = (PFNGLCOLOR4DPROC) load(userptr, "glColor4d"); + glad_glColor4dv = (PFNGLCOLOR4DVPROC) load(userptr, "glColor4dv"); + glad_glColor4f = (PFNGLCOLOR4FPROC) load(userptr, "glColor4f"); + glad_glColor4fv = (PFNGLCOLOR4FVPROC) load(userptr, "glColor4fv"); + glad_glColor4i = (PFNGLCOLOR4IPROC) load(userptr, "glColor4i"); + glad_glColor4iv = (PFNGLCOLOR4IVPROC) load(userptr, "glColor4iv"); + glad_glColor4s = (PFNGLCOLOR4SPROC) load(userptr, "glColor4s"); + glad_glColor4sv = (PFNGLCOLOR4SVPROC) load(userptr, "glColor4sv"); + glad_glColor4ub = (PFNGLCOLOR4UBPROC) load(userptr, "glColor4ub"); + glad_glColor4ubv = (PFNGLCOLOR4UBVPROC) load(userptr, "glColor4ubv"); + glad_glColor4ui = (PFNGLCOLOR4UIPROC) load(userptr, "glColor4ui"); + glad_glColor4uiv = (PFNGLCOLOR4UIVPROC) load(userptr, "glColor4uiv"); + glad_glColor4us = (PFNGLCOLOR4USPROC) load(userptr, "glColor4us"); + glad_glColor4usv = (PFNGLCOLOR4USVPROC) load(userptr, "glColor4usv"); + glad_glColorMask = (PFNGLCOLORMASKPROC) load(userptr, "glColorMask"); + glad_glColorMaterial = (PFNGLCOLORMATERIALPROC) load(userptr, "glColorMaterial"); + glad_glCopyPixels = (PFNGLCOPYPIXELSPROC) load(userptr, "glCopyPixels"); + glad_glCullFace = (PFNGLCULLFACEPROC) load(userptr, "glCullFace"); + glad_glDeleteLists = (PFNGLDELETELISTSPROC) load(userptr, "glDeleteLists"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC) load(userptr, "glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC) load(userptr, "glDepthMask"); + glad_glDepthRange = (PFNGLDEPTHRANGEPROC) load(userptr, "glDepthRange"); + glad_glDisable = (PFNGLDISABLEPROC) load(userptr, "glDisable"); + glad_glDrawBuffer = (PFNGLDRAWBUFFERPROC) load(userptr, "glDrawBuffer"); + glad_glDrawPixels = (PFNGLDRAWPIXELSPROC) load(userptr, "glDrawPixels"); + glad_glEdgeFlag = (PFNGLEDGEFLAGPROC) load(userptr, "glEdgeFlag"); + glad_glEdgeFlagv = (PFNGLEDGEFLAGVPROC) load(userptr, "glEdgeFlagv"); + glad_glEnable = (PFNGLENABLEPROC) load(userptr, "glEnable"); + glad_glEnd = (PFNGLENDPROC) load(userptr, "glEnd"); + glad_glEndList = (PFNGLENDLISTPROC) load(userptr, "glEndList"); + glad_glEvalCoord1d = (PFNGLEVALCOORD1DPROC) load(userptr, "glEvalCoord1d"); + glad_glEvalCoord1dv = (PFNGLEVALCOORD1DVPROC) load(userptr, "glEvalCoord1dv"); + glad_glEvalCoord1f = (PFNGLEVALCOORD1FPROC) load(userptr, "glEvalCoord1f"); + glad_glEvalCoord1fv = (PFNGLEVALCOORD1FVPROC) load(userptr, "glEvalCoord1fv"); + glad_glEvalCoord2d = (PFNGLEVALCOORD2DPROC) load(userptr, "glEvalCoord2d"); + glad_glEvalCoord2dv = (PFNGLEVALCOORD2DVPROC) load(userptr, "glEvalCoord2dv"); + glad_glEvalCoord2f = (PFNGLEVALCOORD2FPROC) load(userptr, "glEvalCoord2f"); + glad_glEvalCoord2fv = (PFNGLEVALCOORD2FVPROC) load(userptr, "glEvalCoord2fv"); + glad_glEvalMesh1 = (PFNGLEVALMESH1PROC) load(userptr, "glEvalMesh1"); + glad_glEvalMesh2 = (PFNGLEVALMESH2PROC) load(userptr, "glEvalMesh2"); + glad_glEvalPoint1 = (PFNGLEVALPOINT1PROC) load(userptr, "glEvalPoint1"); + glad_glEvalPoint2 = (PFNGLEVALPOINT2PROC) load(userptr, "glEvalPoint2"); + glad_glFeedbackBuffer = (PFNGLFEEDBACKBUFFERPROC) load(userptr, "glFeedbackBuffer"); + glad_glFinish = (PFNGLFINISHPROC) load(userptr, "glFinish"); + glad_glFlush = (PFNGLFLUSHPROC) load(userptr, "glFlush"); + glad_glFogf = (PFNGLFOGFPROC) load(userptr, "glFogf"); + glad_glFogfv = (PFNGLFOGFVPROC) load(userptr, "glFogfv"); + glad_glFogi = (PFNGLFOGIPROC) load(userptr, "glFogi"); + glad_glFogiv = (PFNGLFOGIVPROC) load(userptr, "glFogiv"); + glad_glFrontFace = (PFNGLFRONTFACEPROC) load(userptr, "glFrontFace"); + glad_glFrustum = (PFNGLFRUSTUMPROC) load(userptr, "glFrustum"); + glad_glGenLists = (PFNGLGENLISTSPROC) load(userptr, "glGenLists"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC) load(userptr, "glGetBooleanv"); + glad_glGetClipPlane = (PFNGLGETCLIPPLANEPROC) load(userptr, "glGetClipPlane"); + glad_glGetDoublev = (PFNGLGETDOUBLEVPROC) load(userptr, "glGetDoublev"); + glad_glGetError = (PFNGLGETERRORPROC) load(userptr, "glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC) load(userptr, "glGetFloatv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC) load(userptr, "glGetIntegerv"); + glad_glGetLightfv = (PFNGLGETLIGHTFVPROC) load(userptr, "glGetLightfv"); + glad_glGetLightiv = (PFNGLGETLIGHTIVPROC) load(userptr, "glGetLightiv"); + glad_glGetMapdv = (PFNGLGETMAPDVPROC) load(userptr, "glGetMapdv"); + glad_glGetMapfv = (PFNGLGETMAPFVPROC) load(userptr, "glGetMapfv"); + glad_glGetMapiv = (PFNGLGETMAPIVPROC) load(userptr, "glGetMapiv"); + glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC) load(userptr, "glGetMaterialfv"); + glad_glGetMaterialiv = (PFNGLGETMATERIALIVPROC) load(userptr, "glGetMaterialiv"); + glad_glGetPixelMapfv = (PFNGLGETPIXELMAPFVPROC) load(userptr, "glGetPixelMapfv"); + glad_glGetPixelMapuiv = (PFNGLGETPIXELMAPUIVPROC) load(userptr, "glGetPixelMapuiv"); + glad_glGetPixelMapusv = (PFNGLGETPIXELMAPUSVPROC) load(userptr, "glGetPixelMapusv"); + glad_glGetPolygonStipple = (PFNGLGETPOLYGONSTIPPLEPROC) load(userptr, "glGetPolygonStipple"); + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC) load(userptr, "glGetTexEnvfv"); + glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC) load(userptr, "glGetTexEnviv"); + glad_glGetTexGendv = (PFNGLGETTEXGENDVPROC) load(userptr, "glGetTexGendv"); + glad_glGetTexGenfv = (PFNGLGETTEXGENFVPROC) load(userptr, "glGetTexGenfv"); + glad_glGetTexGeniv = (PFNGLGETTEXGENIVPROC) load(userptr, "glGetTexGeniv"); + glad_glGetTexImage = (PFNGLGETTEXIMAGEPROC) load(userptr, "glGetTexImage"); + glad_glGetTexLevelParameterfv = (PFNGLGETTEXLEVELPARAMETERFVPROC) load(userptr, "glGetTexLevelParameterfv"); + glad_glGetTexLevelParameteriv = (PFNGLGETTEXLEVELPARAMETERIVPROC) load(userptr, "glGetTexLevelParameteriv"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC) load(userptr, "glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC) load(userptr, "glGetTexParameteriv"); + glad_glHint = (PFNGLHINTPROC) load(userptr, "glHint"); + glad_glIndexMask = (PFNGLINDEXMASKPROC) load(userptr, "glIndexMask"); + glad_glIndexd = (PFNGLINDEXDPROC) load(userptr, "glIndexd"); + glad_glIndexdv = (PFNGLINDEXDVPROC) load(userptr, "glIndexdv"); + glad_glIndexf = (PFNGLINDEXFPROC) load(userptr, "glIndexf"); + glad_glIndexfv = (PFNGLINDEXFVPROC) load(userptr, "glIndexfv"); + glad_glIndexi = (PFNGLINDEXIPROC) load(userptr, "glIndexi"); + glad_glIndexiv = (PFNGLINDEXIVPROC) load(userptr, "glIndexiv"); + glad_glIndexs = (PFNGLINDEXSPROC) load(userptr, "glIndexs"); + glad_glIndexsv = (PFNGLINDEXSVPROC) load(userptr, "glIndexsv"); + glad_glInitNames = (PFNGLINITNAMESPROC) load(userptr, "glInitNames"); + glad_glIsEnabled = (PFNGLISENABLEDPROC) load(userptr, "glIsEnabled"); + glad_glIsList = (PFNGLISLISTPROC) load(userptr, "glIsList"); + glad_glLightModelf = (PFNGLLIGHTMODELFPROC) load(userptr, "glLightModelf"); + glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC) load(userptr, "glLightModelfv"); + glad_glLightModeli = (PFNGLLIGHTMODELIPROC) load(userptr, "glLightModeli"); + glad_glLightModeliv = (PFNGLLIGHTMODELIVPROC) load(userptr, "glLightModeliv"); + glad_glLightf = (PFNGLLIGHTFPROC) load(userptr, "glLightf"); + glad_glLightfv = (PFNGLLIGHTFVPROC) load(userptr, "glLightfv"); + glad_glLighti = (PFNGLLIGHTIPROC) load(userptr, "glLighti"); + glad_glLightiv = (PFNGLLIGHTIVPROC) load(userptr, "glLightiv"); + glad_glLineStipple = (PFNGLLINESTIPPLEPROC) load(userptr, "glLineStipple"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC) load(userptr, "glLineWidth"); + glad_glListBase = (PFNGLLISTBASEPROC) load(userptr, "glListBase"); + glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC) load(userptr, "glLoadIdentity"); + glad_glLoadMatrixd = (PFNGLLOADMATRIXDPROC) load(userptr, "glLoadMatrixd"); + glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC) load(userptr, "glLoadMatrixf"); + glad_glLoadName = (PFNGLLOADNAMEPROC) load(userptr, "glLoadName"); + glad_glLogicOp = (PFNGLLOGICOPPROC) load(userptr, "glLogicOp"); + glad_glMap1d = (PFNGLMAP1DPROC) load(userptr, "glMap1d"); + glad_glMap1f = (PFNGLMAP1FPROC) load(userptr, "glMap1f"); + glad_glMap2d = (PFNGLMAP2DPROC) load(userptr, "glMap2d"); + glad_glMap2f = (PFNGLMAP2FPROC) load(userptr, "glMap2f"); + glad_glMapGrid1d = (PFNGLMAPGRID1DPROC) load(userptr, "glMapGrid1d"); + glad_glMapGrid1f = (PFNGLMAPGRID1FPROC) load(userptr, "glMapGrid1f"); + glad_glMapGrid2d = (PFNGLMAPGRID2DPROC) load(userptr, "glMapGrid2d"); + glad_glMapGrid2f = (PFNGLMAPGRID2FPROC) load(userptr, "glMapGrid2f"); + glad_glMaterialf = (PFNGLMATERIALFPROC) load(userptr, "glMaterialf"); + glad_glMaterialfv = (PFNGLMATERIALFVPROC) load(userptr, "glMaterialfv"); + glad_glMateriali = (PFNGLMATERIALIPROC) load(userptr, "glMateriali"); + glad_glMaterialiv = (PFNGLMATERIALIVPROC) load(userptr, "glMaterialiv"); + glad_glMatrixMode = (PFNGLMATRIXMODEPROC) load(userptr, "glMatrixMode"); + glad_glMultMatrixd = (PFNGLMULTMATRIXDPROC) load(userptr, "glMultMatrixd"); + glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC) load(userptr, "glMultMatrixf"); + glad_glNewList = (PFNGLNEWLISTPROC) load(userptr, "glNewList"); + glad_glNormal3b = (PFNGLNORMAL3BPROC) load(userptr, "glNormal3b"); + glad_glNormal3bv = (PFNGLNORMAL3BVPROC) load(userptr, "glNormal3bv"); + glad_glNormal3d = (PFNGLNORMAL3DPROC) load(userptr, "glNormal3d"); + glad_glNormal3dv = (PFNGLNORMAL3DVPROC) load(userptr, "glNormal3dv"); + glad_glNormal3f = (PFNGLNORMAL3FPROC) load(userptr, "glNormal3f"); + glad_glNormal3fv = (PFNGLNORMAL3FVPROC) load(userptr, "glNormal3fv"); + glad_glNormal3i = (PFNGLNORMAL3IPROC) load(userptr, "glNormal3i"); + glad_glNormal3iv = (PFNGLNORMAL3IVPROC) load(userptr, "glNormal3iv"); + glad_glNormal3s = (PFNGLNORMAL3SPROC) load(userptr, "glNormal3s"); + glad_glNormal3sv = (PFNGLNORMAL3SVPROC) load(userptr, "glNormal3sv"); + glad_glOrtho = (PFNGLORTHOPROC) load(userptr, "glOrtho"); + glad_glPassThrough = (PFNGLPASSTHROUGHPROC) load(userptr, "glPassThrough"); + glad_glPixelMapfv = (PFNGLPIXELMAPFVPROC) load(userptr, "glPixelMapfv"); + glad_glPixelMapuiv = (PFNGLPIXELMAPUIVPROC) load(userptr, "glPixelMapuiv"); + glad_glPixelMapusv = (PFNGLPIXELMAPUSVPROC) load(userptr, "glPixelMapusv"); + glad_glPixelStoref = (PFNGLPIXELSTOREFPROC) load(userptr, "glPixelStoref"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC) load(userptr, "glPixelStorei"); + glad_glPixelTransferf = (PFNGLPIXELTRANSFERFPROC) load(userptr, "glPixelTransferf"); + glad_glPixelTransferi = (PFNGLPIXELTRANSFERIPROC) load(userptr, "glPixelTransferi"); + glad_glPixelZoom = (PFNGLPIXELZOOMPROC) load(userptr, "glPixelZoom"); + glad_glPointSize = (PFNGLPOINTSIZEPROC) load(userptr, "glPointSize"); + glad_glPolygonMode = (PFNGLPOLYGONMODEPROC) load(userptr, "glPolygonMode"); + glad_glPolygonStipple = (PFNGLPOLYGONSTIPPLEPROC) load(userptr, "glPolygonStipple"); + glad_glPopAttrib = (PFNGLPOPATTRIBPROC) load(userptr, "glPopAttrib"); + glad_glPopMatrix = (PFNGLPOPMATRIXPROC) load(userptr, "glPopMatrix"); + glad_glPopName = (PFNGLPOPNAMEPROC) load(userptr, "glPopName"); + glad_glPushAttrib = (PFNGLPUSHATTRIBPROC) load(userptr, "glPushAttrib"); + glad_glPushMatrix = (PFNGLPUSHMATRIXPROC) load(userptr, "glPushMatrix"); + glad_glPushName = (PFNGLPUSHNAMEPROC) load(userptr, "glPushName"); + glad_glRasterPos2d = (PFNGLRASTERPOS2DPROC) load(userptr, "glRasterPos2d"); + glad_glRasterPos2dv = (PFNGLRASTERPOS2DVPROC) load(userptr, "glRasterPos2dv"); + glad_glRasterPos2f = (PFNGLRASTERPOS2FPROC) load(userptr, "glRasterPos2f"); + glad_glRasterPos2fv = (PFNGLRASTERPOS2FVPROC) load(userptr, "glRasterPos2fv"); + glad_glRasterPos2i = (PFNGLRASTERPOS2IPROC) load(userptr, "glRasterPos2i"); + glad_glRasterPos2iv = (PFNGLRASTERPOS2IVPROC) load(userptr, "glRasterPos2iv"); + glad_glRasterPos2s = (PFNGLRASTERPOS2SPROC) load(userptr, "glRasterPos2s"); + glad_glRasterPos2sv = (PFNGLRASTERPOS2SVPROC) load(userptr, "glRasterPos2sv"); + glad_glRasterPos3d = (PFNGLRASTERPOS3DPROC) load(userptr, "glRasterPos3d"); + glad_glRasterPos3dv = (PFNGLRASTERPOS3DVPROC) load(userptr, "glRasterPos3dv"); + glad_glRasterPos3f = (PFNGLRASTERPOS3FPROC) load(userptr, "glRasterPos3f"); + glad_glRasterPos3fv = (PFNGLRASTERPOS3FVPROC) load(userptr, "glRasterPos3fv"); + glad_glRasterPos3i = (PFNGLRASTERPOS3IPROC) load(userptr, "glRasterPos3i"); + glad_glRasterPos3iv = (PFNGLRASTERPOS3IVPROC) load(userptr, "glRasterPos3iv"); + glad_glRasterPos3s = (PFNGLRASTERPOS3SPROC) load(userptr, "glRasterPos3s"); + glad_glRasterPos3sv = (PFNGLRASTERPOS3SVPROC) load(userptr, "glRasterPos3sv"); + glad_glRasterPos4d = (PFNGLRASTERPOS4DPROC) load(userptr, "glRasterPos4d"); + glad_glRasterPos4dv = (PFNGLRASTERPOS4DVPROC) load(userptr, "glRasterPos4dv"); + glad_glRasterPos4f = (PFNGLRASTERPOS4FPROC) load(userptr, "glRasterPos4f"); + glad_glRasterPos4fv = (PFNGLRASTERPOS4FVPROC) load(userptr, "glRasterPos4fv"); + glad_glRasterPos4i = (PFNGLRASTERPOS4IPROC) load(userptr, "glRasterPos4i"); + glad_glRasterPos4iv = (PFNGLRASTERPOS4IVPROC) load(userptr, "glRasterPos4iv"); + glad_glRasterPos4s = (PFNGLRASTERPOS4SPROC) load(userptr, "glRasterPos4s"); + glad_glRasterPos4sv = (PFNGLRASTERPOS4SVPROC) load(userptr, "glRasterPos4sv"); + glad_glReadBuffer = (PFNGLREADBUFFERPROC) load(userptr, "glReadBuffer"); + glad_glReadPixels = (PFNGLREADPIXELSPROC) load(userptr, "glReadPixels"); + glad_glRectd = (PFNGLRECTDPROC) load(userptr, "glRectd"); + glad_glRectdv = (PFNGLRECTDVPROC) load(userptr, "glRectdv"); + glad_glRectf = (PFNGLRECTFPROC) load(userptr, "glRectf"); + glad_glRectfv = (PFNGLRECTFVPROC) load(userptr, "glRectfv"); + glad_glRecti = (PFNGLRECTIPROC) load(userptr, "glRecti"); + glad_glRectiv = (PFNGLRECTIVPROC) load(userptr, "glRectiv"); + glad_glRects = (PFNGLRECTSPROC) load(userptr, "glRects"); + glad_glRectsv = (PFNGLRECTSVPROC) load(userptr, "glRectsv"); + glad_glRenderMode = (PFNGLRENDERMODEPROC) load(userptr, "glRenderMode"); + glad_glRotated = (PFNGLROTATEDPROC) load(userptr, "glRotated"); + glad_glRotatef = (PFNGLROTATEFPROC) load(userptr, "glRotatef"); + glad_glScaled = (PFNGLSCALEDPROC) load(userptr, "glScaled"); + glad_glScalef = (PFNGLSCALEFPROC) load(userptr, "glScalef"); + glad_glScissor = (PFNGLSCISSORPROC) load(userptr, "glScissor"); + glad_glSelectBuffer = (PFNGLSELECTBUFFERPROC) load(userptr, "glSelectBuffer"); + glad_glShadeModel = (PFNGLSHADEMODELPROC) load(userptr, "glShadeModel"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC) load(userptr, "glStencilFunc"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC) load(userptr, "glStencilMask"); + glad_glStencilOp = (PFNGLSTENCILOPPROC) load(userptr, "glStencilOp"); + glad_glTexCoord1d = (PFNGLTEXCOORD1DPROC) load(userptr, "glTexCoord1d"); + glad_glTexCoord1dv = (PFNGLTEXCOORD1DVPROC) load(userptr, "glTexCoord1dv"); + glad_glTexCoord1f = (PFNGLTEXCOORD1FPROC) load(userptr, "glTexCoord1f"); + glad_glTexCoord1fv = (PFNGLTEXCOORD1FVPROC) load(userptr, "glTexCoord1fv"); + glad_glTexCoord1i = (PFNGLTEXCOORD1IPROC) load(userptr, "glTexCoord1i"); + glad_glTexCoord1iv = (PFNGLTEXCOORD1IVPROC) load(userptr, "glTexCoord1iv"); + glad_glTexCoord1s = (PFNGLTEXCOORD1SPROC) load(userptr, "glTexCoord1s"); + glad_glTexCoord1sv = (PFNGLTEXCOORD1SVPROC) load(userptr, "glTexCoord1sv"); + glad_glTexCoord2d = (PFNGLTEXCOORD2DPROC) load(userptr, "glTexCoord2d"); + glad_glTexCoord2dv = (PFNGLTEXCOORD2DVPROC) load(userptr, "glTexCoord2dv"); + glad_glTexCoord2f = (PFNGLTEXCOORD2FPROC) load(userptr, "glTexCoord2f"); + glad_glTexCoord2fv = (PFNGLTEXCOORD2FVPROC) load(userptr, "glTexCoord2fv"); + glad_glTexCoord2i = (PFNGLTEXCOORD2IPROC) load(userptr, "glTexCoord2i"); + glad_glTexCoord2iv = (PFNGLTEXCOORD2IVPROC) load(userptr, "glTexCoord2iv"); + glad_glTexCoord2s = (PFNGLTEXCOORD2SPROC) load(userptr, "glTexCoord2s"); + glad_glTexCoord2sv = (PFNGLTEXCOORD2SVPROC) load(userptr, "glTexCoord2sv"); + glad_glTexCoord3d = (PFNGLTEXCOORD3DPROC) load(userptr, "glTexCoord3d"); + glad_glTexCoord3dv = (PFNGLTEXCOORD3DVPROC) load(userptr, "glTexCoord3dv"); + glad_glTexCoord3f = (PFNGLTEXCOORD3FPROC) load(userptr, "glTexCoord3f"); + glad_glTexCoord3fv = (PFNGLTEXCOORD3FVPROC) load(userptr, "glTexCoord3fv"); + glad_glTexCoord3i = (PFNGLTEXCOORD3IPROC) load(userptr, "glTexCoord3i"); + glad_glTexCoord3iv = (PFNGLTEXCOORD3IVPROC) load(userptr, "glTexCoord3iv"); + glad_glTexCoord3s = (PFNGLTEXCOORD3SPROC) load(userptr, "glTexCoord3s"); + glad_glTexCoord3sv = (PFNGLTEXCOORD3SVPROC) load(userptr, "glTexCoord3sv"); + glad_glTexCoord4d = (PFNGLTEXCOORD4DPROC) load(userptr, "glTexCoord4d"); + glad_glTexCoord4dv = (PFNGLTEXCOORD4DVPROC) load(userptr, "glTexCoord4dv"); + glad_glTexCoord4f = (PFNGLTEXCOORD4FPROC) load(userptr, "glTexCoord4f"); + glad_glTexCoord4fv = (PFNGLTEXCOORD4FVPROC) load(userptr, "glTexCoord4fv"); + glad_glTexCoord4i = (PFNGLTEXCOORD4IPROC) load(userptr, "glTexCoord4i"); + glad_glTexCoord4iv = (PFNGLTEXCOORD4IVPROC) load(userptr, "glTexCoord4iv"); + glad_glTexCoord4s = (PFNGLTEXCOORD4SPROC) load(userptr, "glTexCoord4s"); + glad_glTexCoord4sv = (PFNGLTEXCOORD4SVPROC) load(userptr, "glTexCoord4sv"); + glad_glTexEnvf = (PFNGLTEXENVFPROC) load(userptr, "glTexEnvf"); + glad_glTexEnvfv = (PFNGLTEXENVFVPROC) load(userptr, "glTexEnvfv"); + glad_glTexEnvi = (PFNGLTEXENVIPROC) load(userptr, "glTexEnvi"); + glad_glTexEnviv = (PFNGLTEXENVIVPROC) load(userptr, "glTexEnviv"); + glad_glTexGend = (PFNGLTEXGENDPROC) load(userptr, "glTexGend"); + glad_glTexGendv = (PFNGLTEXGENDVPROC) load(userptr, "glTexGendv"); + glad_glTexGenf = (PFNGLTEXGENFPROC) load(userptr, "glTexGenf"); + glad_glTexGenfv = (PFNGLTEXGENFVPROC) load(userptr, "glTexGenfv"); + glad_glTexGeni = (PFNGLTEXGENIPROC) load(userptr, "glTexGeni"); + glad_glTexGeniv = (PFNGLTEXGENIVPROC) load(userptr, "glTexGeniv"); + glad_glTexImage1D = (PFNGLTEXIMAGE1DPROC) load(userptr, "glTexImage1D"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC) load(userptr, "glTexImage2D"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC) load(userptr, "glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC) load(userptr, "glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC) load(userptr, "glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC) load(userptr, "glTexParameteriv"); + glad_glTranslated = (PFNGLTRANSLATEDPROC) load(userptr, "glTranslated"); + glad_glTranslatef = (PFNGLTRANSLATEFPROC) load(userptr, "glTranslatef"); + glad_glVertex2d = (PFNGLVERTEX2DPROC) load(userptr, "glVertex2d"); + glad_glVertex2dv = (PFNGLVERTEX2DVPROC) load(userptr, "glVertex2dv"); + glad_glVertex2f = (PFNGLVERTEX2FPROC) load(userptr, "glVertex2f"); + glad_glVertex2fv = (PFNGLVERTEX2FVPROC) load(userptr, "glVertex2fv"); + glad_glVertex2i = (PFNGLVERTEX2IPROC) load(userptr, "glVertex2i"); + glad_glVertex2iv = (PFNGLVERTEX2IVPROC) load(userptr, "glVertex2iv"); + glad_glVertex2s = (PFNGLVERTEX2SPROC) load(userptr, "glVertex2s"); + glad_glVertex2sv = (PFNGLVERTEX2SVPROC) load(userptr, "glVertex2sv"); + glad_glVertex3d = (PFNGLVERTEX3DPROC) load(userptr, "glVertex3d"); + glad_glVertex3dv = (PFNGLVERTEX3DVPROC) load(userptr, "glVertex3dv"); + glad_glVertex3f = (PFNGLVERTEX3FPROC) load(userptr, "glVertex3f"); + glad_glVertex3fv = (PFNGLVERTEX3FVPROC) load(userptr, "glVertex3fv"); + glad_glVertex3i = (PFNGLVERTEX3IPROC) load(userptr, "glVertex3i"); + glad_glVertex3iv = (PFNGLVERTEX3IVPROC) load(userptr, "glVertex3iv"); + glad_glVertex3s = (PFNGLVERTEX3SPROC) load(userptr, "glVertex3s"); + glad_glVertex3sv = (PFNGLVERTEX3SVPROC) load(userptr, "glVertex3sv"); + glad_glVertex4d = (PFNGLVERTEX4DPROC) load(userptr, "glVertex4d"); + glad_glVertex4dv = (PFNGLVERTEX4DVPROC) load(userptr, "glVertex4dv"); + glad_glVertex4f = (PFNGLVERTEX4FPROC) load(userptr, "glVertex4f"); + glad_glVertex4fv = (PFNGLVERTEX4FVPROC) load(userptr, "glVertex4fv"); + glad_glVertex4i = (PFNGLVERTEX4IPROC) load(userptr, "glVertex4i"); + glad_glVertex4iv = (PFNGLVERTEX4IVPROC) load(userptr, "glVertex4iv"); + glad_glVertex4s = (PFNGLVERTEX4SPROC) load(userptr, "glVertex4s"); + glad_glVertex4sv = (PFNGLVERTEX4SVPROC) load(userptr, "glVertex4sv"); + glad_glViewport = (PFNGLVIEWPORTPROC) load(userptr, "glViewport"); +} +static void glad_gl_load_GL_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_1) return; + glad_glAreTexturesResident = (PFNGLARETEXTURESRESIDENTPROC) load(userptr, "glAreTexturesResident"); + glad_glArrayElement = (PFNGLARRAYELEMENTPROC) load(userptr, "glArrayElement"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC) load(userptr, "glBindTexture"); + glad_glColorPointer = (PFNGLCOLORPOINTERPROC) load(userptr, "glColorPointer"); + glad_glCopyTexImage1D = (PFNGLCOPYTEXIMAGE1DPROC) load(userptr, "glCopyTexImage1D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC) load(userptr, "glCopyTexImage2D"); + glad_glCopyTexSubImage1D = (PFNGLCOPYTEXSUBIMAGE1DPROC) load(userptr, "glCopyTexSubImage1D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC) load(userptr, "glCopyTexSubImage2D"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC) load(userptr, "glDeleteTextures"); + glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC) load(userptr, "glDisableClientState"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC) load(userptr, "glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC) load(userptr, "glDrawElements"); + glad_glEdgeFlagPointer = (PFNGLEDGEFLAGPOINTERPROC) load(userptr, "glEdgeFlagPointer"); + glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC) load(userptr, "glEnableClientState"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC) load(userptr, "glGenTextures"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC) load(userptr, "glGetPointerv"); + glad_glIndexPointer = (PFNGLINDEXPOINTERPROC) load(userptr, "glIndexPointer"); + glad_glIndexub = (PFNGLINDEXUBPROC) load(userptr, "glIndexub"); + glad_glIndexubv = (PFNGLINDEXUBVPROC) load(userptr, "glIndexubv"); + glad_glInterleavedArrays = (PFNGLINTERLEAVEDARRAYSPROC) load(userptr, "glInterleavedArrays"); + glad_glIsTexture = (PFNGLISTEXTUREPROC) load(userptr, "glIsTexture"); + glad_glNormalPointer = (PFNGLNORMALPOINTERPROC) load(userptr, "glNormalPointer"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC) load(userptr, "glPolygonOffset"); + glad_glPopClientAttrib = (PFNGLPOPCLIENTATTRIBPROC) load(userptr, "glPopClientAttrib"); + glad_glPrioritizeTextures = (PFNGLPRIORITIZETEXTURESPROC) load(userptr, "glPrioritizeTextures"); + glad_glPushClientAttrib = (PFNGLPUSHCLIENTATTRIBPROC) load(userptr, "glPushClientAttrib"); + glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC) load(userptr, "glTexCoordPointer"); + glad_glTexSubImage1D = (PFNGLTEXSUBIMAGE1DPROC) load(userptr, "glTexSubImage1D"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC) load(userptr, "glTexSubImage2D"); + glad_glVertexPointer = (PFNGLVERTEXPOINTERPROC) load(userptr, "glVertexPointer"); +} +static void glad_gl_load_GL_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_2) return; + glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC) load(userptr, "glCopyTexSubImage3D"); + glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC) load(userptr, "glDrawRangeElements"); + glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC) load(userptr, "glTexImage3D"); + glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC) load(userptr, "glTexSubImage3D"); +} +static void glad_gl_load_GL_VERSION_1_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_3) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC) load(userptr, "glActiveTexture"); + glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC) load(userptr, "glClientActiveTexture"); + glad_glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC) load(userptr, "glCompressedTexImage1D"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC) load(userptr, "glCompressedTexImage2D"); + glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC) load(userptr, "glCompressedTexImage3D"); + glad_glCompressedTexSubImage1D = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) load(userptr, "glCompressedTexSubImage1D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) load(userptr, "glCompressedTexSubImage2D"); + glad_glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) load(userptr, "glCompressedTexSubImage3D"); + glad_glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC) load(userptr, "glGetCompressedTexImage"); + glad_glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC) load(userptr, "glLoadTransposeMatrixd"); + glad_glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC) load(userptr, "glLoadTransposeMatrixf"); + glad_glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC) load(userptr, "glMultTransposeMatrixd"); + glad_glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC) load(userptr, "glMultTransposeMatrixf"); + glad_glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC) load(userptr, "glMultiTexCoord1d"); + glad_glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC) load(userptr, "glMultiTexCoord1dv"); + glad_glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC) load(userptr, "glMultiTexCoord1f"); + glad_glMultiTexCoord1fv = (PFNGLMULTITEXCOORD1FVPROC) load(userptr, "glMultiTexCoord1fv"); + glad_glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC) load(userptr, "glMultiTexCoord1i"); + glad_glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC) load(userptr, "glMultiTexCoord1iv"); + glad_glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC) load(userptr, "glMultiTexCoord1s"); + glad_glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC) load(userptr, "glMultiTexCoord1sv"); + glad_glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC) load(userptr, "glMultiTexCoord2d"); + glad_glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC) load(userptr, "glMultiTexCoord2dv"); + glad_glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC) load(userptr, "glMultiTexCoord2f"); + glad_glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC) load(userptr, "glMultiTexCoord2fv"); + glad_glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC) load(userptr, "glMultiTexCoord2i"); + glad_glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC) load(userptr, "glMultiTexCoord2iv"); + glad_glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC) load(userptr, "glMultiTexCoord2s"); + glad_glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC) load(userptr, "glMultiTexCoord2sv"); + glad_glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC) load(userptr, "glMultiTexCoord3d"); + glad_glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC) load(userptr, "glMultiTexCoord3dv"); + glad_glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC) load(userptr, "glMultiTexCoord3f"); + glad_glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC) load(userptr, "glMultiTexCoord3fv"); + glad_glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC) load(userptr, "glMultiTexCoord3i"); + glad_glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC) load(userptr, "glMultiTexCoord3iv"); + glad_glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC) load(userptr, "glMultiTexCoord3s"); + glad_glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC) load(userptr, "glMultiTexCoord3sv"); + glad_glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC) load(userptr, "glMultiTexCoord4d"); + glad_glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC) load(userptr, "glMultiTexCoord4dv"); + glad_glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC) load(userptr, "glMultiTexCoord4f"); + glad_glMultiTexCoord4fv = (PFNGLMULTITEXCOORD4FVPROC) load(userptr, "glMultiTexCoord4fv"); + glad_glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC) load(userptr, "glMultiTexCoord4i"); + glad_glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC) load(userptr, "glMultiTexCoord4iv"); + glad_glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC) load(userptr, "glMultiTexCoord4s"); + glad_glMultiTexCoord4sv = (PFNGLMULTITEXCOORD4SVPROC) load(userptr, "glMultiTexCoord4sv"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC) load(userptr, "glSampleCoverage"); +} +static void glad_gl_load_GL_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_4) return; + glad_glBlendColor = (PFNGLBLENDCOLORPROC) load(userptr, "glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC) load(userptr, "glBlendEquation"); + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC) load(userptr, "glBlendFuncSeparate"); + glad_glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC) load(userptr, "glFogCoordPointer"); + glad_glFogCoordd = (PFNGLFOGCOORDDPROC) load(userptr, "glFogCoordd"); + glad_glFogCoorddv = (PFNGLFOGCOORDDVPROC) load(userptr, "glFogCoorddv"); + glad_glFogCoordf = (PFNGLFOGCOORDFPROC) load(userptr, "glFogCoordf"); + glad_glFogCoordfv = (PFNGLFOGCOORDFVPROC) load(userptr, "glFogCoordfv"); + glad_glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC) load(userptr, "glMultiDrawArrays"); + glad_glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC) load(userptr, "glMultiDrawElements"); + glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC) load(userptr, "glPointParameterf"); + glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC) load(userptr, "glPointParameterfv"); + glad_glPointParameteri = (PFNGLPOINTPARAMETERIPROC) load(userptr, "glPointParameteri"); + glad_glPointParameteriv = (PFNGLPOINTPARAMETERIVPROC) load(userptr, "glPointParameteriv"); + glad_glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC) load(userptr, "glSecondaryColor3b"); + glad_glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC) load(userptr, "glSecondaryColor3bv"); + glad_glSecondaryColor3d = (PFNGLSECONDARYCOLOR3DPROC) load(userptr, "glSecondaryColor3d"); + glad_glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC) load(userptr, "glSecondaryColor3dv"); + glad_glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC) load(userptr, "glSecondaryColor3f"); + glad_glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC) load(userptr, "glSecondaryColor3fv"); + glad_glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC) load(userptr, "glSecondaryColor3i"); + glad_glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC) load(userptr, "glSecondaryColor3iv"); + glad_glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC) load(userptr, "glSecondaryColor3s"); + glad_glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC) load(userptr, "glSecondaryColor3sv"); + glad_glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC) load(userptr, "glSecondaryColor3ub"); + glad_glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC) load(userptr, "glSecondaryColor3ubv"); + glad_glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC) load(userptr, "glSecondaryColor3ui"); + glad_glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC) load(userptr, "glSecondaryColor3uiv"); + glad_glSecondaryColor3us = (PFNGLSECONDARYCOLOR3USPROC) load(userptr, "glSecondaryColor3us"); + glad_glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC) load(userptr, "glSecondaryColor3usv"); + glad_glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC) load(userptr, "glSecondaryColorPointer"); + glad_glWindowPos2d = (PFNGLWINDOWPOS2DPROC) load(userptr, "glWindowPos2d"); + glad_glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC) load(userptr, "glWindowPos2dv"); + glad_glWindowPos2f = (PFNGLWINDOWPOS2FPROC) load(userptr, "glWindowPos2f"); + glad_glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC) load(userptr, "glWindowPos2fv"); + glad_glWindowPos2i = (PFNGLWINDOWPOS2IPROC) load(userptr, "glWindowPos2i"); + glad_glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC) load(userptr, "glWindowPos2iv"); + glad_glWindowPos2s = (PFNGLWINDOWPOS2SPROC) load(userptr, "glWindowPos2s"); + glad_glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC) load(userptr, "glWindowPos2sv"); + glad_glWindowPos3d = (PFNGLWINDOWPOS3DPROC) load(userptr, "glWindowPos3d"); + glad_glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC) load(userptr, "glWindowPos3dv"); + glad_glWindowPos3f = (PFNGLWINDOWPOS3FPROC) load(userptr, "glWindowPos3f"); + glad_glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC) load(userptr, "glWindowPos3fv"); + glad_glWindowPos3i = (PFNGLWINDOWPOS3IPROC) load(userptr, "glWindowPos3i"); + glad_glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC) load(userptr, "glWindowPos3iv"); + glad_glWindowPos3s = (PFNGLWINDOWPOS3SPROC) load(userptr, "glWindowPos3s"); + glad_glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC) load(userptr, "glWindowPos3sv"); +} +static void glad_gl_load_GL_VERSION_1_5( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_5) return; + glad_glBeginQuery = (PFNGLBEGINQUERYPROC) load(userptr, "glBeginQuery"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC) load(userptr, "glBindBuffer"); + glad_glBufferData = (PFNGLBUFFERDATAPROC) load(userptr, "glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC) load(userptr, "glBufferSubData"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) load(userptr, "glDeleteBuffers"); + glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC) load(userptr, "glDeleteQueries"); + glad_glEndQuery = (PFNGLENDQUERYPROC) load(userptr, "glEndQuery"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC) load(userptr, "glGenBuffers"); + glad_glGenQueries = (PFNGLGENQUERIESPROC) load(userptr, "glGenQueries"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC) load(userptr, "glGetBufferParameteriv"); + glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC) load(userptr, "glGetBufferPointerv"); + glad_glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC) load(userptr, "glGetBufferSubData"); + glad_glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC) load(userptr, "glGetQueryObjectiv"); + glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC) load(userptr, "glGetQueryObjectuiv"); + glad_glGetQueryiv = (PFNGLGETQUERYIVPROC) load(userptr, "glGetQueryiv"); + glad_glIsBuffer = (PFNGLISBUFFERPROC) load(userptr, "glIsBuffer"); + glad_glIsQuery = (PFNGLISQUERYPROC) load(userptr, "glIsQuery"); + glad_glMapBuffer = (PFNGLMAPBUFFERPROC) load(userptr, "glMapBuffer"); + glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) load(userptr, "glUnmapBuffer"); +} +static void glad_gl_load_GL_VERSION_2_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_2_0) return; + glad_glAttachShader = (PFNGLATTACHSHADERPROC) load(userptr, "glAttachShader"); + glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC) load(userptr, "glBindAttribLocation"); + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC) load(userptr, "glBlendEquationSeparate"); + glad_glCompileShader = (PFNGLCOMPILESHADERPROC) load(userptr, "glCompileShader"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC) load(userptr, "glCreateProgram"); + glad_glCreateShader = (PFNGLCREATESHADERPROC) load(userptr, "glCreateShader"); + glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC) load(userptr, "glDeleteProgram"); + glad_glDeleteShader = (PFNGLDELETESHADERPROC) load(userptr, "glDeleteShader"); + glad_glDetachShader = (PFNGLDETACHSHADERPROC) load(userptr, "glDetachShader"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) load(userptr, "glDisableVertexAttribArray"); + glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC) load(userptr, "glDrawBuffers"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC) load(userptr, "glEnableVertexAttribArray"); + glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC) load(userptr, "glGetActiveAttrib"); + glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC) load(userptr, "glGetActiveUniform"); + glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC) load(userptr, "glGetAttachedShaders"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC) load(userptr, "glGetAttribLocation"); + glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC) load(userptr, "glGetProgramInfoLog"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC) load(userptr, "glGetProgramiv"); + glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC) load(userptr, "glGetShaderInfoLog"); + glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC) load(userptr, "glGetShaderSource"); + glad_glGetShaderiv = (PFNGLGETSHADERIVPROC) load(userptr, "glGetShaderiv"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC) load(userptr, "glGetUniformLocation"); + glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC) load(userptr, "glGetUniformfv"); + glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC) load(userptr, "glGetUniformiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC) load(userptr, "glGetVertexAttribPointerv"); + glad_glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC) load(userptr, "glGetVertexAttribdv"); + glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC) load(userptr, "glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC) load(userptr, "glGetVertexAttribiv"); + glad_glIsProgram = (PFNGLISPROGRAMPROC) load(userptr, "glIsProgram"); + glad_glIsShader = (PFNGLISSHADERPROC) load(userptr, "glIsShader"); + glad_glLinkProgram = (PFNGLLINKPROGRAMPROC) load(userptr, "glLinkProgram"); + glad_glShaderSource = (PFNGLSHADERSOURCEPROC) load(userptr, "glShaderSource"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC) load(userptr, "glStencilFuncSeparate"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC) load(userptr, "glStencilMaskSeparate"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC) load(userptr, "glStencilOpSeparate"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC) load(userptr, "glUniform1f"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC) load(userptr, "glUniform1fv"); + glad_glUniform1i = (PFNGLUNIFORM1IPROC) load(userptr, "glUniform1i"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC) load(userptr, "glUniform1iv"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC) load(userptr, "glUniform2f"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC) load(userptr, "glUniform2fv"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC) load(userptr, "glUniform2i"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC) load(userptr, "glUniform2iv"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC) load(userptr, "glUniform3f"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC) load(userptr, "glUniform3fv"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC) load(userptr, "glUniform3i"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC) load(userptr, "glUniform3iv"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC) load(userptr, "glUniform4f"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC) load(userptr, "glUniform4fv"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC) load(userptr, "glUniform4i"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC) load(userptr, "glUniform4iv"); + glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC) load(userptr, "glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC) load(userptr, "glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC) load(userptr, "glUniformMatrix4fv"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC) load(userptr, "glUseProgram"); + glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC) load(userptr, "glValidateProgram"); + glad_glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC) load(userptr, "glVertexAttrib1d"); + glad_glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC) load(userptr, "glVertexAttrib1dv"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC) load(userptr, "glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC) load(userptr, "glVertexAttrib1fv"); + glad_glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC) load(userptr, "glVertexAttrib1s"); + glad_glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC) load(userptr, "glVertexAttrib1sv"); + glad_glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC) load(userptr, "glVertexAttrib2d"); + glad_glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC) load(userptr, "glVertexAttrib2dv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC) load(userptr, "glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC) load(userptr, "glVertexAttrib2fv"); + glad_glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC) load(userptr, "glVertexAttrib2s"); + glad_glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC) load(userptr, "glVertexAttrib2sv"); + glad_glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC) load(userptr, "glVertexAttrib3d"); + glad_glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC) load(userptr, "glVertexAttrib3dv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC) load(userptr, "glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC) load(userptr, "glVertexAttrib3fv"); + glad_glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC) load(userptr, "glVertexAttrib3s"); + glad_glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC) load(userptr, "glVertexAttrib3sv"); + glad_glVertexAttrib4Nbv = (PFNGLVERTEXATTRIB4NBVPROC) load(userptr, "glVertexAttrib4Nbv"); + glad_glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC) load(userptr, "glVertexAttrib4Niv"); + glad_glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC) load(userptr, "glVertexAttrib4Nsv"); + glad_glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC) load(userptr, "glVertexAttrib4Nub"); + glad_glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC) load(userptr, "glVertexAttrib4Nubv"); + glad_glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC) load(userptr, "glVertexAttrib4Nuiv"); + glad_glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC) load(userptr, "glVertexAttrib4Nusv"); + glad_glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC) load(userptr, "glVertexAttrib4bv"); + glad_glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC) load(userptr, "glVertexAttrib4d"); + glad_glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC) load(userptr, "glVertexAttrib4dv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC) load(userptr, "glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC) load(userptr, "glVertexAttrib4fv"); + glad_glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC) load(userptr, "glVertexAttrib4iv"); + glad_glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC) load(userptr, "glVertexAttrib4s"); + glad_glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC) load(userptr, "glVertexAttrib4sv"); + glad_glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC) load(userptr, "glVertexAttrib4ubv"); + glad_glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC) load(userptr, "glVertexAttrib4uiv"); + glad_glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC) load(userptr, "glVertexAttrib4usv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC) load(userptr, "glVertexAttribPointer"); +} +static void glad_gl_load_GL_VERSION_2_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_2_1) return; + glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC) load(userptr, "glUniformMatrix2x3fv"); + glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC) load(userptr, "glUniformMatrix2x4fv"); + glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC) load(userptr, "glUniformMatrix3x2fv"); + glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC) load(userptr, "glUniformMatrix3x4fv"); + glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC) load(userptr, "glUniformMatrix4x2fv"); + glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC) load(userptr, "glUniformMatrix4x3fv"); +} +static void glad_gl_load_GL_VERSION_3_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_0) return; + glad_glBeginConditionalRender = (PFNGLBEGINCONDITIONALRENDERPROC) load(userptr, "glBeginConditionalRender"); + glad_glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC) load(userptr, "glBeginTransformFeedback"); + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) load(userptr, "glBindBufferBase"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) load(userptr, "glBindBufferRange"); + glad_glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC) load(userptr, "glBindFragDataLocation"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC) load(userptr, "glBindFramebuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC) load(userptr, "glBindRenderbuffer"); + glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC) load(userptr, "glBindVertexArray"); + glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC) load(userptr, "glBlitFramebuffer"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC) load(userptr, "glCheckFramebufferStatus"); + glad_glClampColor = (PFNGLCLAMPCOLORPROC) load(userptr, "glClampColor"); + glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC) load(userptr, "glClearBufferfi"); + glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC) load(userptr, "glClearBufferfv"); + glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC) load(userptr, "glClearBufferiv"); + glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC) load(userptr, "glClearBufferuiv"); + glad_glColorMaski = (PFNGLCOLORMASKIPROC) load(userptr, "glColorMaski"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC) load(userptr, "glDeleteFramebuffers"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC) load(userptr, "glDeleteRenderbuffers"); + glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC) load(userptr, "glDeleteVertexArrays"); + glad_glDisablei = (PFNGLDISABLEIPROC) load(userptr, "glDisablei"); + glad_glEnablei = (PFNGLENABLEIPROC) load(userptr, "glEnablei"); + glad_glEndConditionalRender = (PFNGLENDCONDITIONALRENDERPROC) load(userptr, "glEndConditionalRender"); + glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC) load(userptr, "glEndTransformFeedback"); + glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC) load(userptr, "glFlushMappedBufferRange"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC) load(userptr, "glFramebufferRenderbuffer"); + glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC) load(userptr, "glFramebufferTexture1D"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC) load(userptr, "glFramebufferTexture2D"); + glad_glFramebufferTexture3D = (PFNGLFRAMEBUFFERTEXTURE3DPROC) load(userptr, "glFramebufferTexture3D"); + glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC) load(userptr, "glFramebufferTextureLayer"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC) load(userptr, "glGenFramebuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC) load(userptr, "glGenRenderbuffers"); + glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC) load(userptr, "glGenVertexArrays"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC) load(userptr, "glGenerateMipmap"); + glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC) load(userptr, "glGetBooleani_v"); + glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC) load(userptr, "glGetFragDataLocation"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) load(userptr, "glGetFramebufferAttachmentParameteriv"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC) load(userptr, "glGetIntegeri_v"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC) load(userptr, "glGetRenderbufferParameteriv"); + glad_glGetStringi = (PFNGLGETSTRINGIPROC) load(userptr, "glGetStringi"); + glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC) load(userptr, "glGetTexParameterIiv"); + glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC) load(userptr, "glGetTexParameterIuiv"); + glad_glGetTransformFeedbackVarying = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) load(userptr, "glGetTransformFeedbackVarying"); + glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC) load(userptr, "glGetUniformuiv"); + glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC) load(userptr, "glGetVertexAttribIiv"); + glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC) load(userptr, "glGetVertexAttribIuiv"); + glad_glIsEnabledi = (PFNGLISENABLEDIPROC) load(userptr, "glIsEnabledi"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC) load(userptr, "glIsFramebuffer"); + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC) load(userptr, "glIsRenderbuffer"); + glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC) load(userptr, "glIsVertexArray"); + glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC) load(userptr, "glMapBufferRange"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC) load(userptr, "glRenderbufferStorage"); + glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) load(userptr, "glRenderbufferStorageMultisample"); + glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC) load(userptr, "glTexParameterIiv"); + glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC) load(userptr, "glTexParameterIuiv"); + glad_glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC) load(userptr, "glTransformFeedbackVaryings"); + glad_glUniform1ui = (PFNGLUNIFORM1UIPROC) load(userptr, "glUniform1ui"); + glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC) load(userptr, "glUniform1uiv"); + glad_glUniform2ui = (PFNGLUNIFORM2UIPROC) load(userptr, "glUniform2ui"); + glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC) load(userptr, "glUniform2uiv"); + glad_glUniform3ui = (PFNGLUNIFORM3UIPROC) load(userptr, "glUniform3ui"); + glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC) load(userptr, "glUniform3uiv"); + glad_glUniform4ui = (PFNGLUNIFORM4UIPROC) load(userptr, "glUniform4ui"); + glad_glUniform4uiv = (PFNGLUNIFORM4UIVPROC) load(userptr, "glUniform4uiv"); + glad_glVertexAttribI1i = (PFNGLVERTEXATTRIBI1IPROC) load(userptr, "glVertexAttribI1i"); + glad_glVertexAttribI1iv = (PFNGLVERTEXATTRIBI1IVPROC) load(userptr, "glVertexAttribI1iv"); + glad_glVertexAttribI1ui = (PFNGLVERTEXATTRIBI1UIPROC) load(userptr, "glVertexAttribI1ui"); + glad_glVertexAttribI1uiv = (PFNGLVERTEXATTRIBI1UIVPROC) load(userptr, "glVertexAttribI1uiv"); + glad_glVertexAttribI2i = (PFNGLVERTEXATTRIBI2IPROC) load(userptr, "glVertexAttribI2i"); + glad_glVertexAttribI2iv = (PFNGLVERTEXATTRIBI2IVPROC) load(userptr, "glVertexAttribI2iv"); + glad_glVertexAttribI2ui = (PFNGLVERTEXATTRIBI2UIPROC) load(userptr, "glVertexAttribI2ui"); + glad_glVertexAttribI2uiv = (PFNGLVERTEXATTRIBI2UIVPROC) load(userptr, "glVertexAttribI2uiv"); + glad_glVertexAttribI3i = (PFNGLVERTEXATTRIBI3IPROC) load(userptr, "glVertexAttribI3i"); + glad_glVertexAttribI3iv = (PFNGLVERTEXATTRIBI3IVPROC) load(userptr, "glVertexAttribI3iv"); + glad_glVertexAttribI3ui = (PFNGLVERTEXATTRIBI3UIPROC) load(userptr, "glVertexAttribI3ui"); + glad_glVertexAttribI3uiv = (PFNGLVERTEXATTRIBI3UIVPROC) load(userptr, "glVertexAttribI3uiv"); + glad_glVertexAttribI4bv = (PFNGLVERTEXATTRIBI4BVPROC) load(userptr, "glVertexAttribI4bv"); + glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC) load(userptr, "glVertexAttribI4i"); + glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC) load(userptr, "glVertexAttribI4iv"); + glad_glVertexAttribI4sv = (PFNGLVERTEXATTRIBI4SVPROC) load(userptr, "glVertexAttribI4sv"); + glad_glVertexAttribI4ubv = (PFNGLVERTEXATTRIBI4UBVPROC) load(userptr, "glVertexAttribI4ubv"); + glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC) load(userptr, "glVertexAttribI4ui"); + glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC) load(userptr, "glVertexAttribI4uiv"); + glad_glVertexAttribI4usv = (PFNGLVERTEXATTRIBI4USVPROC) load(userptr, "glVertexAttribI4usv"); + glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC) load(userptr, "glVertexAttribIPointer"); +} +static void glad_gl_load_GL_VERSION_3_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_1) return; + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) load(userptr, "glBindBufferBase"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) load(userptr, "glBindBufferRange"); + glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC) load(userptr, "glCopyBufferSubData"); + glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC) load(userptr, "glDrawArraysInstanced"); + glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC) load(userptr, "glDrawElementsInstanced"); + glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC) load(userptr, "glGetActiveUniformBlockName"); + glad_glGetActiveUniformBlockiv = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC) load(userptr, "glGetActiveUniformBlockiv"); + glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC) load(userptr, "glGetActiveUniformName"); + glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC) load(userptr, "glGetActiveUniformsiv"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC) load(userptr, "glGetIntegeri_v"); + glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC) load(userptr, "glGetUniformBlockIndex"); + glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC) load(userptr, "glGetUniformIndices"); + glad_glPrimitiveRestartIndex = (PFNGLPRIMITIVERESTARTINDEXPROC) load(userptr, "glPrimitiveRestartIndex"); + glad_glTexBuffer = (PFNGLTEXBUFFERPROC) load(userptr, "glTexBuffer"); + glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC) load(userptr, "glUniformBlockBinding"); +} +static void glad_gl_load_GL_VERSION_3_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_2) return; + glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC) load(userptr, "glClientWaitSync"); + glad_glDeleteSync = (PFNGLDELETESYNCPROC) load(userptr, "glDeleteSync"); + glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC) load(userptr, "glDrawElementsBaseVertex"); + glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC) load(userptr, "glDrawElementsInstancedBaseVertex"); + glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC) load(userptr, "glDrawRangeElementsBaseVertex"); + glad_glFenceSync = (PFNGLFENCESYNCPROC) load(userptr, "glFenceSync"); + glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC) load(userptr, "glFramebufferTexture"); + glad_glGetBufferParameteri64v = (PFNGLGETBUFFERPARAMETERI64VPROC) load(userptr, "glGetBufferParameteri64v"); + glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC) load(userptr, "glGetInteger64i_v"); + glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC) load(userptr, "glGetInteger64v"); + glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC) load(userptr, "glGetMultisamplefv"); + glad_glGetSynciv = (PFNGLGETSYNCIVPROC) load(userptr, "glGetSynciv"); + glad_glIsSync = (PFNGLISSYNCPROC) load(userptr, "glIsSync"); + glad_glMultiDrawElementsBaseVertex = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC) load(userptr, "glMultiDrawElementsBaseVertex"); + glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC) load(userptr, "glProvokingVertex"); + glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC) load(userptr, "glSampleMaski"); + glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC) load(userptr, "glTexImage2DMultisample"); + glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC) load(userptr, "glTexImage3DMultisample"); + glad_glWaitSync = (PFNGLWAITSYNCPROC) load(userptr, "glWaitSync"); +} +static void glad_gl_load_GL_VERSION_3_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_3) return; + glad_glBindFragDataLocationIndexed = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC) load(userptr, "glBindFragDataLocationIndexed"); + glad_glBindSampler = (PFNGLBINDSAMPLERPROC) load(userptr, "glBindSampler"); + glad_glColorP3ui = (PFNGLCOLORP3UIPROC) load(userptr, "glColorP3ui"); + glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC) load(userptr, "glColorP3uiv"); + glad_glColorP4ui = (PFNGLCOLORP4UIPROC) load(userptr, "glColorP4ui"); + glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC) load(userptr, "glColorP4uiv"); + glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC) load(userptr, "glDeleteSamplers"); + glad_glGenSamplers = (PFNGLGENSAMPLERSPROC) load(userptr, "glGenSamplers"); + glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC) load(userptr, "glGetFragDataIndex"); + glad_glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC) load(userptr, "glGetQueryObjecti64v"); + glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC) load(userptr, "glGetQueryObjectui64v"); + glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC) load(userptr, "glGetSamplerParameterIiv"); + glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC) load(userptr, "glGetSamplerParameterIuiv"); + glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC) load(userptr, "glGetSamplerParameterfv"); + glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC) load(userptr, "glGetSamplerParameteriv"); + glad_glIsSampler = (PFNGLISSAMPLERPROC) load(userptr, "glIsSampler"); + glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC) load(userptr, "glMultiTexCoordP1ui"); + glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC) load(userptr, "glMultiTexCoordP1uiv"); + glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC) load(userptr, "glMultiTexCoordP2ui"); + glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC) load(userptr, "glMultiTexCoordP2uiv"); + glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC) load(userptr, "glMultiTexCoordP3ui"); + glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC) load(userptr, "glMultiTexCoordP3uiv"); + glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC) load(userptr, "glMultiTexCoordP4ui"); + glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC) load(userptr, "glMultiTexCoordP4uiv"); + glad_glNormalP3ui = (PFNGLNORMALP3UIPROC) load(userptr, "glNormalP3ui"); + glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC) load(userptr, "glNormalP3uiv"); + glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC) load(userptr, "glQueryCounter"); + glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC) load(userptr, "glSamplerParameterIiv"); + glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC) load(userptr, "glSamplerParameterIuiv"); + glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC) load(userptr, "glSamplerParameterf"); + glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC) load(userptr, "glSamplerParameterfv"); + glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC) load(userptr, "glSamplerParameteri"); + glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC) load(userptr, "glSamplerParameteriv"); + glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC) load(userptr, "glSecondaryColorP3ui"); + glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC) load(userptr, "glSecondaryColorP3uiv"); + glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC) load(userptr, "glTexCoordP1ui"); + glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC) load(userptr, "glTexCoordP1uiv"); + glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC) load(userptr, "glTexCoordP2ui"); + glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC) load(userptr, "glTexCoordP2uiv"); + glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC) load(userptr, "glTexCoordP3ui"); + glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC) load(userptr, "glTexCoordP3uiv"); + glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC) load(userptr, "glTexCoordP4ui"); + glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC) load(userptr, "glTexCoordP4uiv"); + glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC) load(userptr, "glVertexAttribDivisor"); + glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC) load(userptr, "glVertexAttribP1ui"); + glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC) load(userptr, "glVertexAttribP1uiv"); + glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC) load(userptr, "glVertexAttribP2ui"); + glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC) load(userptr, "glVertexAttribP2uiv"); + glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC) load(userptr, "glVertexAttribP3ui"); + glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC) load(userptr, "glVertexAttribP3uiv"); + glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC) load(userptr, "glVertexAttribP4ui"); + glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC) load(userptr, "glVertexAttribP4uiv"); + glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC) load(userptr, "glVertexP2ui"); + glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC) load(userptr, "glVertexP2uiv"); + glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC) load(userptr, "glVertexP3ui"); + glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC) load(userptr, "glVertexP3uiv"); + glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC) load(userptr, "glVertexP4ui"); + glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC) load(userptr, "glVertexP4uiv"); +} + + + +#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0) +#define GLAD_GL_IS_SOME_NEW_VERSION 1 +#else +#define GLAD_GL_IS_SOME_NEW_VERSION 0 +#endif + +static int glad_gl_get_extensions( int version, const char **out_exts, unsigned int *out_num_exts_i, char ***out_exts_i) { +#if GLAD_GL_IS_SOME_NEW_VERSION + if(GLAD_VERSION_MAJOR(version) < 3) { +#else + (void) version; + (void) out_num_exts_i; + (void) out_exts_i; +#endif + if (glad_glGetString == NULL) { + return 0; + } + *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS); +#if GLAD_GL_IS_SOME_NEW_VERSION + } else { + unsigned int index = 0; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (glad_glGetStringi == NULL || glad_glGetIntegerv == NULL) { + return 0; + } + glad_glGetIntegerv(GL_NUM_EXTENSIONS, (int*) &num_exts_i); + if (num_exts_i > 0) { + exts_i = (char **) malloc(num_exts_i * (sizeof *exts_i)); + } + if (exts_i == NULL) { + return 0; + } + for(index = 0; index < num_exts_i; index++) { + const char *gl_str_tmp = (const char*) glad_glGetStringi(GL_EXTENSIONS, index); + size_t len = strlen(gl_str_tmp) + 1; + + char *local_str = (char*) malloc(len * sizeof(char)); + if(local_str != NULL) { + memcpy(local_str, gl_str_tmp, len * sizeof(char)); + } + + exts_i[index] = local_str; + } + + *out_num_exts_i = num_exts_i; + *out_exts_i = exts_i; + } +#endif + return 1; +} +static void glad_gl_free_extensions(char **exts_i, unsigned int num_exts_i) { + if (exts_i != NULL) { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + free((void *) (exts_i[index])); + } + free((void *)exts_i); + exts_i = NULL; + } +} +static int glad_gl_has_extension(int version, const char *exts, unsigned int num_exts_i, char **exts_i, const char *ext) { + if(GLAD_VERSION_MAJOR(version) < 3 || !GLAD_GL_IS_SOME_NEW_VERSION) { + const char *extensions; + const char *loc; + const char *terminator; + extensions = exts; + if(extensions == NULL || ext == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } + } else { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + const char *e = exts_i[index]; + if(strcmp(e, ext) == 0) { + return 1; + } + } + } + return 0; +} + +static GLADapiproc glad_gl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_gl_find_extensions_gl( int version) { + const char *exts = NULL; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0; + + (void) glad_gl_has_extension; + + glad_gl_free_extensions(exts_i, num_exts_i); + + return 1; +} + +static int glad_gl_find_core_gl(void) { + int i, major, minor; + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + version = (const char*) glad_glGetString(GL_VERSION); + if (!version) return 0; + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + + GLAD_GL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_GL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_GL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_GL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_GL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + GLAD_GL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1; + GLAD_GL_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2; + GLAD_GL_VERSION_2_1 = (major == 2 && minor >= 1) || major > 2; + GLAD_GL_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3; + GLAD_GL_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3; + GLAD_GL_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3; + GLAD_GL_VERSION_3_3 = (major == 3 && minor >= 3) || major > 3; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr) { + int version; + + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + if(glad_glGetString == NULL) return 0; + if(glad_glGetString(GL_VERSION) == NULL) return 0; + version = glad_gl_find_core_gl(); + + glad_gl_load_GL_VERSION_1_0(load, userptr); + glad_gl_load_GL_VERSION_1_1(load, userptr); + glad_gl_load_GL_VERSION_1_2(load, userptr); + glad_gl_load_GL_VERSION_1_3(load, userptr); + glad_gl_load_GL_VERSION_1_4(load, userptr); + glad_gl_load_GL_VERSION_1_5(load, userptr); + glad_gl_load_GL_VERSION_2_0(load, userptr); + glad_gl_load_GL_VERSION_2_1(load, userptr); + glad_gl_load_GL_VERSION_3_0(load, userptr); + glad_gl_load_GL_VERSION_3_1(load, userptr); + glad_gl_load_GL_VERSION_3_2(load, userptr); + glad_gl_load_GL_VERSION_3_3(load, userptr); + + if (!glad_gl_find_extensions_gl(version)) return 0; + + + + return version; +} + + +int gladLoadGL( GLADloadfunc load) { + return gladLoadGLUserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/gles2.c glmark2-2021.02/android/jni/src/glad/src/gles2.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/gles2.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/src/gles2.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,495 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GL_ES_VERSION_2_0 = 0; +int GLAD_GL_OES_mapbuffer = 0; +int GLAD_GL_OES_required_internalformat = 0; + + + +PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL; +PFNGLATTACHSHADERPROC glad_glAttachShader = NULL; +PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL; +PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL; +PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL; +PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; +PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBLENDCOLORPROC glad_glBlendColor = NULL; +PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL; +PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL; +PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL; +PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; +PFNGLBUFFERDATAPROC glad_glBufferData = NULL; +PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCLEARPROC glad_glClear = NULL; +PFNGLCLEARCOLORPROC glad_glClearColor = NULL; +PFNGLCLEARDEPTHFPROC glad_glClearDepthf = NULL; +PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; +PFNGLCOLORMASKPROC glad_glColorMask = NULL; +PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; +PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; +PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATESHADERPROC glad_glCreateShader = NULL; +PFNGLCULLFACEPROC glad_glCullFace = NULL; +PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL; +PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL; +PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL; +PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL; +PFNGLDELETESHADERPROC glad_glDeleteShader = NULL; +PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL; +PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL; +PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; +PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL; +PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; +PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; +PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL; +PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL; +PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL; +PFNGLFINISHPROC glad_glFinish = NULL; +PFNGLFLUSHPROC glad_glFlush = NULL; +PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; +PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL; +PFNGLFRONTFACEPROC glad_glFrontFace = NULL; +PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL; +PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL; +PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL; +PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; +PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; +PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL; +PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL; +PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL; +PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; +PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; +PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; +PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES = NULL; +PFNGLGETERRORPROC glad_glGetError = NULL; +PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL; +PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; +PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL; +PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL; +PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL; +PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat = NULL; +PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL; +PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL; +PFNGLGETSTRINGPROC glad_glGetString = NULL; +PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; +PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; +PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; +PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; +PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; +PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLHINTPROC glad_glHint = NULL; +PFNGLISBUFFERPROC glad_glIsBuffer = NULL; +PFNGLISENABLEDPROC glad_glIsEnabled = NULL; +PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL; +PFNGLISPROGRAMPROC glad_glIsProgram = NULL; +PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL; +PFNGLISSHADERPROC glad_glIsShader = NULL; +PFNGLISTEXTUREPROC glad_glIsTexture = NULL; +PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; +PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; +PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; +PFNGLMAPBUFFEROESPROC glad_glMapBufferOES = NULL; +PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; +PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLREADPIXELSPROC glad_glReadPixels = NULL; +PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler = NULL; +PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL; +PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL; +PFNGLSCISSORPROC glad_glScissor = NULL; +PFNGLSHADERBINARYPROC glad_glShaderBinary = NULL; +PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; +PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; +PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; +PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; +PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL; +PFNGLSTENCILOPPROC glad_glStencilOp = NULL; +PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL; +PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; +PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; +PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; +PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; +PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; +PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; +PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; +PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL; +PFNGLUNIFORM1IPROC glad_glUniform1i = NULL; +PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL; +PFNGLUNIFORM2FPROC glad_glUniform2f = NULL; +PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL; +PFNGLUNIFORM2IPROC glad_glUniform2i = NULL; +PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL; +PFNGLUNIFORM3FPROC glad_glUniform3f = NULL; +PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL; +PFNGLUNIFORM3IPROC glad_glUniform3i = NULL; +PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL; +PFNGLUNIFORM4FPROC glad_glUniform4f = NULL; +PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL; +PFNGLUNIFORM4IPROC glad_glUniform4i = NULL; +PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL; +PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL; +PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES = NULL; +PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; +PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; +PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; +PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL; +PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL; +PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL; +PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL; +PFNGLVIEWPORTPROC glad_glViewport = NULL; + + +static void glad_gl_load_GL_ES_VERSION_2_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_ES_VERSION_2_0) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC) load(userptr, "glActiveTexture"); + glad_glAttachShader = (PFNGLATTACHSHADERPROC) load(userptr, "glAttachShader"); + glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC) load(userptr, "glBindAttribLocation"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC) load(userptr, "glBindBuffer"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC) load(userptr, "glBindFramebuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC) load(userptr, "glBindRenderbuffer"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC) load(userptr, "glBindTexture"); + glad_glBlendColor = (PFNGLBLENDCOLORPROC) load(userptr, "glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC) load(userptr, "glBlendEquation"); + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC) load(userptr, "glBlendEquationSeparate"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC) load(userptr, "glBlendFunc"); + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC) load(userptr, "glBlendFuncSeparate"); + glad_glBufferData = (PFNGLBUFFERDATAPROC) load(userptr, "glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC) load(userptr, "glBufferSubData"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC) load(userptr, "glCheckFramebufferStatus"); + glad_glClear = (PFNGLCLEARPROC) load(userptr, "glClear"); + glad_glClearColor = (PFNGLCLEARCOLORPROC) load(userptr, "glClearColor"); + glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC) load(userptr, "glClearDepthf"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC) load(userptr, "glClearStencil"); + glad_glColorMask = (PFNGLCOLORMASKPROC) load(userptr, "glColorMask"); + glad_glCompileShader = (PFNGLCOMPILESHADERPROC) load(userptr, "glCompileShader"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC) load(userptr, "glCompressedTexImage2D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) load(userptr, "glCompressedTexSubImage2D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC) load(userptr, "glCopyTexImage2D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC) load(userptr, "glCopyTexSubImage2D"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC) load(userptr, "glCreateProgram"); + glad_glCreateShader = (PFNGLCREATESHADERPROC) load(userptr, "glCreateShader"); + glad_glCullFace = (PFNGLCULLFACEPROC) load(userptr, "glCullFace"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) load(userptr, "glDeleteBuffers"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC) load(userptr, "glDeleteFramebuffers"); + glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC) load(userptr, "glDeleteProgram"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC) load(userptr, "glDeleteRenderbuffers"); + glad_glDeleteShader = (PFNGLDELETESHADERPROC) load(userptr, "glDeleteShader"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC) load(userptr, "glDeleteTextures"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC) load(userptr, "glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC) load(userptr, "glDepthMask"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC) load(userptr, "glDepthRangef"); + glad_glDetachShader = (PFNGLDETACHSHADERPROC) load(userptr, "glDetachShader"); + glad_glDisable = (PFNGLDISABLEPROC) load(userptr, "glDisable"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) load(userptr, "glDisableVertexAttribArray"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC) load(userptr, "glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC) load(userptr, "glDrawElements"); + glad_glEnable = (PFNGLENABLEPROC) load(userptr, "glEnable"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC) load(userptr, "glEnableVertexAttribArray"); + glad_glFinish = (PFNGLFINISHPROC) load(userptr, "glFinish"); + glad_glFlush = (PFNGLFLUSHPROC) load(userptr, "glFlush"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC) load(userptr, "glFramebufferRenderbuffer"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC) load(userptr, "glFramebufferTexture2D"); + glad_glFrontFace = (PFNGLFRONTFACEPROC) load(userptr, "glFrontFace"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC) load(userptr, "glGenBuffers"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC) load(userptr, "glGenFramebuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC) load(userptr, "glGenRenderbuffers"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC) load(userptr, "glGenTextures"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC) load(userptr, "glGenerateMipmap"); + glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC) load(userptr, "glGetActiveAttrib"); + glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC) load(userptr, "glGetActiveUniform"); + glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC) load(userptr, "glGetAttachedShaders"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC) load(userptr, "glGetAttribLocation"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC) load(userptr, "glGetBooleanv"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC) load(userptr, "glGetBufferParameteriv"); + glad_glGetError = (PFNGLGETERRORPROC) load(userptr, "glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC) load(userptr, "glGetFloatv"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) load(userptr, "glGetFramebufferAttachmentParameteriv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC) load(userptr, "glGetIntegerv"); + glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC) load(userptr, "glGetProgramInfoLog"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC) load(userptr, "glGetProgramiv"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC) load(userptr, "glGetRenderbufferParameteriv"); + glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC) load(userptr, "glGetShaderInfoLog"); + glad_glGetShaderPrecisionFormat = (PFNGLGETSHADERPRECISIONFORMATPROC) load(userptr, "glGetShaderPrecisionFormat"); + glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC) load(userptr, "glGetShaderSource"); + glad_glGetShaderiv = (PFNGLGETSHADERIVPROC) load(userptr, "glGetShaderiv"); + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC) load(userptr, "glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC) load(userptr, "glGetTexParameteriv"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC) load(userptr, "glGetUniformLocation"); + glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC) load(userptr, "glGetUniformfv"); + glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC) load(userptr, "glGetUniformiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC) load(userptr, "glGetVertexAttribPointerv"); + glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC) load(userptr, "glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC) load(userptr, "glGetVertexAttribiv"); + glad_glHint = (PFNGLHINTPROC) load(userptr, "glHint"); + glad_glIsBuffer = (PFNGLISBUFFERPROC) load(userptr, "glIsBuffer"); + glad_glIsEnabled = (PFNGLISENABLEDPROC) load(userptr, "glIsEnabled"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC) load(userptr, "glIsFramebuffer"); + glad_glIsProgram = (PFNGLISPROGRAMPROC) load(userptr, "glIsProgram"); + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC) load(userptr, "glIsRenderbuffer"); + glad_glIsShader = (PFNGLISSHADERPROC) load(userptr, "glIsShader"); + glad_glIsTexture = (PFNGLISTEXTUREPROC) load(userptr, "glIsTexture"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC) load(userptr, "glLineWidth"); + glad_glLinkProgram = (PFNGLLINKPROGRAMPROC) load(userptr, "glLinkProgram"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC) load(userptr, "glPixelStorei"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC) load(userptr, "glPolygonOffset"); + glad_glReadPixels = (PFNGLREADPIXELSPROC) load(userptr, "glReadPixels"); + glad_glReleaseShaderCompiler = (PFNGLRELEASESHADERCOMPILERPROC) load(userptr, "glReleaseShaderCompiler"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC) load(userptr, "glRenderbufferStorage"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC) load(userptr, "glSampleCoverage"); + glad_glScissor = (PFNGLSCISSORPROC) load(userptr, "glScissor"); + glad_glShaderBinary = (PFNGLSHADERBINARYPROC) load(userptr, "glShaderBinary"); + glad_glShaderSource = (PFNGLSHADERSOURCEPROC) load(userptr, "glShaderSource"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC) load(userptr, "glStencilFunc"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC) load(userptr, "glStencilFuncSeparate"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC) load(userptr, "glStencilMask"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC) load(userptr, "glStencilMaskSeparate"); + glad_glStencilOp = (PFNGLSTENCILOPPROC) load(userptr, "glStencilOp"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC) load(userptr, "glStencilOpSeparate"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC) load(userptr, "glTexImage2D"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC) load(userptr, "glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC) load(userptr, "glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC) load(userptr, "glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC) load(userptr, "glTexParameteriv"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC) load(userptr, "glTexSubImage2D"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC) load(userptr, "glUniform1f"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC) load(userptr, "glUniform1fv"); + glad_glUniform1i = (PFNGLUNIFORM1IPROC) load(userptr, "glUniform1i"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC) load(userptr, "glUniform1iv"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC) load(userptr, "glUniform2f"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC) load(userptr, "glUniform2fv"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC) load(userptr, "glUniform2i"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC) load(userptr, "glUniform2iv"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC) load(userptr, "glUniform3f"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC) load(userptr, "glUniform3fv"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC) load(userptr, "glUniform3i"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC) load(userptr, "glUniform3iv"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC) load(userptr, "glUniform4f"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC) load(userptr, "glUniform4fv"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC) load(userptr, "glUniform4i"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC) load(userptr, "glUniform4iv"); + glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC) load(userptr, "glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC) load(userptr, "glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC) load(userptr, "glUniformMatrix4fv"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC) load(userptr, "glUseProgram"); + glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC) load(userptr, "glValidateProgram"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC) load(userptr, "glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC) load(userptr, "glVertexAttrib1fv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC) load(userptr, "glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC) load(userptr, "glVertexAttrib2fv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC) load(userptr, "glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC) load(userptr, "glVertexAttrib3fv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC) load(userptr, "glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC) load(userptr, "glVertexAttrib4fv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC) load(userptr, "glVertexAttribPointer"); + glad_glViewport = (PFNGLVIEWPORTPROC) load(userptr, "glViewport"); +} +static void glad_gl_load_GL_OES_mapbuffer( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_OES_mapbuffer) return; + glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC) load(userptr, "glGetBufferPointerv"); + glad_glGetBufferPointervOES = (PFNGLGETBUFFERPOINTERVOESPROC) load(userptr, "glGetBufferPointervOES"); + glad_glMapBuffer = (PFNGLMAPBUFFERPROC) load(userptr, "glMapBuffer"); + glad_glMapBufferOES = (PFNGLMAPBUFFEROESPROC) load(userptr, "glMapBufferOES"); + glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) load(userptr, "glUnmapBuffer"); + glad_glUnmapBufferOES = (PFNGLUNMAPBUFFEROESPROC) load(userptr, "glUnmapBufferOES"); +} + + + +#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0) +#define GLAD_GL_IS_SOME_NEW_VERSION 1 +#else +#define GLAD_GL_IS_SOME_NEW_VERSION 0 +#endif + +static int glad_gl_get_extensions( int version, const char **out_exts, unsigned int *out_num_exts_i, char ***out_exts_i) { +#if GLAD_GL_IS_SOME_NEW_VERSION + if(GLAD_VERSION_MAJOR(version) < 3) { +#else + (void) version; + (void) out_num_exts_i; + (void) out_exts_i; +#endif + if (glad_glGetString == NULL) { + return 0; + } + *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS); +#if GLAD_GL_IS_SOME_NEW_VERSION + } else { + unsigned int index = 0; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (glad_glGetStringi == NULL || glad_glGetIntegerv == NULL) { + return 0; + } + glad_glGetIntegerv(GL_NUM_EXTENSIONS, (int*) &num_exts_i); + if (num_exts_i > 0) { + exts_i = (char **) malloc(num_exts_i * (sizeof *exts_i)); + } + if (exts_i == NULL) { + return 0; + } + for(index = 0; index < num_exts_i; index++) { + const char *gl_str_tmp = (const char*) glad_glGetStringi(GL_EXTENSIONS, index); + size_t len = strlen(gl_str_tmp) + 1; + + char *local_str = (char*) malloc(len * sizeof(char)); + if(local_str != NULL) { + memcpy(local_str, gl_str_tmp, len * sizeof(char)); + } + + exts_i[index] = local_str; + } + + *out_num_exts_i = num_exts_i; + *out_exts_i = exts_i; + } +#endif + return 1; +} +static void glad_gl_free_extensions(char **exts_i, unsigned int num_exts_i) { + if (exts_i != NULL) { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + free((void *) (exts_i[index])); + } + free((void *)exts_i); + exts_i = NULL; + } +} +static int glad_gl_has_extension(int version, const char *exts, unsigned int num_exts_i, char **exts_i, const char *ext) { + if(GLAD_VERSION_MAJOR(version) < 3 || !GLAD_GL_IS_SOME_NEW_VERSION) { + const char *extensions; + const char *loc; + const char *terminator; + extensions = exts; + if(extensions == NULL || ext == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } + } else { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + const char *e = exts_i[index]; + if(strcmp(e, ext) == 0) { + return 1; + } + } + } + return 0; +} + +static GLADapiproc glad_gl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_gl_find_extensions_gles2( int version) { + const char *exts = NULL; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0; + + GLAD_GL_OES_mapbuffer = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OES_mapbuffer"); + GLAD_GL_OES_required_internalformat = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OES_required_internalformat"); + + glad_gl_free_extensions(exts_i, num_exts_i); + + return 1; +} + +static int glad_gl_find_core_gles2(void) { + int i, major, minor; + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + version = (const char*) glad_glGetString(GL_VERSION); + if (!version) return 0; + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + + GLAD_GL_ES_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr) { + int version; + + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + if(glad_glGetString == NULL) return 0; + if(glad_glGetString(GL_VERSION) == NULL) return 0; + version = glad_gl_find_core_gles2(); + + glad_gl_load_GL_ES_VERSION_2_0(load, userptr); + + if (!glad_gl_find_extensions_gles2(version)) return 0; + glad_gl_load_GL_OES_mapbuffer(load, userptr); + + + + return version; +} + + +int gladLoadGLES2( GLADloadfunc load) { + return gladLoadGLES2UserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/glx.c glmark2-2021.02/android/jni/src/glad/src/glx.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/glx.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/src/glx.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,232 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GLX_VERSION_1_0 = 0; +int GLAD_GLX_VERSION_1_1 = 0; +int GLAD_GLX_VERSION_1_2 = 0; +int GLAD_GLX_VERSION_1_3 = 0; +int GLAD_GLX_VERSION_1_4 = 0; +int GLAD_GLX_EXT_swap_control = 0; +int GLAD_GLX_MESA_swap_control = 0; + + + +PFNGLXCHOOSEFBCONFIGPROC glad_glXChooseFBConfig = NULL; +PFNGLXCHOOSEVISUALPROC glad_glXChooseVisual = NULL; +PFNGLXCOPYCONTEXTPROC glad_glXCopyContext = NULL; +PFNGLXCREATECONTEXTPROC glad_glXCreateContext = NULL; +PFNGLXCREATEGLXPIXMAPPROC glad_glXCreateGLXPixmap = NULL; +PFNGLXCREATENEWCONTEXTPROC glad_glXCreateNewContext = NULL; +PFNGLXCREATEPBUFFERPROC glad_glXCreatePbuffer = NULL; +PFNGLXCREATEPIXMAPPROC glad_glXCreatePixmap = NULL; +PFNGLXCREATEWINDOWPROC glad_glXCreateWindow = NULL; +PFNGLXDESTROYCONTEXTPROC glad_glXDestroyContext = NULL; +PFNGLXDESTROYGLXPIXMAPPROC glad_glXDestroyGLXPixmap = NULL; +PFNGLXDESTROYPBUFFERPROC glad_glXDestroyPbuffer = NULL; +PFNGLXDESTROYPIXMAPPROC glad_glXDestroyPixmap = NULL; +PFNGLXDESTROYWINDOWPROC glad_glXDestroyWindow = NULL; +PFNGLXGETCLIENTSTRINGPROC glad_glXGetClientString = NULL; +PFNGLXGETCONFIGPROC glad_glXGetConfig = NULL; +PFNGLXGETCURRENTCONTEXTPROC glad_glXGetCurrentContext = NULL; +PFNGLXGETCURRENTDISPLAYPROC glad_glXGetCurrentDisplay = NULL; +PFNGLXGETCURRENTDRAWABLEPROC glad_glXGetCurrentDrawable = NULL; +PFNGLXGETCURRENTREADDRAWABLEPROC glad_glXGetCurrentReadDrawable = NULL; +PFNGLXGETFBCONFIGATTRIBPROC glad_glXGetFBConfigAttrib = NULL; +PFNGLXGETFBCONFIGSPROC glad_glXGetFBConfigs = NULL; +PFNGLXGETPROCADDRESSPROC glad_glXGetProcAddress = NULL; +PFNGLXGETSELECTEDEVENTPROC glad_glXGetSelectedEvent = NULL; +PFNGLXGETSWAPINTERVALMESAPROC glad_glXGetSwapIntervalMESA = NULL; +PFNGLXGETVISUALFROMFBCONFIGPROC glad_glXGetVisualFromFBConfig = NULL; +PFNGLXISDIRECTPROC glad_glXIsDirect = NULL; +PFNGLXMAKECONTEXTCURRENTPROC glad_glXMakeContextCurrent = NULL; +PFNGLXMAKECURRENTPROC glad_glXMakeCurrent = NULL; +PFNGLXQUERYCONTEXTPROC glad_glXQueryContext = NULL; +PFNGLXQUERYDRAWABLEPROC glad_glXQueryDrawable = NULL; +PFNGLXQUERYEXTENSIONPROC glad_glXQueryExtension = NULL; +PFNGLXQUERYEXTENSIONSSTRINGPROC glad_glXQueryExtensionsString = NULL; +PFNGLXQUERYSERVERSTRINGPROC glad_glXQueryServerString = NULL; +PFNGLXQUERYVERSIONPROC glad_glXQueryVersion = NULL; +PFNGLXSELECTEVENTPROC glad_glXSelectEvent = NULL; +PFNGLXSWAPBUFFERSPROC glad_glXSwapBuffers = NULL; +PFNGLXSWAPINTERVALEXTPROC glad_glXSwapIntervalEXT = NULL; +PFNGLXSWAPINTERVALMESAPROC glad_glXSwapIntervalMESA = NULL; +PFNGLXUSEXFONTPROC glad_glXUseXFont = NULL; +PFNGLXWAITGLPROC glad_glXWaitGL = NULL; +PFNGLXWAITXPROC glad_glXWaitX = NULL; + + +static void glad_glx_load_GLX_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_0) return; + glad_glXChooseVisual = (PFNGLXCHOOSEVISUALPROC) load(userptr, "glXChooseVisual"); + glad_glXCopyContext = (PFNGLXCOPYCONTEXTPROC) load(userptr, "glXCopyContext"); + glad_glXCreateContext = (PFNGLXCREATECONTEXTPROC) load(userptr, "glXCreateContext"); + glad_glXCreateGLXPixmap = (PFNGLXCREATEGLXPIXMAPPROC) load(userptr, "glXCreateGLXPixmap"); + glad_glXDestroyContext = (PFNGLXDESTROYCONTEXTPROC) load(userptr, "glXDestroyContext"); + glad_glXDestroyGLXPixmap = (PFNGLXDESTROYGLXPIXMAPPROC) load(userptr, "glXDestroyGLXPixmap"); + glad_glXGetConfig = (PFNGLXGETCONFIGPROC) load(userptr, "glXGetConfig"); + glad_glXGetCurrentContext = (PFNGLXGETCURRENTCONTEXTPROC) load(userptr, "glXGetCurrentContext"); + glad_glXGetCurrentDrawable = (PFNGLXGETCURRENTDRAWABLEPROC) load(userptr, "glXGetCurrentDrawable"); + glad_glXIsDirect = (PFNGLXISDIRECTPROC) load(userptr, "glXIsDirect"); + glad_glXMakeCurrent = (PFNGLXMAKECURRENTPROC) load(userptr, "glXMakeCurrent"); + glad_glXQueryExtension = (PFNGLXQUERYEXTENSIONPROC) load(userptr, "glXQueryExtension"); + glad_glXQueryVersion = (PFNGLXQUERYVERSIONPROC) load(userptr, "glXQueryVersion"); + glad_glXSwapBuffers = (PFNGLXSWAPBUFFERSPROC) load(userptr, "glXSwapBuffers"); + glad_glXUseXFont = (PFNGLXUSEXFONTPROC) load(userptr, "glXUseXFont"); + glad_glXWaitGL = (PFNGLXWAITGLPROC) load(userptr, "glXWaitGL"); + glad_glXWaitX = (PFNGLXWAITXPROC) load(userptr, "glXWaitX"); +} +static void glad_glx_load_GLX_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_1) return; + glad_glXGetClientString = (PFNGLXGETCLIENTSTRINGPROC) load(userptr, "glXGetClientString"); + glad_glXQueryExtensionsString = (PFNGLXQUERYEXTENSIONSSTRINGPROC) load(userptr, "glXQueryExtensionsString"); + glad_glXQueryServerString = (PFNGLXQUERYSERVERSTRINGPROC) load(userptr, "glXQueryServerString"); +} +static void glad_glx_load_GLX_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_2) return; + glad_glXGetCurrentDisplay = (PFNGLXGETCURRENTDISPLAYPROC) load(userptr, "glXGetCurrentDisplay"); +} +static void glad_glx_load_GLX_VERSION_1_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_3) return; + glad_glXChooseFBConfig = (PFNGLXCHOOSEFBCONFIGPROC) load(userptr, "glXChooseFBConfig"); + glad_glXCreateNewContext = (PFNGLXCREATENEWCONTEXTPROC) load(userptr, "glXCreateNewContext"); + glad_glXCreatePbuffer = (PFNGLXCREATEPBUFFERPROC) load(userptr, "glXCreatePbuffer"); + glad_glXCreatePixmap = (PFNGLXCREATEPIXMAPPROC) load(userptr, "glXCreatePixmap"); + glad_glXCreateWindow = (PFNGLXCREATEWINDOWPROC) load(userptr, "glXCreateWindow"); + glad_glXDestroyPbuffer = (PFNGLXDESTROYPBUFFERPROC) load(userptr, "glXDestroyPbuffer"); + glad_glXDestroyPixmap = (PFNGLXDESTROYPIXMAPPROC) load(userptr, "glXDestroyPixmap"); + glad_glXDestroyWindow = (PFNGLXDESTROYWINDOWPROC) load(userptr, "glXDestroyWindow"); + glad_glXGetCurrentReadDrawable = (PFNGLXGETCURRENTREADDRAWABLEPROC) load(userptr, "glXGetCurrentReadDrawable"); + glad_glXGetFBConfigAttrib = (PFNGLXGETFBCONFIGATTRIBPROC) load(userptr, "glXGetFBConfigAttrib"); + glad_glXGetFBConfigs = (PFNGLXGETFBCONFIGSPROC) load(userptr, "glXGetFBConfigs"); + glad_glXGetSelectedEvent = (PFNGLXGETSELECTEDEVENTPROC) load(userptr, "glXGetSelectedEvent"); + glad_glXGetVisualFromFBConfig = (PFNGLXGETVISUALFROMFBCONFIGPROC) load(userptr, "glXGetVisualFromFBConfig"); + glad_glXMakeContextCurrent = (PFNGLXMAKECONTEXTCURRENTPROC) load(userptr, "glXMakeContextCurrent"); + glad_glXQueryContext = (PFNGLXQUERYCONTEXTPROC) load(userptr, "glXQueryContext"); + glad_glXQueryDrawable = (PFNGLXQUERYDRAWABLEPROC) load(userptr, "glXQueryDrawable"); + glad_glXSelectEvent = (PFNGLXSELECTEVENTPROC) load(userptr, "glXSelectEvent"); +} +static void glad_glx_load_GLX_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_4) return; + glad_glXGetProcAddress = (PFNGLXGETPROCADDRESSPROC) load(userptr, "glXGetProcAddress"); +} +static void glad_glx_load_GLX_EXT_swap_control( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_EXT_swap_control) return; + glad_glXSwapIntervalEXT = (PFNGLXSWAPINTERVALEXTPROC) load(userptr, "glXSwapIntervalEXT"); +} +static void glad_glx_load_GLX_MESA_swap_control( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_MESA_swap_control) return; + glad_glXGetSwapIntervalMESA = (PFNGLXGETSWAPINTERVALMESAPROC) load(userptr, "glXGetSwapIntervalMESA"); + glad_glXSwapIntervalMESA = (PFNGLXSWAPINTERVALMESAPROC) load(userptr, "glXSwapIntervalMESA"); +} + + + +static int glad_glx_has_extension(Display *display, int screen, const char *ext) { +#ifndef GLX_VERSION_1_1 + (void) display; + (void) screen; + (void) ext; +#else + const char *terminator; + const char *loc; + const char *extensions; + + if (glXQueryExtensionsString == NULL) { + return 0; + } + + extensions = glXQueryExtensionsString(display, screen); + + if(extensions == NULL || ext == NULL) { + return 0; + } + + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) + break; + + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } +#endif + + return 0; +} + +static GLADapiproc glad_glx_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_glx_find_extensions(Display *display, int screen) { + GLAD_GLX_EXT_swap_control = glad_glx_has_extension(display, screen, "GLX_EXT_swap_control"); + GLAD_GLX_MESA_swap_control = glad_glx_has_extension(display, screen, "GLX_MESA_swap_control"); + return 1; +} + +static int glad_glx_find_core_glx(Display **display, int *screen) { + int major = 0, minor = 0; + if(*display == NULL) { +#ifdef GLAD_GLX_NO_X11 + (void) screen; + return 0; +#else + *display = XOpenDisplay(0); + if (*display == NULL) { + return 0; + } + *screen = XScreenNumberOfScreen(XDefaultScreenOfDisplay(*display)); +#endif + } + glXQueryVersion(*display, &major, &minor); + GLAD_GLX_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_GLX_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_GLX_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_GLX_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_GLX_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLXUserPtr(Display *display, int screen, GLADuserptrloadfunc load, void *userptr) { + int version; + glXQueryVersion = (PFNGLXQUERYVERSIONPROC) load(userptr, "glXQueryVersion"); + if(glXQueryVersion == NULL) return 0; + version = glad_glx_find_core_glx(&display, &screen); + + glad_glx_load_GLX_VERSION_1_0(load, userptr); + glad_glx_load_GLX_VERSION_1_1(load, userptr); + glad_glx_load_GLX_VERSION_1_2(load, userptr); + glad_glx_load_GLX_VERSION_1_3(load, userptr); + glad_glx_load_GLX_VERSION_1_4(load, userptr); + + if (!glad_glx_find_extensions(display, screen)) return 0; + glad_glx_load_GLX_EXT_swap_control(load, userptr); + glad_glx_load_GLX_MESA_swap_control(load, userptr); + + return version; +} + +int gladLoadGLX(Display *display, int screen, GLADloadfunc load) { + return gladLoadGLXUserPtr(display, screen, glad_glx_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/wgl.c glmark2-2021.02/android/jni/src/glad/src/wgl.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/glad/src/wgl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/glad/src/wgl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,117 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_WGL_VERSION_1_0 = 0; +int GLAD_WGL_ARB_extensions_string = 0; +int GLAD_WGL_EXT_extensions_string = 0; +int GLAD_WGL_EXT_swap_control = 0; + + + +PFNWGLGETEXTENSIONSSTRINGARBPROC glad_wglGetExtensionsStringARB = NULL; +PFNWGLGETEXTENSIONSSTRINGEXTPROC glad_wglGetExtensionsStringEXT = NULL; +PFNWGLGETSWAPINTERVALEXTPROC glad_wglGetSwapIntervalEXT = NULL; +PFNWGLSWAPINTERVALEXTPROC glad_wglSwapIntervalEXT = NULL; + + +static void glad_wgl_load_WGL_ARB_extensions_string(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_ARB_extensions_string) return; + glad_wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC) load(userptr, "wglGetExtensionsStringARB"); +} +static void glad_wgl_load_WGL_EXT_extensions_string(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_EXT_extensions_string) return; + glad_wglGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC) load(userptr, "wglGetExtensionsStringEXT"); +} +static void glad_wgl_load_WGL_EXT_swap_control(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_EXT_swap_control) return; + glad_wglGetSwapIntervalEXT = (PFNWGLGETSWAPINTERVALEXTPROC) load(userptr, "wglGetSwapIntervalEXT"); + glad_wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC) load(userptr, "wglSwapIntervalEXT"); +} + + + +static int glad_wgl_has_extension(HDC hdc, const char *ext) { + const char *terminator; + const char *loc; + const char *extensions; + + if(wglGetExtensionsStringEXT == NULL && wglGetExtensionsStringARB == NULL) + return 0; + + if(wglGetExtensionsStringARB == NULL || hdc == INVALID_HANDLE_VALUE) + extensions = wglGetExtensionsStringEXT(); + else + extensions = wglGetExtensionsStringARB(hdc); + + if(extensions == NULL || ext == NULL) + return 0; + + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) + break; + + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) + { + return 1; + } + extensions = terminator; + } + + return 0; +} + +static GLADapiproc glad_wgl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_wgl_find_extensions_wgl(HDC hdc) { + GLAD_WGL_ARB_extensions_string = glad_wgl_has_extension(hdc, "WGL_ARB_extensions_string"); + GLAD_WGL_EXT_extensions_string = glad_wgl_has_extension(hdc, "WGL_EXT_extensions_string"); + GLAD_WGL_EXT_swap_control = glad_wgl_has_extension(hdc, "WGL_EXT_swap_control"); + return 1; +} + +static int glad_wgl_find_core_wgl(void) { + int major = 1, minor = 0; + GLAD_WGL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadWGLUserPtr(HDC hdc, GLADuserptrloadfunc load, void *userptr) { + int version; + wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC) load(userptr, "wglGetExtensionsStringARB"); + wglGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC) load(userptr, "wglGetExtensionsStringEXT"); + if(wglGetExtensionsStringARB == NULL && wglGetExtensionsStringEXT == NULL) return 0; + version = glad_wgl_find_core_wgl(); + + + if (!glad_wgl_find_extensions_wgl(hdc)) return 0; + glad_wgl_load_WGL_ARB_extensions_string(load, userptr); + glad_wgl_load_WGL_EXT_extensions_string(load, userptr); + glad_wgl_load_WGL_EXT_swap_control(load, userptr); + + return version; +} + +int gladLoadWGL(HDC hdc, GLADloadfunc load) { + return gladLoadWGLUserPtr(hdc, glad_wgl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-headers.cpp glmark2-2021.02/android/jni/src/gl-headers.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-headers.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-headers.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,8 +21,8 @@ */ #include "gl-headers.h" -void* (*GLExtensions::MapBuffer) (GLenum target, GLenum access) = 0; -GLboolean (*GLExtensions::UnmapBuffer) (GLenum target) = 0; +void* (GLAD_API_PTR *GLExtensions::MapBuffer) (GLenum target, GLenum access) = 0; +GLboolean (GLAD_API_PTR *GLExtensions::UnmapBuffer) (GLenum target) = 0; bool GLExtensions::support(const std::string &ext) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-headers.h glmark2-2021.02/android/jni/src/gl-headers.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-headers.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-headers.h 2021-04-25 01:47:22.000000000 +0800 @@ -25,14 +25,12 @@ #define GL_GLEXT_PROTOTYPES #if GLMARK2_USE_GL -#include -#include +#include #ifndef GL_RGB565 #define GL_RGB565 0x8D62 #endif #elif GLMARK2_USE_GLESv2 -#include -#include +#include #ifndef GL_WRITE_ONLY #define GL_WRITE_ONLY GL_WRITE_ONLY_OES #endif @@ -64,8 +62,8 @@ */ static bool support(const std::string &ext); - static void* (*MapBuffer) (GLenum target, GLenum access); - static GLboolean (*UnmapBuffer) (GLenum target); + static void* (GLAD_API_PTR *MapBuffer) (GLenum target, GLenum access); + static GLboolean (GLAD_API_PTR *UnmapBuffer) (GLenum target); }; #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-egl.cpp glmark2-2021.02/android/jni/src/gl-state-egl.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-egl.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-egl.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -27,10 +27,17 @@ #include "gl-headers.h" #include #include +#include using std::vector; using std::string; +GLADapiproc load_egl_func(void *userdata, const char *name) +{ + SharedLibrary *lib = reinterpret_cast(userdata); + return reinterpret_cast(lib->load(name)); +} + /**************************** * EGLConfig public methods * ****************************/ @@ -287,9 +294,34 @@ * GLStateEGL public methods * ****************************/ +GLStateEGL::~GLStateEGL() +{ + if(egl_display_ != nullptr){ + if(!eglTerminate(egl_display_)) + Log::error("eglTerminate failed\n"); + } + + if(eglReleaseThread && !eglReleaseThread()) + Log::error("eglReleaseThread failed\n"); +} + bool GLStateEGL::init_display(void* native_display, GLVisualConfig& visual_config) { +#if defined(WIN32) + if (!egl_lib_.open("libEGL.dll")) { +#else + if (!egl_lib_.open_from_alternatives({"libEGL.so", "libEGL.so.1" })) { +#endif + Log::error("Error loading EGL library\n"); + return false; + } + + if (gladLoadEGLUserPtr(EGL_NO_DISPLAY, load_egl_func, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + native_display_ = reinterpret_cast(native_display); requested_visual_config_ = visual_config; @@ -304,20 +336,28 @@ return gotValidSurface(); } -void +bool GLStateEGL::init_gl_extensions() { #if GLMARK2_USE_GLESv2 + if (!gladLoadGLES2UserPtr(load_proc, this)) { + Log::error("Loading GLESv2 entry points failed."); + return false; + } + if (GLExtensions::support("GL_OES_mapbuffer")) { - GLExtensions::MapBuffer = - reinterpret_cast(eglGetProcAddress("glMapBufferOES")); - GLExtensions::UnmapBuffer = - reinterpret_cast(eglGetProcAddress("glUnmapBufferOES")); + GLExtensions::MapBuffer = glMapBufferOES; + GLExtensions::UnmapBuffer = glUnmapBufferOES; } #elif GLMARK2_USE_GL + if (!gladLoadGLUserPtr(load_proc, this)) { + Log::error("Loading GL entry points failed."); + return false; + } GLExtensions::MapBuffer = glMapBuffer; GLExtensions::UnmapBuffer = glUnmapBuffer; #endif + return true; } bool @@ -347,7 +387,9 @@ Log::info("** Failed to set swap interval. Results may be bounded above by refresh rate.\n"); } - init_gl_extensions(); + if (!init_gl_extensions()) { + return false; + } return true; } @@ -379,7 +421,7 @@ } bool -GLStateEGL::gotNativeConfig(int& vid) +GLStateEGL::gotNativeConfig(intptr_t& vid) { if (!gotValidConfig()) return false; @@ -409,17 +451,64 @@ * GLStateEGL private methods * *****************************/ +#ifdef GLMARK2_USE_X11 +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_X11_KHR +#elif GLMARK2_USE_WAYLAND +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_WAYLAND_KHR +#elif GLMARK2_USE_DRM +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_GBM_KHR +#elif GLMARK2_USE_MIR +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_MIR_KHR +#else +// Platforms not in the above platform enums fall back to eglGetDisplay. +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM 0 +#endif + bool GLStateEGL::gotValidDisplay() { if (egl_display_) return true; - egl_display_ = eglGetDisplay(native_display_); + char const * __restrict const supported_extensions = + eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + + if (GLMARK2_NATIVE_EGL_DISPLAY_ENUM != 0 && supported_extensions + && strstr(supported_extensions, "EGL_EXT_platform_base")) + { + Log::debug("Using eglGetPlatformDisplayEXT()\n"); + PFNEGLGETPLATFORMDISPLAYEXTPROC get_platform_display = + reinterpret_cast( + eglGetProcAddress("eglGetPlatformDisplayEXT")); + + if (get_platform_display != nullptr) { + egl_display_ = get_platform_display( + GLMARK2_NATIVE_EGL_DISPLAY_ENUM, + reinterpret_cast(native_display_), + nullptr); + } + + if (!egl_display_) { + Log::debug("eglGetPlatformDisplayEXT() failed with error: 0x%x\n", + eglGetError()); + } + } + else + { + Log::debug("eglGetPlatformDisplayEXT() seems unsupported\n"); + } + + /* Just in case get_platform_display failed... */ + if (!egl_display_) { + Log::debug("Falling back to eglGetDisplay()\n"); + egl_display_ = eglGetDisplay(native_display_); + } + if (!egl_display_) { Log::error("eglGetDisplay() failed with error: 0x%x\n", eglGetError()); return false; } + int egl_major(-1); int egl_minor(-1); if (!eglInitialize(egl_display_, &egl_major, &egl_minor)) { @@ -428,16 +517,33 @@ return false; } + /* Reinitialize GLAD with a known display */ + if (gladLoadEGLUserPtr(egl_display_, load_egl_func, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + #if GLMARK2_USE_GLESv2 EGLenum apiType(EGL_OPENGL_ES_API); +#if defined(WIN32) + std::initializer_list libNames = { "libGLESv2.dll" }; +#else + std::initializer_list libNames = { "libGLESv2.so", "libGLESv2.so.2" }; +#endif #elif GLMARK2_USE_GL EGLenum apiType(EGL_OPENGL_API); + std::initializer_list libNames = { "libGL.so", "libGL.so.1" }; #endif if (!eglBindAPI(apiType)) { Log::error("Failed to bind api EGL_OPENGL_ES_API\n"); return false; } + if (!gl_lib_.open_from_alternatives(libNames)) { + Log::error("Error loading GL library\n"); + return false; + } + return true; } @@ -617,3 +723,16 @@ return true; } +GLADapiproc +GLStateEGL::load_proc(void *userptr, const char* name) +{ + if (eglGetProcAddress) { + GLADapiproc sym = reinterpret_cast(eglGetProcAddress(name)); + if (sym) { + return sym; + } + } + + GLStateEGL* state = reinterpret_cast(userptr); + return reinterpret_cast(state->gl_lib_.load(name)); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-egl.h glmark2-2021.02/android/jni/src/gl-state-egl.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-egl.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-egl.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -23,9 +23,10 @@ #define GLMARK2_GL_STATE_EGL_H_ #include -#include +#include #include "gl-state.h" #include "gl-visual-config.h" +#include "shared-library.h" class EglConfig { @@ -124,12 +125,17 @@ EGLSurface egl_surface_; GLVisualConfig requested_visual_config_; EglConfig best_config_; + SharedLibrary egl_lib_; + SharedLibrary gl_lib_; bool gotValidDisplay(); bool gotValidConfig(); bool gotValidSurface(); bool gotValidContext(); void get_glvisualconfig(EGLConfig config, GLVisualConfig& visual_config); EGLConfig select_best_config(std::vector& configs); + + static GLADapiproc load_proc(void *userptr, const char* name); + public: GLStateEGL() : native_display_(0), @@ -138,15 +144,16 @@ egl_config_(0), egl_context_(0), egl_surface_(0) {} + ~GLStateEGL(); bool init_display(void* native_display, GLVisualConfig& config_pref); bool init_surface(void* native_window); - void init_gl_extensions(); + bool init_gl_extensions(); bool valid(); bool reset(); void swap(); // Performs a config search, returning a native visual ID on success - bool gotNativeConfig(int& vid); + bool gotNativeConfig(intptr_t& vid); void getVisualConfig(GLVisualConfig& vc); }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-glx.cpp glmark2-2021.02/android/jni/src/gl-state-glx.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-glx.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-glx.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,13 +26,6 @@ #include -namespace -{ -PFNGLXSWAPINTERVALEXTPROC glXSwapIntervalEXT_; -PFNGLXSWAPINTERVALMESAPROC glXSwapIntervalMESA_; -PFNGLXGETSWAPINTERVALMESAPROC glXGetSwapIntervalMESA_; -} - /****************** * Public methods * ******************/ @@ -43,6 +36,12 @@ xdpy_ = reinterpret_cast(native_display); requested_visual_config_ = visual_config; + if (!lib_.open_from_alternatives({"libGL.so", "libGL.so.1"})) { + Log::error("Failed to load libGL\n"); + return false; + } + + gladLoadGLXUserPtr(xdpy_, DefaultScreen(xdpy_), load_proc, this); return (xdpy_ != 0); } @@ -54,11 +53,12 @@ return (xwin_ != 0); } -void +bool GLStateGLX::init_gl_extensions() { GLExtensions::MapBuffer = glMapBuffer; GLExtensions::UnmapBuffer = glUnmapBuffer; + return true; } bool @@ -80,20 +80,26 @@ return false; } - init_gl_extensions(); + if (gladLoadGLUserPtr(load_proc, this) == 0) { + Log::error("Failed to load GL entry points\n"); + return false; + } + + if (!init_gl_extensions()) + return false; unsigned int desired_swap(0); unsigned int actual_swap(-1); - if (glXSwapIntervalEXT_) { - glXSwapIntervalEXT_(xdpy_, xwin_, desired_swap); + if (glXSwapIntervalEXT) { + glXSwapIntervalEXT(xdpy_, xwin_, desired_swap); glXQueryDrawable(xdpy_, xwin_, GLX_SWAP_INTERVAL_EXT, &actual_swap); if (actual_swap == desired_swap) return true; } - if (glXSwapIntervalMESA_) { - glXSwapIntervalMESA_(desired_swap); - actual_swap = glXGetSwapIntervalMESA_(); + if (glXSwapIntervalMESA) { + glXSwapIntervalMESA(desired_swap); + actual_swap = glXGetSwapIntervalMESA(); if (actual_swap == desired_swap) return true; } @@ -123,7 +129,7 @@ } bool -GLStateGLX::gotNativeConfig(int& vid) +GLStateGLX::gotNativeConfig(intptr_t& vid) { if (!ensure_glx_fbconfig()) return false; @@ -186,31 +192,7 @@ * GLX_SGI_swap_control is not enough because it doesn't allow 0 as a valid * value (i.e. you can't turn off VSync). */ - if (extString.find("GLX_EXT_swap_control") != std::string::npos) { - glXSwapIntervalEXT_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXSwapIntervalEXT") - ) - ); - } - else if (extString.find("GLX_MESA_swap_control") != std::string::npos) { - glXSwapIntervalMESA_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXSwapIntervalMESA") - ) - ); - glXGetSwapIntervalMESA_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXGetSwapIntervalMESA") - ) - ); - } - - - if (!glXSwapIntervalEXT_ && !glXSwapIntervalMESA_) { + if (!glXSwapIntervalEXT && !glXSwapIntervalMESA) { Log::info("** GLX does not support GLX_EXT_swap_control or GLX_MESA_swap_control!\n"); } } @@ -346,3 +328,18 @@ return best_config; } + +GLADapiproc +GLStateGLX::load_proc(void *userptr, const char* name) +{ + if (glXGetProcAddress) { + const GLubyte* bytes = reinterpret_cast(name); + GLADapiproc sym = glXGetProcAddress(bytes); + if (sym) { + return sym; + } + } + + GLStateGLX* state = reinterpret_cast(userptr); + return reinterpret_cast(state->lib_.load(name)); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-glx.h glmark2-2021.02/android/jni/src/gl-state-glx.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-glx.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-glx.h 2021-04-25 01:47:22.000000000 +0800 @@ -26,13 +26,11 @@ #include "gl-state.h" #include "gl-visual-config.h" #include "gl-headers.h" +#include "shared-library.h" #include -#include -#define GLX_GLXEXT_PROTOTYPES -#include -#include +#include class GLStateGLX : public GLState { @@ -43,10 +41,10 @@ bool valid(); bool init_display(void* native_display, GLVisualConfig& config_pref); bool init_surface(void* native_window); - void init_gl_extensions(); + bool init_gl_extensions(); bool reset(); void swap(); - bool gotNativeConfig(int& vid); + bool gotNativeConfig(intptr_t& vid); void getVisualConfig(GLVisualConfig& vc); private: @@ -57,11 +55,14 @@ void get_glvisualconfig_glx(GLXFBConfig config, GLVisualConfig &visual_config); GLXFBConfig select_best_config(std::vector configs); + static GLADapiproc load_proc(void* userptr, const char* name); + Display* xdpy_; Window xwin_; GLXFBConfig glx_fbconfig_; GLXContext glx_context_; GLVisualConfig requested_visual_config_; + SharedLibrary lib_; }; #endif /* GLMARK2_GL_STATE_GLX_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state.h glmark2-2021.02/android/jni/src/gl-state.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,6 +22,8 @@ #ifndef GLMARK2_GL_STATE_H_ #define GLMARK2_GL_STATE_H_ +#include + class GLVisualConfig; class GLState @@ -31,11 +33,11 @@ virtual bool init_display(void *native_display, GLVisualConfig& config_pref) = 0; virtual bool init_surface(void *native_window) = 0; - virtual void init_gl_extensions() = 0; + virtual bool init_gl_extensions() = 0; virtual bool valid() = 0; virtual bool reset() = 0; virtual void swap() = 0; - virtual bool gotNativeConfig(int& vid) = 0; + virtual bool gotNativeConfig(intptr_t& vid) = 0; virtual void getVisualConfig(GLVisualConfig& vc) = 0; }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-wgl.cpp glmark2-2021.02/android/jni/src/gl-state-wgl.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-wgl.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-wgl.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,178 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +#include "gl-state-wgl.h" + +#include +#include "gl-headers.h" +#include "log.h" +#include "options.h" + +/****************** + * Public methods * + ******************/ + +GLStateWGL::GLStateWGL() : hdc_(nullptr), wgl_context_(nullptr), get_proc_addr_(nullptr) {} + +GLStateWGL::~GLStateWGL() +{ + if (wgl_context_) { + wglDeleteContext(wgl_context_); + wgl_context_ = nullptr; + } +} + +bool +GLStateWGL::init_display(void* native_display, GLVisualConfig& /*visual_config*/) +{ + if (!wgl_library_.open("opengl32.dll")) { + Log::error("Failed to load opengl32.dll\n"); + return false; + } + + hdc_ = reinterpret_cast(native_display); + + PIXELFORMATDESCRIPTOR pixel_format_desc = {}; + pixel_format_desc.nSize = sizeof(pixel_format_desc); + pixel_format_desc.nVersion = 1; + pixel_format_desc.dwFlags = + PFD_DRAW_TO_WINDOW | PFD_GENERIC_ACCELERATED | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pixel_format_desc.iPixelType = PFD_TYPE_RGBA; + pixel_format_desc.cColorBits = 24; + pixel_format_desc.cAlphaBits = 8; + pixel_format_desc.cDepthBits = 24; + pixel_format_desc.cStencilBits = 8; + pixel_format_desc.iLayerType = PFD_MAIN_PLANE; + + int pixelFormat = ChoosePixelFormat(hdc_, &pixel_format_desc); + if (pixelFormat == 0) { + Log::error("Could not find a compatible pixel format\n"); + return false; + } + + if (SetPixelFormat(hdc_, pixelFormat, &pixel_format_desc) != TRUE) { + Log::error("Failed to set the pixel format\n"); + return false; + } + + wgl_context_ = wglCreateContext(hdc_); + if (!wgl_context_) { + Log::error("Failed to create a WGL context\n"); + return false; + } + + if (wglMakeCurrent(hdc_, wgl_context_) == FALSE) { + Log::error("Error during wglMakeCurrent\n"); + return false; + } + + get_proc_addr_ = wgl_library_.load("wglGetProcAddress"); + if (!get_proc_addr_) { + Log::error("Failed to find wglGetProcAddress\n"); + return false; + } + + if (gladLoadGLUserPtr(load_proc, this) == 0) { + Log::error("Failed to load GL entry points\n"); + return false; + } + + if (gladLoadWGLUserPtr(hdc_, load_proc, this) == 0) { + Log::error("Failed to load WGL entry points\n"); + return false; + } + + return true; +} + +bool +GLStateWGL::init_surface(void* /*native_window*/) +{ + return true; +} + +bool +GLStateWGL::init_gl_extensions() +{ + GLExtensions::MapBuffer = glMapBuffer; + GLExtensions::UnmapBuffer = glUnmapBuffer; + return true; +} + +bool +GLStateWGL::valid() +{ + if (!wglSwapIntervalEXT || wglSwapIntervalEXT(0) == FALSE) { + Log::info("** Failed to set swap interval. Results may be bounded above by refresh rate.\n"); + } + + return wgl_context_ != nullptr; +} + + +bool +GLStateWGL::reset() +{ + return true; +} + +void +GLStateWGL::swap() +{ + if (SwapBuffers(hdc_) == FALSE) { + Log::error("Error during SwapBuffers\n"); + } +} + +bool +GLStateWGL::gotNativeConfig(intptr_t& vid) +{ + vid = 1; + return true; +} + +void +GLStateWGL::getVisualConfig(GLVisualConfig& vc) +{ + vc.buffer = 32; + vc.red = 8; + vc.green = 8; + vc.blue = 8; + vc.alpha = 8; + vc.depth = 24; + vc.stencil = 8; +} + +/******************* + * Private methods * + *******************/ + +GLStateWGL::api_proc GLStateWGL::load_proc(void *userptr, const char *name) +{ + GLStateWGL* state = reinterpret_cast(userptr); + auto get_proc_addr = reinterpret_cast(state->get_proc_addr_); + auto proc = reinterpret_cast(get_proc_addr(name)); + if (proc) { + return proc; + } + return reinterpret_cast(state->wgl_library_.load(name)); +} + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-wgl.h glmark2-2021.02/android/jni/src/gl-state-wgl.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-state-wgl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-state-wgl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,54 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +#ifndef GLMARK2_GL_STATE_WGL_H_ +#define GLMARK2_GL_STATE_WGL_H_ + +#include "gl-state.h" +#include "shared-library.h" +#include + +class GLStateWGL : public GLState +{ +public: + GLStateWGL(); + ~GLStateWGL(); + + bool init_display(void* native_display, GLVisualConfig& config_pref); + bool init_surface(void* native_window); + bool init_gl_extensions(); + bool valid(); + bool reset(); + void swap(); + bool gotNativeConfig(intptr_t& vid); + void getVisualConfig(GLVisualConfig& vc); + +private: + using api_proc = void (*)(); + static api_proc load_proc(void *userptr, const char *name); + + SharedLibrary wgl_library_; + HDC hdc_; + HGLRC wgl_context_; + void* get_proc_addr_; +}; + +#endif // GLMARK2_GL_STATE_WGL_H_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-visual-config.cpp glmark2-2021.02/android/jni/src/gl-visual-config.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/gl-visual-config.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/gl-visual-config.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -108,6 +108,14 @@ /* Reward exact matches with the maximum per component score */ score = MAXIMUM_COMPONENT_SCORE; } + else if (component > 8 && target <= 8 && scale > 1) + { + /* Penalize RGBA component widths larger than 8, since they are + * unlikely to be what the users want or properly supported for + * display. Such widths can still be used, but only if explicitly + * requested. */ + score = UNACCEPTABLE_COMPONENT_PENALTY; + } else { /* diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/image-reader.cpp glmark2-2021.02/android/jni/src/image-reader.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/image-reader.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/image-reader.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -120,7 +120,7 @@ Log::debug("Reading PNG file %s\n", filename.c_str()); - const std::auto_ptr is_ptr(Util::get_resource(filename)); + const std::unique_ptr is_ptr(Util::get_resource(filename)); if (!(*is_ptr)) { Log::error("Cannot open file %s!\n", filename.c_str()); return false; @@ -268,7 +268,7 @@ size_t n = static_cast(num_bytes); while (n > src->pub.bytes_in_buffer) { n -= src->pub.bytes_in_buffer; - (*src->fill_input_buffer)(cinfo); + src->fill_input_buffer(cinfo); } src->pub.next_input_byte += n; src->pub.bytes_in_buffer -= n; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/dirent.h glmark2-2021.02/android/jni/src/include/dirent.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/dirent.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/include/dirent.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1224 @@ +/* + * Dirent interface for Microsoft Visual Studio + * + * Copyright (C) 2006-2012 Toni Ronkko + * This file is part of dirent. Dirent may be freely distributed + * under the MIT license. For all details and documentation, see + * https://github.com/tronkko/dirent + */ +#ifndef DIRENT_H +#define DIRENT_H + +/* + * Include windows.h without Windows Sockets 1.1 to prevent conflicts with + * Windows Sockets 2.0. + */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Indicates that d_type field is available in dirent structure */ +#define _DIRENT_HAVE_D_TYPE + +/* Indicates that d_namlen field is available in dirent structure */ +#define _DIRENT_HAVE_D_NAMLEN + +/* Entries missing from MSVC 6.0 */ +#if !defined(FILE_ATTRIBUTE_DEVICE) +# define FILE_ATTRIBUTE_DEVICE 0x40 +#endif + +/* File type and permission flags for stat(), general mask */ +#if !defined(S_IFMT) +# define S_IFMT _S_IFMT +#endif + +/* Directory bit */ +#if !defined(S_IFDIR) +# define S_IFDIR _S_IFDIR +#endif + +/* Character device bit */ +#if !defined(S_IFCHR) +# define S_IFCHR _S_IFCHR +#endif + +/* Pipe bit */ +#if !defined(S_IFFIFO) +# define S_IFFIFO _S_IFFIFO +#endif + +/* Regular file bit */ +#if !defined(S_IFREG) +# define S_IFREG _S_IFREG +#endif + +/* Read permission */ +#if !defined(S_IREAD) +# define S_IREAD _S_IREAD +#endif + +/* Write permission */ +#if !defined(S_IWRITE) +# define S_IWRITE _S_IWRITE +#endif + +/* Execute permission */ +#if !defined(S_IEXEC) +# define S_IEXEC _S_IEXEC +#endif + +/* Pipe */ +#if !defined(S_IFIFO) +# define S_IFIFO _S_IFIFO +#endif + +/* Block device */ +#if !defined(S_IFBLK) +# define S_IFBLK 0 +#endif + +/* Link */ +#if !defined(S_IFLNK) +# define S_IFLNK 0 +#endif + +/* Socket */ +#if !defined(S_IFSOCK) +# define S_IFSOCK 0 +#endif + +/* Read user permission */ +#if !defined(S_IRUSR) +# define S_IRUSR S_IREAD +#endif + +/* Write user permission */ +#if !defined(S_IWUSR) +# define S_IWUSR S_IWRITE +#endif + +/* Execute user permission */ +#if !defined(S_IXUSR) +# define S_IXUSR 0 +#endif + +/* Read group permission */ +#if !defined(S_IRGRP) +# define S_IRGRP 0 +#endif + +/* Write group permission */ +#if !defined(S_IWGRP) +# define S_IWGRP 0 +#endif + +/* Execute group permission */ +#if !defined(S_IXGRP) +# define S_IXGRP 0 +#endif + +/* Read others permission */ +#if !defined(S_IROTH) +# define S_IROTH 0 +#endif + +/* Write others permission */ +#if !defined(S_IWOTH) +# define S_IWOTH 0 +#endif + +/* Execute others permission */ +#if !defined(S_IXOTH) +# define S_IXOTH 0 +#endif + +/* Maximum length of file name */ +#if !defined(PATH_MAX) +# define PATH_MAX MAX_PATH +#endif +#if !defined(FILENAME_MAX) +# define FILENAME_MAX MAX_PATH +#endif +#if !defined(NAME_MAX) +# define NAME_MAX FILENAME_MAX +#endif + +/* File type flags for d_type */ +#define DT_UNKNOWN 0 +#define DT_REG S_IFREG +#define DT_DIR S_IFDIR +#define DT_FIFO S_IFIFO +#define DT_SOCK S_IFSOCK +#define DT_CHR S_IFCHR +#define DT_BLK S_IFBLK +#define DT_LNK S_IFLNK + +/* Macros for converting between st_mode and d_type */ +#define IFTODT(mode) ((mode) & S_IFMT) +#define DTTOIF(type) (type) + +/* + * File type macros. Note that block devices, sockets and links cannot be + * distinguished on Windows and the macros S_ISBLK, S_ISSOCK and S_ISLNK are + * only defined for compatibility. These macros should always return false + * on Windows. + */ +#if !defined(S_ISFIFO) +# define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#endif +#if !defined(S_ISDIR) +# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) +# define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#endif +#if !defined(S_ISLNK) +# define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#endif +#if !defined(S_ISSOCK) +# define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) +#endif +#if !defined(S_ISCHR) +# define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#endif +#if !defined(S_ISBLK) +# define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#endif + +/* Return the exact length of the file name without zero terminator */ +#define _D_EXACT_NAMLEN(p) ((p)->d_namlen) + +/* Return the maximum size of a file name */ +#define _D_ALLOC_NAMLEN(p) ((PATH_MAX)+1) + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Wide-character version */ +struct _wdirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + wchar_t d_name[PATH_MAX+1]; +}; +typedef struct _wdirent _wdirent; + +struct _WDIR { + /* Current directory entry */ + struct _wdirent ent; + + /* Private file data */ + WIN32_FIND_DATAW data; + + /* True if data is valid */ + int cached; + + /* Win32 search handle */ + HANDLE handle; + + /* Initial directory name */ + wchar_t *patt; +}; +typedef struct _WDIR _WDIR; + +/* Multi-byte character version */ +struct dirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + char d_name[PATH_MAX+1]; +}; +typedef struct dirent dirent; + +struct DIR { + struct dirent ent; + struct _WDIR *wdirp; +}; +typedef struct DIR DIR; + + +/* Dirent functions */ +static DIR *opendir (const char *dirname); +static _WDIR *_wopendir (const wchar_t *dirname); + +static struct dirent *readdir (DIR *dirp); +static struct _wdirent *_wreaddir (_WDIR *dirp); + +static int readdir_r( + DIR *dirp, struct dirent *entry, struct dirent **result); +static int _wreaddir_r( + _WDIR *dirp, struct _wdirent *entry, struct _wdirent **result); + +static int closedir (DIR *dirp); +static int _wclosedir (_WDIR *dirp); + +static void rewinddir (DIR* dirp); +static void _wrewinddir (_WDIR* dirp); + +static int scandir (const char *dirname, struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)); + +static int alphasort (const struct dirent **a, const struct dirent **b); + +static int versionsort (const struct dirent **a, const struct dirent **b); + + +/* For compatibility with Symbian */ +#define wdirent _wdirent +#define WDIR _WDIR +#define wopendir _wopendir +#define wreaddir _wreaddir +#define wclosedir _wclosedir +#define wrewinddir _wrewinddir + + +/* Internal utility functions */ +static WIN32_FIND_DATAW *dirent_first (_WDIR *dirp); +static WIN32_FIND_DATAW *dirent_next (_WDIR *dirp); + +static int dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count); + +static int dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, + const wchar_t *wcstr, + size_t count); + +static void dirent_set_errno (int error); + + +/* + * Open directory stream DIRNAME for read and return a pointer to the + * internal working area that is used to retrieve individual directory + * entries. + */ +static _WDIR* +_wopendir( + const wchar_t *dirname) +{ + _WDIR *dirp = NULL; + int error; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate new _WDIR structure */ + dirp = (_WDIR*) malloc (sizeof (struct _WDIR)); + if (dirp != NULL) { + DWORD n; + + /* Reset _WDIR structure */ + dirp->handle = INVALID_HANDLE_VALUE; + dirp->patt = NULL; + dirp->cached = 0; + + /* Compute the length of full path plus zero terminator + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) + n = wcslen(dirname); +# else + n = GetFullPathNameW (dirname, 0, NULL, NULL); +# endif + + /* Allocate room for absolute directory name and search pattern */ + dirp->patt = (wchar_t*) malloc (sizeof (wchar_t) * n + 16); + if (dirp->patt) { + + /* + * Convert relative directory name to an absolute one. This + * allows rewinddir() to function correctly even when current + * working directory is changed between opendir() and rewinddir(). + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) + wcsncpy_s(dirp->patt, n+1, dirname, n); +# else + n = GetFullPathNameW (dirname, n, dirp->patt, NULL); +# endif + if (n > 0) { + wchar_t *p; + + /* Append search pattern \* to the directory name */ + p = dirp->patt + n; + if (dirp->patt < p) { + switch (p[-1]) { + case '\\': + case '/': + case ':': + /* Directory ends in path separator, e.g. c:\temp\ */ + /*NOP*/; + break; + + default: + /* Directory name doesn't end in path separator */ + *p++ = '\\'; + } + } + *p++ = '*'; + *p = '\0'; + + /* Open directory stream and retrieve the first entry */ + if (dirent_first (dirp)) { + /* Directory stream opened successfully */ + error = 0; + } else { + /* Cannot retrieve first entry */ + error = 1; + dirent_set_errno (ENOENT); + } + + } else { + /* Cannot retrieve full path name */ + dirent_set_errno (ENOENT); + error = 1; + } + + } else { + /* Cannot allocate memory for search pattern */ + error = 1; + } + + } else { + /* Cannot allocate _WDIR structure */ + error = 1; + } + + /* Clean up in case of error */ + if (error && dirp) { + _wclosedir (dirp); + dirp = NULL; + } + + return dirp; +} + +/* + * Read next directory entry. + * + * Returns pointer to static directory entry which may be overwritten by + * subsequent calls to _wreaddir(). + */ +static struct _wdirent* +_wreaddir( + _WDIR *dirp) +{ + struct _wdirent *entry; + + /* + * Read directory entry to buffer. We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) _wreaddir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry. + * + * Returns zero on success. If end of directory stream is reached, then sets + * result to NULL and returns zero. + */ +static int +_wreaddir_r( + _WDIR *dirp, + struct _wdirent *entry, + struct _wdirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp); + if (datap) { + size_t n; + DWORD attr; + + /* + * Copy file name as wide-character string. If the file name is too + * long to fit in to the destination buffer, then truncate file name + * to PATH_MAX characters and zero-terminate the buffer. + */ + n = 0; + while (n < PATH_MAX && datap->cFileName[n] != 0) { + entry->d_name[n] = datap->cFileName[n]; + n++; + } + entry->d_name[n] = 0; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n; + + /* File type */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct _wdirent); + + /* Set result address */ + *result = entry; + + } else { + + /* Return NULL to indicate end of directory */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream opened by opendir() function. This invalidates the + * DIR structure as well as any directory entry read previously by + * _wreaddir(). + */ +static int +_wclosedir( + _WDIR *dirp) +{ + int ok; + if (dirp) { + + /* Release search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + dirp->handle = INVALID_HANDLE_VALUE; + } + + /* Release search pattern */ + if (dirp->patt) { + free (dirp->patt); + dirp->patt = NULL; + } + + /* Release directory structure */ + free (dirp); + ok = /*success*/0; + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream such that _wreaddir() returns the very first + * file name again. + */ +static void +_wrewinddir( + _WDIR* dirp) +{ + if (dirp) { + /* Release existing search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + } + + /* Open new search handle */ + dirent_first (dirp); + } +} + +/* Get first directory entry (internal) */ +static WIN32_FIND_DATAW* +dirent_first( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *datap; + + /* Open directory and retrieve the first entry */ + dirp->handle = FindFirstFileExW( + dirp->patt, FindExInfoStandard, &dirp->data, + FindExSearchNameMatch, NULL, 0); + if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* a directory entry is now waiting in memory */ + datap = &dirp->data; + dirp->cached = 1; + + } else { + + /* Failed to re-open directory: no directory entry in memory */ + dirp->cached = 0; + datap = NULL; + + } + return datap; +} + +/* + * Get next directory entry (internal). + * + * Returns + */ +static WIN32_FIND_DATAW* +dirent_next( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *p; + + /* Get next directory entry */ + if (dirp->cached != 0) { + + /* A valid directory entry already in memory */ + p = &dirp->data; + dirp->cached = 0; + + } else if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* Get the next directory entry from stream */ + if (FindNextFileW (dirp->handle, &dirp->data) != FALSE) { + /* Got a file */ + p = &dirp->data; + } else { + /* The very last entry has been processed or an error occurred */ + FindClose (dirp->handle); + dirp->handle = INVALID_HANDLE_VALUE; + p = NULL; + } + + } else { + + /* End of directory stream reached */ + p = NULL; + + } + + return p; +} + +/* + * Open directory stream using plain old C-string. + */ +static DIR* +opendir( + const char *dirname) +{ + struct DIR *dirp; + int error; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate memory for DIR structure */ + dirp = (DIR*) malloc (sizeof (struct DIR)); + if (dirp) { + wchar_t wname[PATH_MAX + 1]; + size_t n; + + /* Convert directory name to wide-character string */ + error = dirent_mbstowcs_s( + &n, wname, PATH_MAX + 1, dirname, PATH_MAX + 1); + if (!error) { + + /* Open directory stream using wide-character name */ + dirp->wdirp = _wopendir (wname); + if (dirp->wdirp) { + /* Directory stream opened */ + error = 0; + } else { + /* Failed to open directory stream */ + error = 1; + } + + } else { + /* + * Cannot convert file name to wide-character string. This + * occurs if the string contains invalid multi-byte sequences or + * the output buffer is too small to contain the resulting + * string. + */ + error = 1; + } + + } else { + /* Cannot allocate DIR structure */ + error = 1; + } + + /* Clean up in case of error */ + if (error && dirp) { + free (dirp); + dirp = NULL; + } + + return dirp; +} + +/* + * Read next directory entry. + */ +static struct dirent* +readdir( + DIR *dirp) +{ + struct dirent *entry; + + /* + * Read directory entry to buffer. We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) readdir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry into called-allocated buffer. + * + * Returns zero on success. If the end of directory stream is reached, then + * sets result to NULL and returns zero. + */ +static int +readdir_r( + DIR *dirp, + struct dirent *entry, + struct dirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp->wdirp); + if (datap) { + size_t n; + int error; + + /* Attempt to convert file name to multi-byte string */ + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, datap->cFileName, PATH_MAX + 1); + + /* + * If the file name cannot be represented by a multi-byte string, + * then attempt to use old 8+3 file name. This allows traditional + * Unix-code to access some file names despite of unicode + * characters, although file names may seem unfamiliar to the user. + * + * Be ware that the code below cannot come up with a short file + * name unless the file system provides one. At least + * VirtualBox shared folders fail to do this. + */ + if (error && datap->cAlternateFileName[0] != '\0') { + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, + datap->cAlternateFileName, PATH_MAX + 1); + } + + if (!error) { + DWORD attr; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n - 1; + + /* File attributes */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct dirent); + + } else { + + /* + * Cannot convert file name to multi-byte string so construct + * an erroneous directory entry and return that. Note that + * we cannot return NULL as that would stop the processing + * of directory entries completely. + */ + entry->d_name[0] = '?'; + entry->d_name[1] = '\0'; + entry->d_namlen = 1; + entry->d_type = DT_UNKNOWN; + entry->d_ino = 0; + entry->d_off = -1; + entry->d_reclen = 0; + + } + + /* Return pointer to directory entry */ + *result = entry; + + } else { + + /* No more directory entries */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream. + */ +static int +closedir( + DIR *dirp) +{ + int ok; + if (dirp) { + + /* Close wide-character directory stream */ + ok = _wclosedir (dirp->wdirp); + dirp->wdirp = NULL; + + /* Release multi-byte character version */ + free (dirp); + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream to beginning. + */ +static void +rewinddir( + DIR* dirp) +{ + /* Rewind wide-character string directory stream */ + _wrewinddir (dirp->wdirp); +} + +/* + * Scan directory for entries. + */ +static int +scandir( + const char *dirname, + struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)) +{ + struct dirent **files = NULL; + size_t size = 0; + size_t allocated = 0; + const size_t init_size = 1; + DIR *dir = NULL; + struct dirent *entry; + struct dirent *tmp = NULL; + size_t i; + int result = 0; + + /* Open directory stream */ + dir = opendir (dirname); + if (dir) { + + /* Read directory entries to memory */ + while (1) { + + /* Enlarge pointer table to make room for another pointer */ + if (size >= allocated) { + void *p; + size_t num_entries; + + /* Compute number of entries in the enlarged pointer table */ + if (size < init_size) { + /* Allocate initial pointer table */ + num_entries = init_size; + } else { + /* Double the size */ + num_entries = size * 2; + } + + /* Allocate first pointer table or enlarge existing table */ + p = realloc (files, sizeof (void*) * num_entries); + if (p != NULL) { + /* Got the memory */ + files = (dirent**) p; + allocated = num_entries; + } else { + /* Out of memory */ + result = -1; + break; + } + + } + + /* Allocate room for temporary directory entry */ + if (tmp == NULL) { + tmp = (struct dirent*) malloc (sizeof (struct dirent)); + if (tmp == NULL) { + /* Cannot allocate temporary directory entry */ + result = -1; + break; + } + } + + /* Read directory entry to temporary area */ + if (readdir_r (dir, tmp, &entry) == /*OK*/0) { + + /* Did we get an entry? */ + if (entry != NULL) { + int pass; + + /* Determine whether to include the entry in result */ + if (filter) { + /* Let the filter function decide */ + pass = filter (tmp); + } else { + /* No filter function, include everything */ + pass = 1; + } + + if (pass) { + /* Store the temporary entry to pointer table */ + files[size++] = tmp; + tmp = NULL; + + /* Keep up with the number of files */ + result++; + } + + } else { + + /* + * End of directory stream reached => sort entries and + * exit. + */ + qsort (files, size, sizeof (void*), + (int (*) (const void*, const void*)) compare); + break; + + } + + } else { + /* Error reading directory entry */ + result = /*Error*/ -1; + break; + } + + } + + } else { + /* Cannot open directory */ + result = /*Error*/ -1; + } + + /* Release temporary directory entry */ + if (tmp) { + free (tmp); + } + + /* Release allocated memory on error */ + if (result < 0) { + for (i = 0; i < size; i++) { + free (files[i]); + } + free (files); + files = NULL; + } + + /* Close directory stream */ + if (dir) { + closedir (dir); + } + + /* Pass pointer table to caller */ + if (namelist) { + *namelist = files; + } + return result; +} + +/* Alphabetical sorting */ +static int +alphasort( + const struct dirent **a, const struct dirent **b) +{ + return strcoll ((*a)->d_name, (*b)->d_name); +} + +/* Sort versions */ +static int +versionsort( + const struct dirent **a, const struct dirent **b) +{ + /* FIXME: implement strverscmp and use that */ + return alphasort (a, b); +} + +/* Convert multi-byte string to wide character string */ +static int +dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count) +{ + int error; + int n; + size_t len; + UINT cp; + DWORD flags; + + /* Determine code page for multi-byte string */ + if (AreFileApisANSI ()) { + /* Default ANSI code page */ + cp = GetACP (); + } else { + /* Default OEM code page */ + cp = GetOEMCP (); + } + + /* + * Determine flags based on the character set. For more information, + * please see https://docs.microsoft.com/fi-fi/windows/desktop/api/stringapiset/nf-stringapiset-multibytetowidechar + */ + switch (cp) { + case 42: + case 50220: + case 50221: + case 50222: + case 50225: + case 50227: + case 50229: + case 57002: + case 57003: + case 57004: + case 57005: + case 57006: + case 57007: + case 57008: + case 57009: + case 57010: + case 57011: + case 65000: + /* MultiByteToWideChar does not support MB_ERR_INVALID_CHARS */ + flags = 0; + break; + + default: + /* + * Ask MultiByteToWideChar to return an error if a multi-byte + * character cannot be converted to a wide-character. + */ + flags = MB_ERR_INVALID_CHARS; + } + + /* Compute the length of input string without zero-terminator */ + len = 0; + while (mbstr[len] != '\0' && len < count) { + len++; + } + + /* Convert to wide-character string */ + n = MultiByteToWideChar( + /* Source code page */ cp, + /* Flags */ flags, + /* Pointer to string to convert */ mbstr, + /* Size of multi-byte string */ (int) len, + /* Pointer to output buffer */ wcstr, + /* Size of output buffer */ (int)sizeInWords - 1 + ); + + /* Ensure that output buffer is zero-terminated */ + wcstr[n] = '\0'; + + /* Return length of wide-character string with zero-terminator */ + *pReturnValue = (size_t) (n + 1); + + /* Return zero if conversion succeeded */ + if (n > 0) { + error = 0; + } else { + error = 1; + } + return error; +} + +/* Convert wide-character string to multi-byte string */ +static int +dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, /* max size of mbstr */ + const wchar_t *wcstr, + size_t count) +{ + int n; + int error; + UINT cp; + size_t len; + BOOL flag = 0; + LPBOOL pflag; + + /* Determine code page for multi-byte string */ + if (AreFileApisANSI ()) { + /* Default ANSI code page */ + cp = GetACP (); + } else { + /* Default OEM code page */ + cp = GetOEMCP (); + } + + /* Compute the length of input string without zero-terminator */ + len = 0; + while (wcstr[len] != '\0' && len < count) { + len++; + } + + /* + * Determine if we can ask WideCharToMultiByte to return information on + * broken characters. For more information, please see + * https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-widechartomultibyte + */ + switch (cp) { + case CP_UTF7: + case CP_UTF8: + /* + * WideCharToMultiByte fails if we request information on default + * characters. This is just a nuisance but doesn't affect the + * conversion: if Windows is configured to use UTF-8, then the default + * character should not be needed anyway. + */ + pflag = NULL; + break; + + default: + /* + * Request that WideCharToMultiByte sets the flag if it uses the + * default character. + */ + pflag = &flag; + } + + /* Convert wide-character string to multi-byte character string */ + n = WideCharToMultiByte( + /* Target code page */ cp, + /* Flags */ 0, + /* Pointer to unicode string */ wcstr, + /* Length of unicode string */ (int) len, + /* Pointer to output buffer */ mbstr, + /* Size of output buffer */ (int)sizeInBytes - 1, + /* Default character */ NULL, + /* Whether default character was used or not */ pflag + ); + + /* Ensure that output buffer is zero-terminated */ + mbstr[n] = '\0'; + + /* Return length of multi-byte string with zero-terminator */ + *pReturnValue = (size_t) (n + 1); + + /* Return zero if conversion succeeded without using default characters */ + if (n > 0 && flag == 0) { + error = 0; + } else { + error = 1; + } + return error; +} + +/* Set errno variable */ +static void +dirent_set_errno( + int error) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + /* Microsoft Visual Studio 2005 and later */ + _set_errno (error); + +#else + + /* Non-Microsoft compiler or older Microsoft compiler */ + errno = error; + +#endif +} + + +#ifdef __cplusplus +} +#endif +#endif /*DIRENT_H*/ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/getopt.h glmark2-2021.02/android/jni/src/include/getopt.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/getopt.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/include/getopt.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,653 @@ +#ifndef __GETOPT_H__ +/** + * DISCLAIMER + * This file is part of the mingw-w64 runtime package. + * + * The mingw-w64 runtime package and its code is distributed in the hope that it + * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR + * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to + * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + /* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma warning(disable:4996) + +#define __GETOPT_H__ + +/* All the headers include this file. */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ + +#ifdef REPLACE_GETOPT +int opterr = 1; /* if error message should be printed */ +int optind = 1; /* index into parent argv vector */ +int optopt = '?'; /* character checked for validity */ +#undef optreset /* see getopt.h */ +#define optreset __mingw_optreset +int optreset; /* reset getopt */ +char *optarg; /* argument associated with option */ +#endif + +//extern int optind; /* index of first non-option in argv */ +//extern int optopt; /* single option character, as parsed */ +//extern int opterr; /* flag to enable built-in diagnostics... */ +// /* (user may set to zero, to suppress) */ +// +//extern char *optarg; /* pointer to argument of current option */ + +#define PRINT_ERROR ((opterr) && (*options != ':')) + +#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ +#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ +#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ + +/* return values */ +#define BADCH (int)'?' +#define BADARG ((*options == ':') ? (int)':' : (int)'?') +#define INORDER (int)1 + +#ifndef __CYGWIN__ +#define __progname __argv[0] +#else +extern char __declspec(dllimport) *__progname; +#endif + +#ifdef __CYGWIN__ +static char EMSG[] = ""; +#else +#define EMSG "" +#endif + +static int getopt_internal(int, char * const *, const char *, + const struct option *, int *, int); +static int parse_long_options(char * const *, const char *, + const struct option *, int *, int); +static int gcd(int, int); +static void permute_args(int, int, int, char * const *); + +static char *place = EMSG; /* option letter processing */ + +/* XXX: set optreset to 1 rather than these two */ +static int nonopt_start = -1; /* first non option argument (for permute) */ +static int nonopt_end = -1; /* first option after non options (for permute) */ + +/* Error messages */ +static const char recargchar[] = "option requires an argument -- %c"; +static const char recargstring[] = "option requires an argument -- %s"; +static const char ambig[] = "ambiguous option -- %.*s"; +static const char noarg[] = "option doesn't take an argument -- %.*s"; +static const char illoptchar[] = "unknown option -- %c"; +static const char illoptstring[] = "unknown option -- %s"; + +static void +_vwarnx(const char *fmt,va_list ap) +{ + (void)fprintf(stderr,"%s: ",__progname); + if (fmt != NULL) + (void)vfprintf(stderr,fmt,ap); + (void)fprintf(stderr,"\n"); +} + +static void +warnx(const char *fmt,...) +{ + va_list ap; + va_start(ap,fmt); + _vwarnx(fmt,ap); + va_end(ap); +} + +/* + * Compute the greatest common divisor of a and b. + */ +static int +gcd(int a, int b) +{ + int c; + + c = a % b; + while (c != 0) { + a = b; + b = c; + c = a % b; + } + + return (b); +} + +/* + * Exchange the block from nonopt_start to nonopt_end with the block + * from nonopt_end to opt_end (keeping the same order of arguments + * in each block). + */ +static void +permute_args(int panonopt_start, int panonopt_end, int opt_end, + char * const *nargv) +{ + int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; + char *swap; + + /* + * compute lengths of blocks and number and size of cycles + */ + nnonopts = panonopt_end - panonopt_start; + nopts = opt_end - panonopt_end; + ncycle = gcd(nnonopts, nopts); + cyclelen = (opt_end - panonopt_start) / ncycle; + + for (i = 0; i < ncycle; i++) { + cstart = panonopt_end+i; + pos = cstart; + for (j = 0; j < cyclelen; j++) { + if (pos >= panonopt_end) + pos -= nnonopts; + else + pos += nopts; + swap = nargv[pos]; + /* LINTED const cast */ + ((char **) nargv)[pos] = nargv[cstart]; + /* LINTED const cast */ + ((char **)nargv)[cstart] = swap; + } + } +} + +#ifdef REPLACE_GETOPT +/* + * getopt -- + * Parse argc/argv argument vector. + * + * [eventually this will replace the BSD getopt] + */ +int +getopt(int nargc, char * const *nargv, const char *options) +{ + + /* + * We don't pass FLAG_PERMUTE to getopt_internal() since + * the BSD getopt(3) (unlike GNU) has never done this. + * + * Furthermore, since many privileged programs call getopt() + * before dropping privileges it makes sense to keep things + * as simple (and bug-free) as possible. + */ + return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); +} +#endif /* REPLACE_GETOPT */ + +//extern int getopt(int nargc, char * const *nargv, const char *options); + +#ifdef _BSD_SOURCE +/* + * BSD adds the non-standard `optreset' feature, for reinitialisation + * of `getopt' parsing. We support this feature, for applications which + * proclaim their BSD heritage, before including this header; however, + * to maintain portability, developers are advised to avoid it. + */ +# define optreset __mingw_optreset +extern int optreset; +#endif +#ifdef __cplusplus +} +#endif +/* + * POSIX requires the `getopt' API to be specified in `unistd.h'; + * thus, `unistd.h' includes this header. However, we do not want + * to expose the `getopt_long' or `getopt_long_only' APIs, when + * included in this manner. Thus, close the standard __GETOPT_H__ + * declarations block, and open an additional __GETOPT_LONG_H__ + * specific block, only when *not* __UNISTD_H_SOURCED__, in which + * to declare the extended API. + */ +#endif /* !defined(__GETOPT_H__) */ + +#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) +#define __GETOPT_LONG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +struct option /* specification for a long form option... */ +{ + const char *name; /* option name, without leading hyphens */ + int has_arg; /* does it take an argument? */ + int *flag; /* where to save its status, or NULL */ + int val; /* its associated status value */ +}; + +enum /* permitted values for its `has_arg' field... */ +{ + no_argument = 0, /* option never takes an argument */ + required_argument, /* option always requires an argument */ + optional_argument /* option may take an argument */ +}; + +/* + * parse_long_options -- + * Parse long options in argc/argv argument vector. + * Returns -1 if short_too is set and the option does not match long_options. + */ +static int +parse_long_options(char * const *nargv, const char *options, + const struct option *long_options, int *idx, int short_too) +{ + char *current_argv, *has_equal; + size_t current_argv_len; + int i, ambiguous, match; + +#define IDENTICAL_INTERPRETATION(_x, _y) \ + (long_options[(_x)].has_arg == long_options[(_y)].has_arg && \ + long_options[(_x)].flag == long_options[(_y)].flag && \ + long_options[(_x)].val == long_options[(_y)].val) + + current_argv = place; + match = -1; + ambiguous = 0; + + optind++; + + if ((has_equal = strchr(current_argv, '=')) != NULL) { + /* argument found (--option=arg) */ + current_argv_len = has_equal - current_argv; + has_equal++; + } else + current_argv_len = strlen(current_argv); + + for (i = 0; long_options[i].name; i++) { + /* find matching long option */ + if (strncmp(current_argv, long_options[i].name, + current_argv_len)) + continue; + + if (strlen(long_options[i].name) == current_argv_len) { + /* exact match */ + match = i; + ambiguous = 0; + break; + } + /* + * If this is a known short option, don't allow + * a partial match of a single character. + */ + if (short_too && current_argv_len == 1) + continue; + + if (match == -1) /* partial match */ + match = i; + else if (!IDENTICAL_INTERPRETATION(i, match)) + ambiguous = 1; + } + if (ambiguous) { + /* ambiguous abbreviation */ + if (PRINT_ERROR) + warnx(ambig, (int)current_argv_len, + current_argv); + optopt = 0; + return (BADCH); + } + if (match != -1) { /* option found */ + if (long_options[match].has_arg == no_argument + && has_equal) { + if (PRINT_ERROR) + warnx(noarg, (int)current_argv_len, + current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + return (BADARG); + } + if (long_options[match].has_arg == required_argument || + long_options[match].has_arg == optional_argument) { + if (has_equal) + optarg = has_equal; + else if (long_options[match].has_arg == + required_argument) { + /* + * optional argument doesn't use next nargv + */ + optarg = nargv[optind++]; + } + } + if ((long_options[match].has_arg == required_argument) + && (optarg == NULL)) { + /* + * Missing argument; leading ':' indicates no error + * should be generated. + */ + if (PRINT_ERROR) + warnx(recargstring, + current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + --optind; + return (BADARG); + } + } else { /* unknown option */ + if (short_too) { + --optind; + return (-1); + } + if (PRINT_ERROR) + warnx(illoptstring, current_argv); + optopt = 0; + return (BADCH); + } + if (idx) + *idx = match; + if (long_options[match].flag) { + *long_options[match].flag = long_options[match].val; + return (0); + } else + return (long_options[match].val); +#undef IDENTICAL_INTERPRETATION +} + +/* + * getopt_internal -- + * Parse argc/argv argument vector. Called by user level routines. + */ +static int +getopt_internal(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx, int flags) +{ + char *oli; /* option letter list index */ + int optchar, short_too; + static int posixly_correct = -1; + + if (options == NULL) + return (-1); + + /* + * XXX Some GNU programs (like cvs) set optind to 0 instead of + * XXX using optreset. Work around this braindamage. + */ + if (optind == 0) + optind = optreset = 1; + + /* + * Disable GNU extensions if POSIXLY_CORRECT is set or options + * string begins with a '+'. + * + * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or + * optreset != 0 for GNU compatibility. + */ + if (posixly_correct == -1 || optreset != 0) + posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); + if (*options == '-') + flags |= FLAG_ALLARGS; + else if (posixly_correct || *options == '+') + flags &= ~FLAG_PERMUTE; + if (*options == '+' || *options == '-') + options++; + + optarg = NULL; + if (optreset) + nonopt_start = nonopt_end = -1; +start: + if (optreset || !*place) { /* update scanning pointer */ + optreset = 0; + if (optind >= nargc) { /* end of argument vector */ + place = EMSG; + if (nonopt_end != -1) { + /* do permutation, if we have to */ + permute_args(nonopt_start, nonopt_end, + optind, nargv); + optind -= nonopt_end - nonopt_start; + } + else if (nonopt_start != -1) { + /* + * If we skipped non-options, set optind + * to the first of them. + */ + optind = nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + if (*(place = nargv[optind]) != '-' || + (place[1] == '\0' && strchr(options, '-') == NULL)) { + place = EMSG; /* found non-option */ + if (flags & FLAG_ALLARGS) { + /* + * GNU extension: + * return non-option as argument to option 1 + */ + optarg = nargv[optind++]; + return (INORDER); + } + if (!(flags & FLAG_PERMUTE)) { + /* + * If no permutation wanted, stop parsing + * at first non-option. + */ + return (-1); + } + /* do permutation */ + if (nonopt_start == -1) + nonopt_start = optind; + else if (nonopt_end != -1) { + permute_args(nonopt_start, nonopt_end, + optind, nargv); + nonopt_start = optind - + (nonopt_end - nonopt_start); + nonopt_end = -1; + } + optind++; + /* process next argument */ + goto start; + } + if (nonopt_start != -1 && nonopt_end == -1) + nonopt_end = optind; + + /* + * If we have "-" do nothing, if "--" we are done. + */ + if (place[1] != '\0' && *++place == '-' && place[1] == '\0') { + optind++; + place = EMSG; + /* + * We found an option (--), so if we skipped + * non-options, we have to permute. + */ + if (nonopt_end != -1) { + permute_args(nonopt_start, nonopt_end, + optind, nargv); + optind -= nonopt_end - nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + } + + /* + * Check long options if: + * 1) we were passed some + * 2) the arg is not just "-" + * 3) either the arg starts with -- we are getopt_long_only() + */ + if (long_options != NULL && place != nargv[optind] && + (*place == '-' || (flags & FLAG_LONGONLY))) { + short_too = 0; + if (*place == '-') + place++; /* --foo long option */ + else if (*place != ':' && strchr(options, *place) != NULL) + short_too = 1; /* could be short option too */ + + optchar = parse_long_options(nargv, options, long_options, + idx, short_too); + if (optchar != -1) { + place = EMSG; + return (optchar); + } + } + + if ((optchar = (int)*place++) == (int)':' || + (optchar == (int)'-' && *place != '\0') || + (oli = (char*)strchr(options, optchar)) == NULL) { + /* + * If the user specified "-" and '-' isn't listed in + * options, return -1 (non-option) as per POSIX. + * Otherwise, it is an unknown option character (or ':'). + */ + if (optchar == (int)'-' && *place == '\0') + return (-1); + if (!*place) + ++optind; + if (PRINT_ERROR) + warnx(illoptchar, optchar); + optopt = optchar; + return (BADCH); + } + if (long_options != NULL && optchar == 'W' && oli[1] == ';') { + /* -W long-option */ + if (*place) /* no space */ + /* NOTHING */; + else if (++optind >= nargc) { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } else /* white space */ + place = nargv[optind]; + optchar = parse_long_options(nargv, options, long_options, + idx, 0); + place = EMSG; + return (optchar); + } + if (*++oli != ':') { /* doesn't take argument */ + if (!*place) + ++optind; + } else { /* takes (optional) argument */ + optarg = NULL; + if (*place) /* no white space */ + optarg = place; + else if (oli[1] != ':') { /* arg not optional */ + if (++optind >= nargc) { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } else + optarg = nargv[optind]; + } + place = EMSG; + ++optind; + } + /* dump back option letter */ + return (optchar); +} + +/* + * getopt_long -- + * Parse argc/argv argument vector. + */ +int +getopt_long(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE)); +} + +/* + * getopt_long_only -- + * Parse argc/argv argument vector. + */ +int +getopt_long_only(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE|FLAG_LONGONLY)); +} + +//extern int getopt_long(int nargc, char * const *nargv, const char *options, +// const struct option *long_options, int *idx); +//extern int getopt_long_only(int nargc, char * const *nargv, const char *options, +// const struct option *long_options, int *idx); +/* + * Previous MinGW implementation had... + */ +#ifndef HAVE_DECL_GETOPT +/* + * ...for the long form API only; keep this for compatibility. + */ +# define HAVE_DECL_GETOPT 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/sys/time.h glmark2-2021.02/android/jni/src/include/sys/time.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/include/sys/time.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/include/sys/time.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,45 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +// From Stack Overflow under Create Commons license: +// https://stackoverflow.com/a/31335254 + +#ifndef SYS_TIME_H_FOR_WINDOWS_ +#define SYS_TIME_H_FOR_WINDOWS_ + +#ifdef _WIN32 +#include +#include +#define CLOCK_MONOTONIC 0 +inline int clock_gettime(int, struct timespec *spec) +{ + __int64 wintime; + GetSystemTimeAsFileTime((FILETIME*)&wintime); + wintime -= 116444736000000000i64; // 1601 to 1970 + spec->tv_sec = wintime / 10000000i64; // seconds + spec->tv_nsec = wintime % 10000000i64 *100; // nanoseconds + return 0; +} +#else +#include +#endif + +#endif // SYS_TIME_H_FOR_WINDOWS_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/acinclude.m4 glmark2-2021.02/android/jni/src/libjpeg-turbo/acinclude.m4 --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/acinclude.m4 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/acinclude.m4 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,254 @@ +# AC_PROG_NASM +# -------------------------- +# Check that NASM exists and determine flags +AC_DEFUN([AC_PROG_NASM],[ + +AC_ARG_VAR(NASM, [NASM command (used to build the x86/x86-64 SIMD code)]) +if test "x$NASM" = "x"; then + AC_CHECK_PROGS(NASM, [nasm nasmw yasm]) + test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found]) +fi + +AC_MSG_CHECKING([for object file format of host system]) +case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + case "$host_cpu" in + x86_64) + objfmt='Win64-COFF' + ;; + *) + objfmt='Win32-COFF' + ;; + esac + ;; + msdosdjgpp* | go32*) + objfmt='COFF' + ;; + os2-emx*) # not tested + objfmt='MSOMF' # obj + ;; + linux*coff* | linux*oldld*) + objfmt='COFF' # ??? + ;; + linux*aout*) + objfmt='a.out' + ;; + linux*) + case "$host_cpu" in + x86_64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + ;; + kfreebsd* | freebsd* | netbsd* | openbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + objfmt='BSD-a.out' + else + case "$host_cpu" in + x86_64 | amd64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + fi + ;; + solaris* | sunos* | sysv* | sco*) + case "$host_cpu" in + x86_64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + ;; + darwin* | rhapsody* | nextstep* | openstep* | macos*) + case "$host_cpu" in + x86_64) + objfmt='Mach-O64' + ;; + *) + objfmt='Mach-O' + ;; + esac + ;; + *) + objfmt='ELF ?' + ;; +esac + +AC_MSG_RESULT([$objfmt]) +if test "$objfmt" = 'ELF ?'; then + objfmt='ELF' + AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.]) +fi + +AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ]) +case "$objfmt" in + MSOMF) NAFLAGS='-fobj -DOBJ32';; + Win32-COFF) NAFLAGS='-fwin32 -DWIN32';; + Win64-COFF) NAFLAGS='-fwin64 -DWIN64 -D__x86_64__';; + COFF) NAFLAGS='-fcoff -DCOFF';; + a.out) NAFLAGS='-faout -DAOUT';; + BSD-a.out) NAFLAGS='-faoutb -DAOUT';; + ELF) NAFLAGS='-felf -DELF';; + ELF64) NAFLAGS='-felf64 -DELF -D__x86_64__';; + RDF) NAFLAGS='-frdf -DRDF';; + Mach-O) NAFLAGS='-fmacho -DMACHO';; + Mach-O64) NAFLAGS='-fmacho64 -DMACHO -D__x86_64__';; +esac +AC_MSG_RESULT([$NAFLAGS]) +AC_SUBST([NAFLAGS]) + +AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works]) +cat > conftest.asm <&AC_FD_CC + cat conftest.asm >&AC_FD_CC + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.]) +fi + +AC_MSG_CHECKING([whether the linker accepts assembler output]) +try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC' +if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then + rm -rf conftest* + AC_MSG_RESULT(yes) +else + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([configuration problem: maybe object file format mismatch.]) +fi + +]) + +# AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE +# -------------------------- +# Test whether the assembler is suitable and supports NEON instructions +AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[ + ac_good_gnu_arm_assembler=no + ac_save_CC="$CC" + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -x assembler-with-cpp" + CC="$CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .arm + pld [r0] + vmovn.u16 d0, q0]])], ac_good_gnu_arm_assembler=yes) + + ac_use_gas_preprocessor=no + if test "x$ac_good_gnu_arm_assembler" = "xno" ; then + CC="gas-preprocessor.pl $CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .arm + pld [r0] + vmovn.u16 d0, q0]])], ac_use_gas_preprocessor=yes) + fi + CFLAGS="$ac_save_CFLAGS" + CC="$ac_save_CC" + + if test "x$ac_use_gas_preprocessor" = "xyes" ; then + CCAS="gas-preprocessor.pl $CCAS" + AC_SUBST([CCAS]) + ac_good_gnu_arm_assembler=yes + fi + + if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then + $1 + else + $2 + fi +]) + +# AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE +# -------------------------- +# Test whether the assembler is suitable and supports MIPS instructions +AC_DEFUN([AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE],[ + have_mips_dspr2=no + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -mdspr2" + + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + + int main () + { + int c = 0, a = 0, b = 0; + __asm__ __volatile__ ( + "precr.qb.ph %[c], %[a], %[b] \n\t" + : [c] "=r" (c) + : [a] "r" (a), [b] "r" (b) + ); + return c; + } + ]])], have_mips_dspr2=yes) + CFLAGS=$ac_save_CFLAGS + + if test "x$have_mips_dspr2" = "xyes" ; then + $1 + else + $2 + fi +]) + +AC_DEFUN([AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE],[ + ac_good_gnu_arm_assembler=no + ac_save_CC="$CC" + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -x assembler-with-cpp" + CC="$CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + MYVAR .req x0 + movi v0.16b, #100 + mov MYVAR, #100 + .unreq MYVAR]])], ac_good_gnu_arm_assembler=yes) + + ac_use_gas_preprocessor=no + if test "x$ac_good_gnu_arm_assembler" = "xno" ; then + CC="gas-preprocessor.pl $CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + MYVAR .req x0 + movi v0.16b, #100 + mov MYVAR, #100 + .unreq MYVAR]])], ac_use_gas_preprocessor=yes) + fi + CFLAGS="$ac_save_CFLAGS" + CC="$ac_save_CC" + + if test "x$ac_use_gas_preprocessor" = "xyes" ; then + CCAS="gas-preprocessor.pl $CCAS" + AC_SUBST([CCAS]) + ac_good_gnu_arm_assembler=yes + fi + + if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then + $1 + else + $2 + fi +]) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/bmp.c glmark2-2021.02/android/jni/src/libjpeg-turbo/bmp.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/bmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/bmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,334 @@ +/* + * Copyright (C)2011, 2015 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include "cdjpeg.h" +#include +#include +#include "tjutil.h" +#include "bmp.h" + + +/* This duplicates the functionality of the VirtualGL bitmap library using + the components from cjpeg and djpeg */ + + +/* Error handling (based on example in example.c) */ + +static char errStr[JMSG_LENGTH_MAX]="No error"; + +struct my_error_mgr +{ + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; +}; +typedef struct my_error_mgr *my_error_ptr; + +static void my_error_exit(j_common_ptr cinfo) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->setjmp_buffer, 1); +} + +/* Based on output_message() in jerror.c */ + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, errStr); +} + +#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ + retval=-1; goto bailout;} +#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, \ + strerror(errno)); retval=-1; goto bailout;} + + +static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup, + unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h) +{ + unsigned char *srcrowptr=srcbuf, *srccolptr; + int srcps=tjPixelSize[srcpf]; + int srcstride=srcbottomup? -w*srcps:w*srcps; + unsigned char *dstrowptr=dstbuf, *dstcolptr; + int dstps=tjPixelSize[dstpf]; + int dststride=dstbottomup? -w*dstps:w*dstps; + int row, col; + + if(srcbottomup) srcrowptr=&srcbuf[w*srcps*(h-1)]; + if(dstbottomup) dstrowptr=&dstbuf[w*dstps*(h-1)]; + + /* NOTE: These quick & dirty CMYK<->RGB conversion routines are for testing + purposes only. Properly converting between CMYK and RGB requires a color + management system. */ + + if(dstpf==TJPF_CMYK) + { + for(row=0; row1.0) c=1.0; if(c<0.) c=0.; + if(m>1.0) m=1.0; if(m<0.) m=0.; + if(y>1.0) y=1.0; if(y<0.) y=0.; + if(k>1.0) k=1.0; if(k<0.) k=0.; + *dstcolptr++=(unsigned char)(255.0-c*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-m*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-y*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-k*255.0+0.5); + } + } + } + else if(srcpf==TJPF_CMYK) + { + for(row=0; row255.0) r=255.0; if(r<0.) r=0.; + if(g>255.0) g=255.0; if(g<0.) g=0.; + if(b>255.0) b=255.0; if(b<0.) b=0.; + dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5); + dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5); + dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5); + } + } + } + else + { + for(row=0; row=TJ_NUMPF) + _throw("loadbmp(): Invalid argument"); + + if((file=fopen(filename, "rb"))==NULL) + _throwunix("loadbmp(): Cannot open input file"); + + cinfo.err=jpeg_std_error(&jerr.pub); + jerr.pub.error_exit=my_error_exit; + jerr.pub.output_message=my_output_message; + + if(setjmp(jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; goto bailout; + } + + jpeg_create_compress(&cinfo); + if((tempc=getc(file))<0 || ungetc(tempc, file)==EOF) + _throwunix("loadbmp(): Could not read input file") + else if(tempc==EOF) _throw("loadbmp(): Input file contains no data"); + + if(tempc=='B') + { + if((src=jinit_read_bmp(&cinfo))==NULL) + _throw("loadbmp(): Could not initialize bitmap loader"); + } + else if(tempc=='P') + { + if((src=jinit_read_ppm(&cinfo))==NULL) + _throw("loadbmp(): Could not initialize bitmap loader"); + } + else _throw("loadbmp(): Unsupported file type"); + + src->input_file=file; + (*src->start_input)(&cinfo, src); + (*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo); + + *w=cinfo.image_width; *h=cinfo.image_height; + + if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB) + srcpf=TJPF_GRAY; + else srcpf=TJPF_RGB; + + dstps=tjPixelSize[dstpf]; + if((*buf=(unsigned char *)malloc((*w)*(*h)*dstps))==NULL) + _throw("loadbmp(): Memory allocation failure"); + + while(cinfo.next_scanlineget_pixel_rows)(&cinfo, src); + for(i=0; ibuffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w, + nlines); + } + cinfo.next_scanline+=nlines; + } + + (*src->finish_input)(&cinfo, src); + + bailout: + jpeg_destroy_compress(&cinfo); + if(file) fclose(file); + if(retval<0 && buf && *buf) {free(*buf); *buf=NULL;} + return retval; +} + + +int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf, + int bottomup) +{ + int retval=0, srcps, dstpf; + struct jpeg_decompress_struct dinfo; + struct my_error_mgr jerr; + djpeg_dest_ptr dst; + FILE *file=NULL; + char *ptr=NULL; + + memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct)); + + if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF) + _throw("savebmp(): Invalid argument"); + + if((file=fopen(filename, "wb"))==NULL) + _throwunix("savebmp(): Cannot open output file"); + + dinfo.err=jpeg_std_error(&jerr.pub); + jerr.pub.error_exit=my_error_exit; + jerr.pub.output_message=my_output_message; + + if(setjmp(jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; goto bailout; + } + + jpeg_create_decompress(&dinfo); + if(srcpf==TJPF_GRAY) + { + dinfo.out_color_components=dinfo.output_components=1; + dinfo.out_color_space=JCS_GRAYSCALE; + } + else + { + dinfo.out_color_components=dinfo.output_components=3; + dinfo.out_color_space=JCS_RGB; + } + dinfo.image_width=w; dinfo.image_height=h; + dinfo.global_state=DSTATE_READY; + dinfo.scale_num=dinfo.scale_denom=1; + + ptr=strrchr(filename, '.'); + if(ptr && !strcasecmp(ptr, ".bmp")) + { + if((dst=jinit_write_bmp(&dinfo, 0))==NULL) + _throw("savebmp(): Could not initialize bitmap writer"); + } + else + { + if((dst=jinit_write_ppm(&dinfo))==NULL) + _throw("savebmp(): Could not initialize PPM writer"); + } + + dst->output_file=file; + (*dst->start_output)(&dinfo, dst); + (*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo); + + if(srcpf==TJPF_GRAY) dstpf=srcpf; + else dstpf=TJPF_RGB; + srcps=tjPixelSize[srcpf]; + + while(dinfo.output_scanlinebuffer_height; + for(i=0; ibuffer[i], dstpf, 0, w, + nlines); + } + (*dst->put_pixel_rows)(&dinfo, dst, nlines); + dinfo.output_scanline+=nlines; + } + + (*dst->finish_output)(&dinfo, dst); + + bailout: + jpeg_destroy_decompress(&dinfo); + if(file) fclose(file); + return retval; +} + +const char *bmpgeterr(void) +{ + return errStr; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/bmp.h glmark2-2021.02/android/jni/src/libjpeg-turbo/bmp.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/bmp.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/bmp.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,42 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BMP_H__ +#define __BMP_H__ + +#include "./turbojpeg.h" + +int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf, + int bottomup); + +int savebmp(char *filename, unsigned char *buf, int w, int h, int pf, + int bottomup); + +const char *bmpgeterr(void); + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cderror.h glmark2-2021.02/android/jni/src/libjpeg-turbo/cderror.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cderror.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/cderror.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,136 @@ +/* + * cderror.h + * + * Copyright (C) 1994-1997, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file defines the error and message codes for the cjpeg/djpeg + * applications. These strings are not needed as part of the JPEG library + * proper. + * Edit this file to add new codes, or to translate the message strings to + * some other language. + */ + +/* + * To define the enum list of message codes, include this file without + * defining macro JMESSAGE. To create a message string table, include it + * again with a suitable JMESSAGE definition (see jerror.c for an example). + */ +#ifndef JMESSAGE +#ifndef CDERROR_H +#define CDERROR_H +/* First time through, define the enum list */ +#define JMAKE_ENUM_LIST +#else +/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */ +#define JMESSAGE(code,string) +#endif /* CDERROR_H */ +#endif /* JMESSAGE */ + +#ifdef JMAKE_ENUM_LIST + +typedef enum { + +#define JMESSAGE(code,string) code , + +#endif /* JMAKE_ENUM_LIST */ + +JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */ + +#ifdef BMP_SUPPORTED +JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format") +JMESSAGE(JERR_BMP_BADDEPTH, "Only 8- and 24-bit BMP files are supported") +JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length") +JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1") +JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB") +JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported") +JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image") +JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM") +JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image") +JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image") +JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image") +JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image") +#endif /* BMP_SUPPORTED */ + +#ifdef GIF_SUPPORTED +JMESSAGE(JERR_GIF_BUG, "GIF output got confused") +JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d") +JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB") +JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file") +JMESSAGE(JERR_GIF_NOT, "Not a GIF file") +JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image") +JMESSAGE(JTRC_GIF_BADVERSION, + "Warning: unexpected GIF version number '%c%c%c'") +JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x") +JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input") +JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file") +JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring") +JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image") +JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits") +#endif /* GIF_SUPPORTED */ + +#ifdef PPM_SUPPORTED +JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB") +JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file") +JMESSAGE(JERR_PPM_TOOLARGE, "Integer value too large in PPM file") +JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file") +JMESSAGE(JTRC_PGM, "%ux%u PGM image") +JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image") +JMESSAGE(JTRC_PPM, "%ux%u PPM image") +JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image") +#endif /* PPM_SUPPORTED */ + +#ifdef RLE_SUPPORTED +JMESSAGE(JERR_RLE_BADERROR, "Bogus error code from RLE library") +JMESSAGE(JERR_RLE_COLORSPACE, "RLE output must be grayscale or RGB") +JMESSAGE(JERR_RLE_DIMENSIONS, "Image dimensions (%ux%u) too large for RLE") +JMESSAGE(JERR_RLE_EMPTY, "Empty RLE file") +JMESSAGE(JERR_RLE_EOF, "Premature EOF in RLE header") +JMESSAGE(JERR_RLE_MEM, "Insufficient memory for RLE header") +JMESSAGE(JERR_RLE_NOT, "Not an RLE file") +JMESSAGE(JERR_RLE_TOOMANYCHANNELS, "Cannot handle %d output channels for RLE") +JMESSAGE(JERR_RLE_UNSUPPORTED, "Cannot handle this RLE setup") +JMESSAGE(JTRC_RLE, "%ux%u full-color RLE file") +JMESSAGE(JTRC_RLE_FULLMAP, "%ux%u full-color RLE file with map of length %d") +JMESSAGE(JTRC_RLE_GRAY, "%ux%u grayscale RLE file") +JMESSAGE(JTRC_RLE_MAPGRAY, "%ux%u grayscale RLE file with map of length %d") +JMESSAGE(JTRC_RLE_MAPPED, "%ux%u colormapped RLE file with map of length %d") +#endif /* RLE_SUPPORTED */ + +#ifdef TARGA_SUPPORTED +JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format") +JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file") +JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB") +JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image") +JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image") +JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image") +#else +JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled") +#endif /* TARGA_SUPPORTED */ + +JMESSAGE(JERR_BAD_CMAP_FILE, + "Color map file is invalid or of unsupported format") +JMESSAGE(JERR_TOO_MANY_COLORS, + "Output file format cannot handle %d colormap entries") +JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed") +#ifdef TARGA_SUPPORTED +JMESSAGE(JERR_UNKNOWN_FORMAT, + "Unrecognized input file format --- perhaps you need -targa") +#else +JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format") +#endif +JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format") + +#ifdef JMAKE_ENUM_LIST + + JMSG_LASTADDONCODE +} ADDON_MESSAGE_CODE; + +#undef JMAKE_ENUM_LIST +#endif /* JMAKE_ENUM_LIST */ + +/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */ +#undef JMESSAGE diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cdjpeg.c glmark2-2021.02/android/jni/src/libjpeg-turbo/cdjpeg.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cdjpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/cdjpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,144 @@ +/* + * cdjpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains common support routines used by the IJG application + * programs (cjpeg, djpeg, jpegtran). + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + + +/* + * Optional progress monitor: display a percent-done figure on stderr. + */ + +#ifdef PROGRESS_REPORT + +METHODDEF(void) +progress_monitor (j_common_ptr cinfo) +{ + cd_progress_ptr prog = (cd_progress_ptr) cinfo->progress; + int total_passes = prog->pub.total_passes + prog->total_extra_passes; + int percent_done = (int) (prog->pub.pass_counter*100L/prog->pub.pass_limit); + + if (percent_done != prog->percent_done) { + prog->percent_done = percent_done; + if (total_passes > 1) { + fprintf(stderr, "\rPass %d/%d: %3d%% ", + prog->pub.completed_passes + prog->completed_extra_passes + 1, + total_passes, percent_done); + } else { + fprintf(stderr, "\r %3d%% ", percent_done); + } + fflush(stderr); + } +} + + +GLOBAL(void) +start_progress_monitor (j_common_ptr cinfo, cd_progress_ptr progress) +{ + /* Enable progress display, unless trace output is on */ + if (cinfo->err->trace_level == 0) { + progress->pub.progress_monitor = progress_monitor; + progress->completed_extra_passes = 0; + progress->total_extra_passes = 0; + progress->percent_done = -1; + cinfo->progress = &progress->pub; + } +} + + +GLOBAL(void) +end_progress_monitor (j_common_ptr cinfo) +{ + /* Clear away progress display */ + if (cinfo->err->trace_level == 0) { + fprintf(stderr, "\r \r"); + fflush(stderr); + } +} + +#endif + + +/* + * Case-insensitive matching of possibly-abbreviated keyword switches. + * keyword is the constant keyword (must be lower case already), + * minchars is length of minimum legal abbreviation. + */ + +GLOBAL(boolean) +keymatch (char *arg, const char *keyword, int minchars) +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return FALSE; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return FALSE; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return FALSE; + return TRUE; /* A-OK */ +} + + +/* + * Routines to establish binary I/O mode for stdin and stdout. + * Non-Unix systems often require some hacking to get out of text mode. + */ + +GLOBAL(FILE *) +read_stdin (void) +{ + FILE * input_file = stdin; + +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((input_file = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "Cannot reopen stdin\n"); + exit(EXIT_FAILURE); + } +#endif + return input_file; +} + + +GLOBAL(FILE *) +write_stdout (void) +{ + FILE * output_file = stdout; + +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdout), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((output_file = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) { + fprintf(stderr, "Cannot reopen stdout\n"); + exit(EXIT_FAILURE); + } +#endif + return output_file; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cdjpeg.h glmark2-2021.02/android/jni/src/libjpeg-turbo/cdjpeg.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cdjpeg.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/cdjpeg.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,145 @@ +/* + * cdjpeg.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains common declarations for the sample applications + * cjpeg and djpeg. It is NOT used by the core JPEG library. + */ + +#define JPEG_CJPEG_DJPEG /* define proper options in jconfig.h */ +#define JPEG_INTERNAL_OPTIONS /* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" /* get library error codes too */ +#include "cderror.h" /* get application-specific error codes */ + + +/* + * Object interface for cjpeg's source file decoding modules + */ + +typedef struct cjpeg_source_struct *cjpeg_source_ptr; + +struct cjpeg_source_struct { + void (*start_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + void (*finish_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + + FILE *input_file; + + JSAMPARRAY buffer; + JDIMENSION buffer_height; +}; + + +/* + * Object interface for djpeg's output file encoding modules + */ + +typedef struct djpeg_dest_struct *djpeg_dest_ptr; + +struct djpeg_dest_struct { + /* start_output is called after jpeg_start_decompress finishes. + * The color map will be ready at this time, if one is needed. + */ + void (*start_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo); + /* Emit the specified number of pixel rows from the buffer. */ + void (*put_pixel_rows) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied); + /* Finish up at the end of the image. */ + void (*finish_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo); + + /* Target file spec; filled in by djpeg.c after object is created. */ + FILE *output_file; + + /* Output pixel-row buffer. Created by module init or start_output. + * Width is cinfo->output_width * cinfo->output_components; + * height is buffer_height. + */ + JSAMPARRAY buffer; + JDIMENSION buffer_height; +}; + + +/* + * cjpeg/djpeg may need to perform extra passes to convert to or from + * the source/destination file format. The JPEG library does not know + * about these passes, but we'd like them to be counted by the progress + * monitor. We use an expanded progress monitor object to hold the + * additional pass count. + */ + +struct cdjpeg_progress_mgr { + struct jpeg_progress_mgr pub; /* fields known to JPEG library */ + int completed_extra_passes; /* extra passes completed */ + int total_extra_passes; /* total extra */ + /* last printed percentage stored here to avoid multiple printouts */ + int percent_done; +}; + +typedef struct cdjpeg_progress_mgr *cd_progress_ptr; + + +/* Module selection routines for I/O modules. */ + +EXTERN(cjpeg_source_ptr) jinit_read_bmp (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_bmp (j_decompress_ptr cinfo, + boolean is_os2); +EXTERN(cjpeg_source_ptr) jinit_read_gif (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_gif (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_ppm (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_ppm (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_rle (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_rle (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_targa (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_targa (j_decompress_ptr cinfo); + +/* cjpeg support routines (in rdswitch.c) */ + +EXTERN(boolean) read_quant_tables (j_compress_ptr cinfo, char *filename, + boolean force_baseline); +EXTERN(boolean) read_scan_script (j_compress_ptr cinfo, char *filename); +EXTERN(boolean) set_quality_ratings (j_compress_ptr cinfo, char *arg, + boolean force_baseline); +EXTERN(boolean) set_quant_slots (j_compress_ptr cinfo, char *arg); +EXTERN(boolean) set_sample_factors (j_compress_ptr cinfo, char *arg); + +/* djpeg support routines (in rdcolmap.c) */ + +EXTERN(void) read_color_map (j_decompress_ptr cinfo, FILE *infile); + +/* common support routines (in cdjpeg.c) */ + +EXTERN(void) enable_signal_catcher (j_common_ptr cinfo); +EXTERN(void) start_progress_monitor (j_common_ptr cinfo, + cd_progress_ptr progress); +EXTERN(void) end_progress_monitor (j_common_ptr cinfo); +EXTERN(boolean) keymatch (char *arg, const char *keyword, int minchars); +EXTERN(FILE *) read_stdin (void); +EXTERN(FILE *) write_stdout (void); + +/* miscellaneous useful macros */ + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#define WRITE_BINARY "w" +#else +#define READ_BINARY "rb" +#define WRITE_BINARY "wb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_WARNING +#define EXIT_WARNING 2 +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/change.log glmark2-2021.02/android/jni/src/libjpeg-turbo/change.log --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/change.log 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/change.log 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,313 @@ +NOTE: This file was modified by The libjpeg-turbo Project to include only +information relevant to libjpeg-turbo. + +CHANGE LOG for Independent JPEG Group's JPEG software + + +Version 9b 17-Jan-2016 +----------------------- + +Document 'f' specifier for jpegtran -crop specification. +Thank to Michele Martone for suggestion. + + +Version 9 13-Jan-2013 +---------------------- + +Add remark for jpeg_mem_dest() in jdatadst.c. +Thank to Elie-Gregoire Khoury for the hint. + +Correct argument type in format string, avoid compiler warnings. +Thank to Vincent Torri for hint. + + +Version 8d 15-Jan-2012 +----------------------- + +Add cjpeg -rgb option to create RGB JPEG files. +Using this switch suppresses the conversion from RGB +colorspace input to the default YCbCr JPEG colorspace. +Thank to Michael Koch for the initial suggestion. + +Add option to disable the region adjustment in the transupp crop code. +Thank to Jeffrey Friedl for the suggestion. + + +Version 8b 16-May-2010 +----------------------- + +Repair problem in new memory source manager with corrupt JPEG data. +Thank to Ted Campbell and Samuel Chun for the report. + + +Version 8a 28-Feb-2010 +----------------------- + +Writing tables-only datastreams via jpeg_write_tables works again. + +Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg. +Thank to Brett Blackham for the suggestion. + + +Version 8 10-Jan-2010 +---------------------- + +Add sanity check in BMP reader module to avoid cjpeg crash for empty input +image (thank to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error). + +Add data source and destination managers for read from and write to +memory buffers. New API functions jpeg_mem_src and jpeg_mem_dest. +Thank to Roberto Boni from Italy for the suggestion. + + +Version 7 27-Jun-2009 +---------------------- + +New scaled DCTs implemented. +djpeg now supports scalings N/8 with all N from 1 to 16. + +cjpeg -quality option has been extended for support of separate quality +settings for luminance and chrominance (or in general, for every provided +quantization table slot). +New API function jpeg_default_qtables() and q_scale_factor array in library. + +Support arithmetic entropy encoding and decoding. +Added files jaricom.c, jcarith.c, jdarith.c. + +jpegtran has a new "lossless" cropping feature. + +Implement -perfect option in jpegtran, new API function +jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch) + +Better error messages for jpegtran fopen failure. +(DP 203_jpegtran_errmsg.dpatch) + +Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c: +according to Netpbm, the de facto standard implementation of the PNM formats, +the most significant byte is first. (DP 203_rdppm.dpatch) + +Add -raw option to rdjpgcom not to mangle the output. +(DP 205_rdjpgcom_raw.dpatch) + +Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch) + +Add extern "C" to jpeglib.h. +This avoids the need to put extern "C" { ... } around #include "jpeglib.h" +in your C++ application. Defining the symbol DONT_USE_EXTERN_C in the +configuration prevents this. (DP 202_jpeglib.h_c++.dpatch) + + +Version 6b 27-Mar-1998 +----------------------- + +jpegtran has new features for lossless image transformations (rotation +and flipping) as well as "lossless" reduction to grayscale. + +jpegtran now copies comments by default; it has a -copy switch to enable +copying all APPn blocks as well, or to suppress comments. (Formerly it +always suppressed comments and APPn blocks.) jpegtran now also preserves +JFIF version and resolution information. + +New decompressor library feature: COM and APPn markers found in the input +file can be saved in memory for later use by the application. (Before, +you had to code this up yourself with a custom marker processor.) + +There is an unused field "void * client_data" now in compress and decompress +parameter structs; this may be useful in some applications. + +JFIF version number information is now saved by the decoder and accepted by +the encoder. jpegtran uses this to copy the source file's version number, +to ensure "jpegtran -copy all" won't create bogus files that contain JFXX +extensions but claim to be version 1.01. Applications that generate their +own JFXX extension markers also (finally) have a supported way to cause the +encoder to emit JFIF version number 1.02. + +djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather +than as unknown APP0 markers. + +In -verbose mode, djpeg and rdjpgcom will try to print the contents of +APP12 markers as text. Some digital cameras store useful text information +in APP12 markers. + +Handling of truncated data streams is more robust: blocks beyond the one in +which the error occurs will be output as uniform gray, or left unchanged +if decoding a progressive JPEG. The appearance no longer depends on the +Huffman tables being used. + +Huffman tables are checked for validity much more carefully than before. + +To avoid the Unisys LZW patent, djpeg's GIF output capability has been +changed to produce "uncompressed GIFs", and cjpeg's GIF input capability +has been removed altogether. We're not happy about it either, but there +seems to be no good alternative. + +The configure script now supports building libjpeg as a shared library +on many flavors of Unix (all the ones that GNU libtool knows how to +build shared libraries for). Use "./configure --enable-shared" to +try this out. + +New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio. +Also, a jconfig file and a build script for Metrowerks CodeWarrior +on Apple Macintosh. makefile.dj has been updated for DJGPP v2, and there +are miscellaneous other minor improvements in the makefiles. + +jmemmac.c now knows how to create temporary files following Mac System 7 +conventions. + +djpeg's -map switch is now able to read raw-format PPM files reliably. + +cjpeg -progressive -restart no longer generates any unnecessary DRI markers. + +Multiple calls to jpeg_simple_progression for a single JPEG object +no longer leak memory. + + +Version 6a 7-Feb-96 +-------------------- + +Library initialization sequence modified to detect version mismatches +and struct field packing mismatches between library and calling application. +This change requires applications to be recompiled, but does not require +any application source code change. + +All routine declarations changed to the style "GLOBAL(type) name ...", +that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the +routine's return type as an argument. This makes it possible to add +Microsoft-style linkage keywords to all the routines by changing just +these macros. Note that any application code that was using these macros +will have to be changed. + +DCT coefficient quantization tables are now stored in normal array order +rather than zigzag order. Application code that calls jpeg_add_quant_table, +or otherwise manipulates quantization tables directly, will need to be +changed. If you need to make such code work with either older or newer +versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is +recommended. + +djpeg's trace capability now dumps DQT tables in natural order, not zigzag +order. This allows the trace output to be made into a "-qtables" file +more easily. + +New system-dependent memory manager module for use on Apple Macintosh. + +Fix bug in cjpeg's -smooth option: last one or two scanlines would be +duplicates of the prior line unless the image height mod 16 was 1 or 2. + +Repair minor problems in VMS, BCC, MC6 makefiles. + +New configure script based on latest GNU Autoconf. + +Correct the list of include files needed by MetroWerks C for ccommand(). + +Numerous small documentation updates. + + +Version 6 2-Aug-95 +------------------- + +Progressive JPEG support: library can read and write full progressive JPEG +files. A "buffered image" mode supports incremental decoding for on-the-fly +display of progressive images. Simply recompiling an existing IJG-v5-based +decoder with v6 should allow it to read progressive files, though of course +without any special progressive display. + +New "jpegtran" application performs lossless transcoding between different +JPEG formats; primarily, it can be used to convert baseline to progressive +JPEG and vice versa. In support of jpegtran, the library now allows lossless +reading and writing of JPEG files as DCT coefficient arrays. This ability +may be of use in other applications. + +Notes for programmers: +* We changed jpeg_start_decompress() to be able to suspend; this makes all +decoding modes available to suspending-input applications. However, +existing applications that use suspending input will need to be changed +to check the return value from jpeg_start_decompress(). You don't need to +do anything if you don't use a suspending data source. +* We changed the interface to the virtual array routines: access_virt_array +routines now take a count of the number of rows to access this time. The +last parameter to request_virt_array routines is now interpreted as the +maximum number of rows that may be accessed at once, but not necessarily +the height of every access. + + +Version 5b 15-Mar-95 +--------------------- + +Correct bugs with grayscale images having v_samp_factor > 1. + +jpeg_write_raw_data() now supports output suspension. + +Correct bugs in "configure" script for case of compiling in +a directory other than the one containing the source files. + +Repair bug in jquant1.c: sometimes didn't use as many colors as it could. + +Borland C makefile and jconfig file work under either MS-DOS or OS/2. + +Miscellaneous improvements to documentation. + + +Version 5a 7-Dec-94 +-------------------- + +Changed color conversion roundoff behavior so that grayscale values are +represented exactly. (This causes test image files to change.) + +Make ordered dither use 16x16 instead of 4x4 pattern for a small quality +improvement. + +New configure script based on latest GNU Autoconf. +Fix configure script to handle CFLAGS correctly. +Rename *.auto files to *.cfg, so that configure script still works if +file names have been truncated for DOS. + +Fix bug in rdbmp.c: didn't allow for extra data between header and image. + +Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data. + +Fix several bugs in rdrle.c. + +NEED_SHORT_EXTERNAL_NAMES option was broken. + +Revise jerror.h/jerror.c for more flexibility in message table. + +Repair oversight in jmemname.c NO_MKTEMP case: file could be there +but unreadable. + + +Version 5 24-Sep-94 +-------------------- + +Version 5 represents a nearly complete redesign and rewrite of the IJG +software. Major user-visible changes include: + * Automatic configuration simplifies installation for most Unix systems. + * A range of speed vs. image quality tradeoffs are supported. + This includes resizing of an image during decompression: scaling down + by a factor of 1/2, 1/4, or 1/8 is handled very efficiently. + * New programs rdjpgcom and wrjpgcom allow insertion and extraction + of text comments in a JPEG file. + +The application programmer's interface to the library has changed completely. +Notable improvements include: + * We have eliminated the use of callback routines for handling the + uncompressed image data. The application now sees the library as a + set of routines that it calls to read or write image data on a + scanline-by-scanline basis. + * The application image data is represented in a conventional interleaved- + pixel format, rather than as a separate array for each color channel. + This can save a copying step in many programs. + * The handling of compressed data has been cleaned up: the application can + supply routines to source or sink the compressed data. It is possible to + suspend processing on source/sink buffer overrun, although this is not + supported in all operating modes. + * All static state has been eliminated from the library, so that multiple + instances of compression or decompression can be active concurrently. + * JPEG abbreviated datastream formats are supported, ie, quantization and + Huffman tables can be stored separately from the image data. + * And not only that, but the documentation of the library has improved + considerably! + + +The last widely used release before the version 5 rewrite was version 4A of +18-Feb-93. Change logs before that point have been discarded, since they +are not of much interest after the rewrite. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/ChangeLog.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/ChangeLog.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/ChangeLog.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/ChangeLog.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,861 @@ +1.5 beta1 +========= + +[1] Added full SIMD acceleration for PowerPC platforms using AltiVec VMX +(128-bit SIMD) instructions. Although the performance of libjpeg-turbo on +PowerPC was already good, due to the increased number of registers available +to the compiler vs. x86, it was still possible to speed up compression by about +3-4x and decompression by about 2-2.5x (relative to libjpeg v6b) through the +use of AltiVec instructions. + +[2] Added two new libjpeg API functions (jpeg_skip_scanlines() and +jpeg_crop_scanline()) that can be used to partially decode a JPEG image. See +libjpeg.txt for more details. + +[3] The TJCompressor and TJDecompressor classes in the TurboJPEG Java API now +implement the Closeable interface, so those classes can be used with a +try-with-resources statement. + +[4] The TurboJPEG Java classes now throw unchecked idiomatic exceptions +(IllegalArgumentException, IllegalStateException) for unrecoverable errors +caused by incorrect API usage, and those classes throw a new checked exception +type (TJException) for errors that are passed through from the C library. + +[5] Source buffers for the TurboJPEG C API functions, as well as the +jpeg_mem_src() function in the libjpeg API, are now declared as const pointers. +This facilitates passing read-only buffers to those functions and ensures the +caller that the source buffer will not be modified. This should not create any +backward API or ABI incompatibilities with prior libjpeg-turbo releases. + +[6] The MIPS DSPr2 SIMD code can now be compiled to support either FR=0 or FR=1 +FPUs. + +[7] Fixed additional negative left shifts and other issues reported by the GCC +and Clang undefined behavior sanitizers. Most of these issues affected only +32-bit code, and none of them was known to pose a security threat, but removing +the warnings makes it easier to detect actual security issues, should they +arise in the future. + +[8] Removed the unnecessary .arch directive from the ARM64 NEON SIMD code. +This directive was preventing the code from assembling using the clang +integrated assembler. + +[9] Fixed a regression caused by 1.4.1[6] that prevented 32-bit and 64-bit +libjpeg-turbo RPMs from being installed simultaneously on recent Red Hat/Fedora +distributions. This was due to the addition of a macro in jconfig.h that +allows the Huffman codec to determine the word size at compile time. Since +that macro differs between 32-bit and 64-bit builds, this caused a conflict +between the i386 and x86_64 RPMs (any differing files, other than executables, +are not allowed when 32-bit and 64-bit RPMs are installed simultaneously.) +Since the macro is used only internally, it has been moved into jconfigint.h. + +[10] The x86-64 SIMD code can now be disabled at run time by setting the +JSIMD_FORCENONE environment variable to 1 (the other SIMD implementations +already had this capability.) + +[11] Added a new command-line argument to TJBench (-nowrite) that prevents the +benchmark from outputting any images. This removes any potential operating +system overhead that might be caused by lazy writes to disk and thus improves +the consistency of the performance measurements. + +[12] Added SIMD acceleration for Huffman encoding on SSE2-capable x86 and +x86-64 platforms. This speeds up the compression of full-color JPEGs by about +10-15% on average (relative to libjpeg-turbo 1.4.x) when using modern Intel and +AMD CPUs. Additionally, this works around an issue in the clang optimizer that +prevents it (as of this writing) from achieving the same performance as GCC +when compiling the C version of the Huffman encoder +(https://llvm.org/bugs/show_bug.cgi?id=16035). For the purposes of benchmarking +or regression testing, SIMD-accelerated Huffman encoding can be disabled by +setting the JSIMD_NOHUFFENC environment variable to 1. + +[13] Added ARM 64-bit (ARMv8) NEON SIMD implementations of the commonly-used +compression algorithms (including the slow integer forward DCT and h2v2 & h2v1 +downsampling algorithms, which are not accelerated in the 32-bit NEON +implementation.) This speeds up the compression of full-color JPEGs by about +75% on average on a Cavium ThunderX processor and by about 2-2.5x on average on +Cortex-A53 and Cortex-A57 cores. + +[14] Added SIMD acceleration for Huffman encoding on NEON-capable ARM 32-bit +and 64-bit platforms. + +For 32-bit code, this speeds up the compression of full-color JPEGs by about +30% on average on a typical iOS device (iPhone 4S, Cortex-A9) and by about 6-7% +on average on a typical Android device (Nexus 5X, Cortex-A53 and Cortex-A57), +relative to libjpeg-turbo 1.4.x. Note that the larger speedup under iOS is due +to the fact that iOS builds use LLVM, which does not optimize the C Huffman +encoder as well as GCC does. + +For 64-bit code, NEON-accelerated Huffman encoding speeds up the compression of +full-color JPEGs by about 40% on average on a typical iOS device (iPhone 5S, +Apple A7) and by about 7-8% on average on a typical Android device (Nexus 5X, +Cortex-A53 and Cortex-A57), in addition to the speedup described in [13] above. + +For the purposes of benchmarking or regression testing, SIMD-accelerated +Huffman encoding can be disabled by setting the JSIMD_NOHUFFENC environment +variable to 1. + +[15] pkg-config (.pc) scripts are now included for both the libjpeg and +TurboJPEG API libraries on Un*x systems. Note that if a project's build system +relies on these scripts, then it will not be possible to build that project +with libjpeg or with a prior version of libjpeg-turbo. + +[16] Optimized the ARM 64-bit (ARMv8) NEON SIMD decompression routines to +improve performance on CPUs with in-order pipelines. This speeds up the +decompression of full-color JPEGs by nearly 2x on average on a Cavium ThunderX +processor and by about 15% on average on a Cortex-A53 core. + +[17] Fixed an issue in the accelerated Huffman decoder that could have caused +the decoder to read past the end of the input buffer when a malformed, +specially-crafted JPEG image was being decompressed. In prior versions of +libjpeg-turbo, the accelerated Huffman decoder was invoked (in most cases) only +if there were > 128 bytes of data in the input buffer. However, it is possible +to construct a JPEG image in which a single Huffman block is over 430 bytes +long, so this version of libjpeg-turbo activates the accelerated Huffman +decoder only if there are > 512 bytes of data in the input buffer. + +[18] Fixed a memory leak in tjunittest encountered when running the program +with the -yuv option. + + +1.4.2 +===== + +[1] Fixed an issue whereby cjpeg would segfault if a Windows bitmap with a +negative width or height was used as an input image (Windows bitmaps can have +a negative height if they are stored in top-down order, but such files are +rare and not supported by libjpeg-turbo.) + +[2] Fixed an issue whereby, under certain circumstances, libjpeg-turbo would +incorrectly encode certain JPEG images when quality=100 and the fast integer +forward DCT were used. This was known to cause 'make test' to fail when the +library was built with '-march=haswell' on x86 systems. + +[3] Fixed an issue whereby libjpeg-turbo would crash when built with the latest +& greatest development version of the Clang/LLVM compiler. This was caused by +an x86-64 ABI conformance issue in some of libjpeg-turbo's 64-bit SSE2 SIMD +routines. Those routines were incorrectly using a 64-bit mov instruction to +transfer a 32-bit JDIMENSION argument, whereas the x86-64 ABI allows the upper +(unused) 32 bits of a 32-bit argument's register to be undefined. The new +Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit +structure members into a single 64-bit register, and this exposed the ABI +conformance issue. + +[4] Fixed a bug in the MIPS DSPr2 4:2:0 "plain" (non-fancy and non-merged) +upsampling routine that caused a buffer overflow (and subsequent segfault) when +decompressing a 4:2:0 JPEG image whose scaled output width was less than 16 +pixels. The "plain" upsampling routines are normally only used when +decompressing a non-YCbCr JPEG image, but they are also used when decompressing +a JPEG image whose scaled output height is 1. + +[5] Fixed various negative left shifts and other issues reported by the GCC and +Clang undefined behavior sanitizers. None of these was known to pose a +security threat, but removing the warnings makes it easier to detect actual +security issues, should they arise in the future. + + +1.4.1 +===== + +[1] tjbench now properly handles CMYK/YCCK JPEG files. Passing an argument of +-cmyk (instead of, for instance, -rgb) will cause tjbench to internally convert +the source bitmap to CMYK prior to compression, to generate YCCK JPEG files, +and to internally convert the decompressed CMYK pixels back to RGB after +decompression (the latter is done automatically if a CMYK or YCCK JPEG is +passed to tjbench as a source image.) The CMYK<->RGB conversion operation is +not benchmarked. NOTE: The quick & dirty CMYK<->RGB conversions that tjbench +uses are suitable for testing only. Proper conversion between CMYK and RGB +requires a color management system. + +[2] 'make test' now performs additional bitwise regression tests using tjbench, +mainly for the purpose of testing compression from/decompression to a subregion +of a larger image buffer. + +[3] 'make test' no longer tests the regression of the floating point DCT/IDCT +by default, since the results of those tests can vary if the algorithms in +question are not implemented using SIMD instructions on a particular platform. +See the comments in Makefile.am for information on how to re-enable the tests +and to specify an expected result for them based on the particulars of your +platform. + +[4] The NULL color conversion routines have been significantly optimized, +which speeds up the compression of RGB and CMYK JPEGs by 5-20% when using +64-bit code and 0-3% when using 32-bit code, and the decompression of those +images by 10-30% when using 64-bit code and 3-12% when using 32-bit code. + +[5] Fixed an "illegal instruction" error that occurred when djpeg from a +SIMD-enabled libjpeg-turbo MIPS build was executed with the -nosmooth option on +a MIPS machine that lacked DSPr2 support. The MIPS SIMD routines for h2v1 and +h2v2 merged upsampling were not properly checking for the existence of DSPr2. + +[6] Performance has been improved significantly on 64-bit non-Linux and +non-Windows platforms (generally 10-20% faster compression and 5-10% faster +decompression.) Due to an oversight, the 64-bit version of the accelerated +Huffman codec was not being compiled in when libjpeg-turbo was built on +platforms other than Windows or Linux. Oops. + +[7] Fixed an extremely rare bug in the Huffman encoder that caused 64-bit +builds of libjpeg-turbo to incorrectly encode a few specific test images when +quality=98, an optimized Huffman table, and the slow integer forward DCT were +used. + +[8] The Windows (CMake) build system now supports building only static or only +shared libraries. This is accomplished by adding either -DENABLE_STATIC=0 or +-DENABLE_SHARED=0 to the CMake command line. + +[9] TurboJPEG API functions will now return an error code if a warning is +triggered in the underlying libjpeg API. For instance, if a JPEG file is +corrupt, the TurboJPEG decompression functions will attempt to decompress +as much of the image as possible, but those functions will now return -1 to +indicate that the decompression was not entirely successful. + +[10] Fixed a bug in the MIPS DSPr2 4:2:2 fancy upsampling routine that caused a +buffer overflow (and subsequent segfault) when decompressing a 4:2:2 JPEG image +in which the right-most MCU was 5 or 6 pixels wide. + + +1.4.0 +===== + +[1] Fixed a build issue on OS X PowerPC platforms (md5cmp failed to build +because OS X does not provide the le32toh() and htole32() functions.) + +[2] The non-SIMD RGB565 color conversion code did not work correctly on big +endian machines. This has been fixed. + +[3] Fixed an issue in tjPlaneSizeYUV() whereby it would erroneously return 1 +instead of -1 if componentID was > 0 and subsamp was TJSAMP_GRAY. + +[3] Fixed an issue in tjBufSizeYUV2() whereby it would erroneously return 0 +instead of -1 if width was < 1. + +[5] The Huffman encoder now uses clz and bsr instructions for bit counting on +ARM64 platforms (see 1.4 beta1 [5].) + +[6] The close() method in the TJCompressor and TJDecompressor Java classes is +now idempotent. Previously, that method would call the native tjDestroy() +function even if the TurboJPEG instance had already been destroyed. This +caused an exception to be thrown during finalization, if the close() method had +already been called. The exception was caught, but it was still an expensive +operation. + +[7] The TurboJPEG API previously generated an error ("Could not determine +subsampling type for JPEG image") when attempting to decompress grayscale JPEG +images that were compressed with a sampling factor other than 1 (for instance, +with 'cjpeg -grayscale -sample 2x2'). Subsampling technically has no meaning +with grayscale JPEGs, and thus the horizontal and vertical sampling factors +for such images are ignored by the decompressor. However, the TurboJPEG API +was being too rigid and was expecting the sampling factors to be equal to 1 +before it treated the image as a grayscale JPEG. + +[8] cjpeg, djpeg, and jpegtran now accept an argument of -version, which will +print the library version and exit. + +[9] Referring to 1.4 beta1 [15], another extremely rare circumstance was +discovered under which the Huffman encoder's local buffer can be overrun +when a buffered destination manager is being used and an +extremely-high-frequency block (basically junk image data) is being encoded. +Even though the Huffman local buffer was increased from 128 bytes to 136 bytes +to address the previous issue, the new issue caused even the larger buffer to +be overrun. Further analysis reveals that, in the absolute worst case (such as +setting alternating AC coefficients to 32767 and -32768 in the JPEG scanning +order), the Huffman encoder can produce encoded blocks that approach double the +size of the unencoded blocks. Thus, the Huffman local buffer was increased to +256 bytes, which should prevent any such issue from re-occurring in the future. + +[10] The new tjPlaneSizeYUV(), tjPlaneWidth(), and tjPlaneHeight() functions +were not actually usable on any platform except OS X and Windows, because +those functions were not included in the libturbojpeg mapfile. This has been +fixed. + +[11] Restored the JPP(), JMETHOD(), and FAR macros in the libjpeg-turbo header +files. The JPP() and JMETHOD() macros were originally implemented in libjpeg +as a way of supporting non-ANSI compilers that lacked support for prototype +parameters. libjpeg-turbo has never supported such compilers, but some +software packages still use the macros to define their own prototypes. +Similarly, libjpeg-turbo has never supported MS-DOS and other platforms that +have far symbols, but some software packages still use the FAR macro. A pretty +good argument can be made that this is a bad practice on the part of the +software in question, but since this affects more than one package, it's just +easier to fix it here. + +[12] Fixed issues that were preventing the ARM 64-bit SIMD code from compiling +for iOS, and included an ARMv8 architecture in all of the binaries installed by +the "official" libjpeg-turbo SDK for OS X. + + +1.3.90 (1.4 beta1) +================== + +[1] New features in the TurboJPEG API: +-- YUV planar images can now be generated with an arbitrary line padding +(previously only 4-byte padding, which was compatible with X Video, was +supported.) +-- The decompress-to-YUV function has been extended to support image scaling. +-- JPEG images can now be compressed from YUV planar source images. +-- YUV planar images can now be decoded into RGB or grayscale images. +-- 4:1:1 subsampling is now supported. This is mainly included for +compatibility, since 4:1:1 is not fully accelerated in libjpeg-turbo and has no +significant advantages relative to 4:2:0. +-- CMYK images are now supported. This feature allows CMYK source images to be +compressed to YCCK JPEGs and YCCK or CMYK JPEGs to be decompressed to CMYK +destination images. Conversion between CMYK/YCCK and RGB or YUV images is not +supported. Such conversion requires a color management system and is thus out +of scope for a codec library. +-- The handling of YUV images in the Java API has been significantly refactored +and should now be much more intuitive. +-- The Java API now supports encoding a YUV image from an arbitrary position in +a large image buffer. +-- All of the YUV functions now have a corresponding function that operates on +separate image planes instead of a unified image buffer. This allows for +compressing/decoding from or decompressing/encoding to a subregion of a larger +YUV image. It also allows for handling YUV formats that swap the order of the +U and V planes. + +[2] Added SIMD acceleration for DSPr2-capable MIPS platforms. This speeds up +the compression of full-color JPEGs by 70-80% on such platforms and +decompression by 25-35%. + +[3] If an application attempts to decompress a Huffman-coded JPEG image whose +header does not contain Huffman tables, libjpeg-turbo will now insert the +default Huffman tables. In order to save space, many motion JPEG video frames +are encoded without the default Huffman tables, so these frames can now be +successfully decompressed by libjpeg-turbo without additional work on the part +of the application. An application can still override the Huffman tables, for +instance to re-use tables from a previous frame of the same video. + +[4] The Mac packaging system now uses pkgbuild and productbuild rather than +PackageMaker (which is obsolete and no longer supported.) This means that +OS X 10.6 "Snow Leopard" or later must be used when packaging libjpeg-turbo, +although the packages produced can be installed on OS X 10.5 "Leopard" or +later. OS X 10.4 "Tiger" is no longer supported. + +[5] The Huffman encoder now uses clz and bsr instructions for bit counting on +ARM platforms rather than a lookup table. This reduces the memory footprint +by 64k, which may be important for some mobile applications. Out of four +Android devices that were tested, two demonstrated a small overall performance +loss (~3-4% on average) with ARMv6 code and a small gain (also ~3-4%) with +ARMv7 code when enabling this new feature, but the other two devices +demonstrated a significant overall performance gain with both ARMv6 and ARMv7 +code (~10-20%) when enabling the feature. Actual mileage may vary. + +[6] Worked around an issue with Visual C++ 2010 and later that caused incorrect +pixels to be generated when decompressing a JPEG image to a 256-color bitmap, +if compiler optimization was enabled when libjpeg-turbo was built. This caused +the regression tests to fail when doing a release build under Visual C++ 2010 +and later. + +[7] Improved the accuracy and performance of the non-SIMD implementation of the +floating point inverse DCT (using code borrowed from libjpeg v8a and later.) +The accuracy of this implementation now matches the accuracy of the SSE/SSE2 +implementation. Note, however, that the floating point DCT/IDCT algorithms are +mainly a legacy feature. They generally do not produce significantly better +accuracy than the slow integer DCT/IDCT algorithms, and they are quite a bit +slower. + +[8] Added a new output colorspace (JCS_RGB565) to the libjpeg API that allows +for decompressing JPEG images into RGB565 (16-bit) pixels. If dithering is not +used, then this code path is SIMD-accelerated on ARM platforms. + +[9] Numerous obsolete features, such as support for non-ANSI compilers and +support for the MS-DOS memory model, were removed from the libjpeg code, +greatly improving its readability and making it easier to maintain and extend. + +[10] Fixed a segfault that occurred when calling output_message() with msg_code +set to JMSG_COPYRIGHT. + +[11] Fixed an issue whereby wrjpgcom was allowing comments longer than 65k +characters to be passed on the command line, which was causing it to generate +incorrect JPEG files. + +[12] Fixed a bug in the build system that was causing the Windows version of +wrjpgcom to be built using the rdjpgcom source code. + +[13] Restored 12-bit-per-component JPEG support. A 12-bit version of +libjpeg-turbo can now be built by passing an argument of --with-12bit to +configure (Unix) or -DWITH_12BIT=1 to cmake (Windows.) 12-bit JPEG support is +included only for convenience. Enabling this feature disables all of the +performance features in libjpeg-turbo, as well as arithmetic coding and the +TurboJPEG API. The resulting library still contains the other libjpeg-turbo +features (such as the colorspace extensions), but in general, it performs no +faster than libjpeg v6b. + +[14] Added ARM 64-bit SIMD acceleration for the YCC-to-RGB color conversion +and IDCT algorithms (both are used during JPEG decompression.) For unknown +reasons (probably related to clang), this code cannot currently be compiled for +iOS. + +[15] Fixed an extremely rare bug that could cause the Huffman encoder's local +buffer to overrun when a very high-frequency MCU is compressed using quality +100 and no subsampling, and when the JPEG output buffer is being dynamically +resized by the destination manager. This issue was so rare that, even with a +test program specifically designed to make the bug occur (by injecting random +high-frequency YUV data into the compressor), it was reproducible only once in +about every 25 million iterations. + +[16] Fixed an oversight in the TurboJPEG C wrapper: if any of the JPEG +compression functions was called repeatedly with the same +automatically-allocated destination buffer, then TurboJPEG would erroneously +assume that the jpegSize parameter was equal to the size of the buffer, when in +fact that parameter was probably equal to the size of the most recently +compressed JPEG image. If the size of the previous JPEG image was not as large +as the current JPEG image, then TurboJPEG would unnecessarily reallocate the +destination buffer. + + +1.3.1 +===== + +[1] On Un*x systems, 'make install' now installs the libjpeg-turbo libraries +into /opt/libjpeg-turbo/lib32 by default on any 32-bit system, not just x86, +and into /opt/libjpeg-turbo/lib64 by default on any 64-bit system, not just +x86-64. You can override this by overriding either the 'prefix' or 'libdir' +configure variables. + +[2] The Windows installer now places a copy of the TurboJPEG DLLs in the same +directory as the rest of the libjpeg-turbo binaries. This was mainly done +to support TurboVNC 1.3, which bundles the DLLs in its Windows installation. +When using a 32-bit version of CMake on 64-bit Windows, it is impossible to +access the c:\WINDOWS\system32 directory, which made it impossible for the +TurboVNC build scripts to bundle the 64-bit TurboJPEG DLL. + +[3] Fixed a bug whereby attempting to encode a progressive JPEG with arithmetic +entropy coding (by passing arguments of -progressive -arithmetic to cjpeg or +jpegtran, for instance) would result in an error, "Requested feature was +omitted at compile time". + +[4] Fixed a couple of issues whereby malformed JPEG images would cause +libjpeg-turbo to use uninitialized memory during decompression. + +[5] Fixed an error ("Buffer passed to JPEG library is too small") that occurred +when calling the TurboJPEG YUV encoding function with a very small (< 5x5) +source image, and added a unit test to check for this error. + +[6] The Java classes should now build properly under Visual Studio 2010 and +later. + +[7] Fixed an issue that prevented SRPMs generated using the in-tree packaging +tools from being rebuilt on certain newer Linux distributions. + +[8] Numerous minor fixes to eliminate compilation and build/packaging system +warnings, fix cosmetic issues, improve documentation clarity, and other general +source cleanup. + + +1.3.0 +===== + +[1] 'make test' now works properly on FreeBSD, and it no longer requires the +md5sum executable to be present on other Un*x platforms. + +[2] Overhauled the packaging system: +-- To avoid conflict with vendor-supplied libjpeg-turbo packages, the +official RPMs and DEBs for libjpeg-turbo have been renamed to +"libjpeg-turbo-official". +-- The TurboJPEG libraries are now located under /opt/libjpeg-turbo in the +official Linux and Mac packages, to avoid conflict with vendor-supplied +packages and also to streamline the packaging system. +-- Release packages are now created with the directory structure defined +by the configure variables "prefix", "bindir", "libdir", etc. (Un*x) or by the +CMAKE_INSTALL_PREFIX variable (Windows.) The exception is that the docs are +always located under the system default documentation directory on Un*x and Mac +systems, and on Windows, the TurboJPEG DLL is always located in the Windows +system directory. +-- To avoid confusion, official libjpeg-turbo packages on Linux/Unix platforms +(except for Mac) will always install the 32-bit libraries in +/opt/libjpeg-turbo/lib32 and the 64-bit libraries in /opt/libjpeg-turbo/lib64. +-- Fixed an issue whereby, in some cases, the libjpeg-turbo executables on Un*x +systems were not properly linking with the shared libraries installed by the +same package. +-- Fixed an issue whereby building the "installer" target on Windows when +WITH_JAVA=1 would fail if the TurboJPEG JAR had not been previously built. +-- Building the "install" target on Windows now installs files into the same +places that the installer does. + +[3] Fixed a Huffman encoder bug that prevented I/O suspension from working +properly. + + +1.2.90 (1.3 beta1) +================== + +[1] Added support for additional scaling factors (3/8, 5/8, 3/4, 7/8, 9/8, 5/4, +11/8, 3/2, 13/8, 7/4, 15/8, and 2) when decompressing. Note that the IDCT will +not be SIMD-accelerated when using any of these new scaling factors. + +[2] The TurboJPEG dynamic library is now versioned. It was not strictly +necessary to do so, because TurboJPEG uses versioned symbols, and if a function +changes in an ABI-incompatible way, that function is renamed and a legacy +function is provided to maintain backward compatibility. However, certain +Linux distro maintainers have a policy against accepting any library that isn't +versioned. + +[3] Extended the TurboJPEG Java API so that it can be used to compress a JPEG +image from and decompress a JPEG image to an arbitrary position in a large +image buffer. + +[4] The tjDecompressToYUV() function now supports the TJFLAG_FASTDCT flag. + +[5] The 32-bit supplementary package for amd64 Debian systems now provides +symlinks in /usr/lib/i386-linux-gnu for the TurboJPEG libraries in /usr/lib32. +This allows those libraries to be used on MultiArch-compatible systems (such as +Ubuntu 11 and later) without setting the linker path. + +[6] The TurboJPEG Java wrapper should now find the JNI library on Mac systems +without having to pass -Djava.library.path=/usr/lib to java. + +[7] TJBench has been ported to Java to provide a convenient way of validating +the performance of the TurboJPEG Java API. It can be run with +'java -cp turbojpeg.jar TJBench'. + +[8] cjpeg can now be used to generate JPEG files with the RGB colorspace +(feature ported from jpeg-8d.) + +[9] The width and height in the -crop argument passed to jpegtran can now be +suffixed with "f" to indicate that, when the upper left corner of the cropping +region is automatically moved to the nearest iMCU boundary, the bottom right +corner should be moved by the same amount. In other words, this feature causes +jpegtran to strictly honor the specified width/height rather than the specified +bottom right corner (feature ported from jpeg-8d.) + +[10] JPEG files using the RGB colorspace can now be decompressed into grayscale +images (feature ported from jpeg-8d.) + +[11] Fixed a regression caused by 1.2.1[7] whereby the build would fail with +multiple "Mismatch in operand sizes" errors when attempting to build the x86 +SIMD code with NASM 0.98. + +[12] The in-memory source/destination managers (jpeg_mem_src() and +jpeg_mem_dest()) are now included by default when building libjpeg-turbo with +libjpeg v6b or v7 emulation, so that programs can take advantage of these +functions without requiring the use of the backward-incompatible libjpeg v8 +ABI. The "age number" of the libjpeg-turbo library on Un*x systems has been +incremented by 1 to reflect this. You can disable this feature with a +configure/CMake switch in order to retain strict API/ABI compatibility with the +libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.) See +README.md for more details. + +[13] Added ARMv7s architecture to libjpeg.a and libturbojpeg.a in the official +libjpeg-turbo binary package for OS X, so that those libraries can be used to +build applications that leverage the faster CPUs in the iPhone 5 and iPad 4. + + +1.2.1 +===== + +[1] Creating or decoding a JPEG file that uses the RGB colorspace should now +properly work when the input or output colorspace is one of the libjpeg-turbo +colorspace extensions. + +[2] When libjpeg-turbo was built without SIMD support and merged (non-fancy) +upsampling was used along with an alpha-enabled colorspace during +decompression, the unused byte of the decompressed pixels was not being set to +0xFF. This has been fixed. TJUnitTest has also been extended to test for the +correct behavior of the colorspace extensions when merged upsampling is used. + +[3] Fixed a bug whereby the libjpeg-turbo SSE2 SIMD code would not preserve the +upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64 +calling conventions. + +[4] Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG +images (specifically, images in which the component count was erroneously set +to a large value) would cause libjpeg-turbo to segfault. + +[5] Worked around a severe performance issue with "Bobcat" (AMD Embedded APU) +processors. The MASKMOVDQU instruction, which was used by the libjpeg-turbo +SSE2 SIMD code, is apparently implemented in microcode on AMD processors, and +it is painfully slow on Bobcat processors in particular. Eliminating the use +of this instruction improved performance by an order of magnitude on Bobcat +processors and by a small amount (typically 5%) on AMD desktop processors. + +[6] Added SIMD acceleration for performing 4:2:2 upsampling on NEON-capable ARM +platforms. This speeds up the decompression of 4:2:2 JPEGs by 20-25% on such +platforms. + +[7] Fixed a regression caused by 1.2.0[2] whereby, on Linux/x86 platforms +running the 32-bit SSE2 SIMD code in libjpeg-turbo, decompressing a 4:2:0 or +4:2:2 JPEG image into a 32-bit (RGBX, BGRX, etc.) buffer without using fancy +upsampling would produce several incorrect columns of pixels at the right-hand +side of the output image if each row in the output image was not evenly +divisible by 16 bytes. + +[8] Fixed an issue whereby attempting to build the SIMD extensions with Xcode +4.3 on OS X platforms would cause NASM to return numerous errors of the form +"'%define' expects a macro identifier". + +[9] Added flags to the TurboJPEG API that allow the caller to force the use of +either the fast or the accurate DCT/IDCT algorithms in the underlying codec. + + +1.2.0 +===== + +[1] Fixed build issue with YASM on Unix systems (the libjpeg-turbo build system +was not adding the current directory to the assembler include path, so YASM +was not able to find jsimdcfg.inc.) + +[2] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing +a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. +This was more of an annoyance than an actual bug, since it did not cause any +actual run-time problems, but the issue showed up when running libjpeg-turbo in +valgrind. See http://crbug.com/72399 for more information. + +[3] Added a compile-time macro (LIBJPEG_TURBO_VERSION) that can be used to +check the version of libjpeg-turbo against which an application was compiled. + +[4] Added new RGBA/BGRA/ABGR/ARGB colorspace extension constants (libjpeg API) +and pixel formats (TurboJPEG API), which allow applications to specify that, +when decompressing to a 4-component RGB buffer, the unused byte should be set +to 0xFF so that it can be interpreted as an opaque alpha channel. + +[5] Fixed regression issue whereby DevIL failed to build against libjpeg-turbo +because libjpeg-turbo's distributed version of jconfig.h contained an INLINE +macro, which conflicted with a similar macro in DevIL. This macro is used only +internally when building libjpeg-turbo, so it was moved into config.h. + +[6] libjpeg-turbo will now correctly decompress erroneous CMYK/YCCK JPEGs whose +K component is assigned a component ID of 1 instead of 4. Although these files +are in violation of the spec, other JPEG implementations handle them +correctly. + +[7] Added ARMv6 and ARMv7 architectures to libjpeg.a and libturbojpeg.a in +the official libjpeg-turbo binary package for OS X, so that those libraries can +be used to build both OS X and iOS applications. + + +1.1.90 (1.2 beta1) +================== + +[1] Added a Java wrapper for the TurboJPEG API. See java/README for more +details. + +[2] The TurboJPEG API can now be used to scale down images during +decompression. + +[3] Added SIMD routines for RGB-to-grayscale color conversion, which +significantly improves the performance of grayscale JPEG compression from an +RGB source image. + +[4] Improved the performance of the C color conversion routines, which are used +on platforms for which SIMD acceleration is not available. + +[5] Added a function to the TurboJPEG API that performs lossless transforms. +This function is implemented using the same back end as jpegtran, but it +performs transcoding entirely in memory and allows multiple transforms and/or +crop operations to be batched together, so the source coefficients only need to +be read once. This is useful when generating image tiles from a single source +JPEG. + +[6] Added tests for the new TurboJPEG scaled decompression and lossless +transform features to tjbench (the TurboJPEG benchmark, formerly called +"jpgtest".) + +[7] Added support for 4:4:0 (transposed 4:2:2) subsampling in TurboJPEG, which +was necessary in order for it to read 4:2:2 JPEG files that had been losslessly +transposed or rotated 90 degrees. + +[8] All legacy VirtualGL code has been re-factored, and this has allowed +libjpeg-turbo, in its entirety, to be re-licensed under a BSD-style license. + +[9] libjpeg-turbo can now be built with YASM. + +[10] Added SIMD acceleration for ARM Linux and iOS platforms that support +NEON instructions. + +[11] Refactored the TurboJPEG C API and documented it using Doxygen. The +TurboJPEG 1.2 API uses pixel formats to define the size and component order of +the uncompressed source/destination images, and it includes a more efficient +version of TJBUFSIZE() that computes a worst-case JPEG size based on the level +of chrominance subsampling. The refactored implementation of the TurboJPEG API +now uses the libjpeg memory source and destination managers, which allows the +TurboJPEG compressor to grow the JPEG buffer as necessary. + +[12] Eliminated errors in the output of jpegtran on Windows that occurred when +the application was invoked using I/O redirection +(jpegtran output.jpg). + +[13] The inclusion of libjpeg v7 and v8 emulation as well as arithmetic coding +support in libjpeg-turbo v1.1.0 introduced several new error constants in +jerror.h, and these were mistakenly enabled for all emulation modes, causing +the error enum in libjpeg-turbo to sometimes have different values than the +same enum in libjpeg. This represents an ABI incompatibility, and it caused +problems with rare applications that took specific action based on a particular +error value. The fix was to include the new error constants conditionally +based on whether libjpeg v7 or v8 emulation was enabled. + +[14] Fixed an issue whereby Windows applications that used libjpeg-turbo would +fail to compile if the Windows system headers were included before jpeglib.h. +This issue was caused by a conflict in the definition of the INT32 type. + +[15] Fixed 32-bit supplementary package for amd64 Debian systems, which was +broken by enhancements to the packaging system in 1.1. + +[16] When decompressing a JPEG image using an output colorspace of +JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, or JCS_EXT_XRGB, libjpeg-turbo will +now set the unused byte to 0xFF, which allows applications to interpret that +byte as an alpha channel (0xFF = opaque). + + +1.1.1 +===== + +[1] Fixed a 1-pixel error in row 0, column 21 of the luminance plane generated +by tjEncodeYUV(). + +[2] libjpeg-turbo's accelerated Huffman decoder previously ignored unexpected +markers found in the middle of the JPEG data stream during decompression. It +will now hand off decoding of a particular block to the unaccelerated Huffman +decoder if an unexpected marker is found, so that the unaccelerated Huffman +decoder can generate an appropriate warning. + +[3] Older versions of MinGW64 prefixed symbol names with underscores by +default, which differed from the behavior of 64-bit Visual C++. MinGW64 1.0 +has adopted the behavior of 64-bit Visual C++ as the default, so to accommodate +this, the libjpeg-turbo SIMD function names are no longer prefixed with an +underscore when building with MinGW64. This means that, when building +libjpeg-turbo with older versions of MinGW64, you will now have to add +-fno-leading-underscore to the CFLAGS. + +[4] Fixed a regression bug in the NSIS script that caused the Windows installer +build to fail when using the Visual Studio IDE. + +[5] Fixed a bug in jpeg_read_coefficients() whereby it would not initialize +cinfo->image_width and cinfo->image_height if libjpeg v7 or v8 emulation was +enabled. This specifically caused the jpegoptim program to fail if it was +linked against a version of libjpeg-turbo that was built with libjpeg v7 or v8 +emulation. + +[6] Eliminated excessive I/O overhead that occurred when reading BMP files in +cjpeg. + +[7] Eliminated errors in the output of cjpeg on Windows that occurred when the +application was invoked using I/O redirection (cjpeg output.jpg). + + +1.1.0 +===== + +[1] The algorithm used by the SIMD quantization function cannot produce correct +results when the JPEG quality is >= 98 and the fast integer forward DCT is +used. Thus, the non-SIMD quantization function is now used for those cases, +and libjpeg-turbo should now produce identical output to libjpeg v6b in all +cases. + +[2] Despite the above, the fast integer forward DCT still degrades somewhat for +JPEG qualities greater than 95, so the TurboJPEG wrapper will now automatically +use the slow integer forward DCT when generating JPEG images of quality 96 or +greater. This reduces compression performance by as much as 15% for these +high-quality images but is necessary to ensure that the images are perceptually +lossless. It also ensures that the library can avoid the performance pitfall +created by [1]. + +[3] Ported jpgtest.cxx to pure C to avoid the need for a C++ compiler. + +[4] Fixed visual artifacts in grayscale JPEG compression caused by a typo in +the RGB-to-luminance lookup tables. + +[5] The Windows distribution packages now include the libjpeg run-time programs +(cjpeg, etc.) + +[6] All packages now include jpgtest. + +[7] The TurboJPEG dynamic library now uses versioned symbols. + +[8] Added two new TurboJPEG API functions, tjEncodeYUV() and +tjDecompressToYUV(), to replace the somewhat hackish TJ_YUV flag. + + +1.0.90 (1.1 beta1) +================== + +[1] Added emulation of the libjpeg v7 and v8 APIs and ABIs. See +README.md for more details. This feature was sponsored by CamTrace SAS. + +[2] Created a new CMake-based build system for the Visual C++ and MinGW builds. + +[3] Grayscale bitmaps can now be compressed from/decompressed to using the +TurboJPEG API. + +[4] jpgtest can now be used to test decompression performance with existing +JPEG images. + +[5] If the default install prefix (/opt/libjpeg-turbo) is used, then +'make install' now creates /opt/libjpeg-turbo/lib32 and +/opt/libjpeg-turbo/lib64 sym links to duplicate the behavior of the binary +packages. + +[6] All symbols in the libjpeg-turbo dynamic library are now versioned, even +when the library is built with libjpeg v6b emulation. + +[7] Added arithmetic encoding and decoding support (can be disabled with +configure or CMake options) + +[8] Added a TJ_YUV flag to the TurboJPEG API, which causes both the compressor +and decompressor to output planar YUV images. + +[9] Added an extended version of tjDecompressHeader() to the TurboJPEG API, +which allows the caller to determine the type of subsampling used in a JPEG +image. + +[10] Added further protections against invalid Huffman codes. + + +1.0.1 +===== + +[1] The Huffman decoder will now handle erroneous Huffman codes (for instance, +from a corrupt JPEG image.) Previously, these would cause libjpeg-turbo to +crash under certain circumstances. + +[2] Fixed typo in SIMD dispatch routines that was causing 4:2:2 upsampling to +be used instead of 4:2:0 when decompressing JPEG images using SSE2 code. + +[3] configure script will now automatically determine whether the +INCOMPLETE_TYPES_BROKEN macro should be defined. + + +1.0.0 +===== + +[1] 2983700: Further FreeBSD build tweaks (no longer necessary to specify +--host when configuring on a 64-bit system) + +[2] Created symlinks in the Unix/Linux packages so that the TurboJPEG +include file can always be found in /opt/libjpeg-turbo/include, the 32-bit +static libraries can always be found in /opt/libjpeg-turbo/lib32, and the +64-bit static libraries can always be found in /opt/libjpeg-turbo/lib64. + +[3] The Unix/Linux distribution packages now include the libjpeg run-time +programs (cjpeg, etc.) and man pages. + +[4] Created a 32-bit supplementary package for amd64 Debian systems, which +contains just the 32-bit libjpeg-turbo libraries. + +[5] Moved the libraries from */lib32 to */lib in the i386 Debian package. + +[6] Include distribution package for Cygwin + +[7] No longer necessary to specify --without-simd on non-x86 architectures, and +unit tests now work on those architectures. + + +0.0.93 +====== + +[1] 2982659, Fixed x86-64 build on FreeBSD systems + +[2] 2988188: Added support for Windows 64-bit systems + + +0.0.91 +====== + +[1] Added documentation to .deb packages + +[2] 2968313: Fixed data corruption issues when decompressing large JPEG images +and/or using buffered I/O with the libjpeg-turbo decompressor + + +0.0.90 +====== + +Initial release diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cjpeg.1 glmark2-2021.02/android/jni/src/libjpeg-turbo/cjpeg.1 --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cjpeg.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/cjpeg.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,351 @@ +.TH CJPEG 1 "17 February 2016" +.SH NAME +cjpeg \- compress an image file to a JPEG file +.SH SYNOPSIS +.B cjpeg +[ +.I options +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B cjpeg +compresses the named image file, or the standard input if no file is +named, and produces a JPEG/JFIF file on the standard output. +The currently supported input file formats are: PPM (PBMPLUS color +format), PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster +Toolkit format). (RLE is supported only if the URT library is available.) +.SH OPTIONS +All switch names may be abbreviated; for example, +.B \-grayscale +may be written +.B \-gray +or +.BR \-gr . +Most of the "basic" switches can be abbreviated to as little as one letter. +Upper and lower case are equivalent (thus +.B \-BMP +is the same as +.BR \-bmp ). +British spellings are also accepted (e.g., +.BR \-greyscale ), +though for brevity these are not mentioned below. +.PP +The basic switches are: +.TP +.BI \-quality " N[,...]" +Scale quantization tables to adjust image quality. Quality is 0 (worst) to +100 (best); default is 75. (See below for more info.) +.TP +.B \-grayscale +Create monochrome JPEG file from color input. Be sure to use this switch when +compressing a grayscale BMP file, because +.B cjpeg +isn't bright enough to notice whether a BMP file uses only shades of gray. +By saying +.BR \-grayscale , +you'll get a smaller JPEG file that takes less time to process. +.TP +.B \-rgb +Create RGB JPEG file. +Using this switch suppresses the conversion from RGB +colorspace input to the default YCbCr JPEG colorspace. +.TP +.B \-optimize +Perform optimization of entropy encoding parameters. Without this, default +encoding parameters are used. +.B \-optimize +usually makes the JPEG file a little smaller, but +.B cjpeg +runs somewhat slower and needs much more memory. Image quality and speed of +decompression are unaffected by +.BR \-optimize . +.TP +.B \-progressive +Create progressive JPEG file (see below). +.TP +.B \-targa +Input file is Targa format. Targa files that contain an "identification" +field will not be automatically recognized by +.BR cjpeg ; +for such files you must specify +.B \-targa +to make +.B cjpeg +treat the input as Targa format. +For most Targa files, you won't need this switch. +.PP +The +.B \-quality +switch lets you trade off compressed file size against quality of the +reconstructed image: the higher the quality setting, the larger the JPEG file, +and the closer the output image will be to the original input. Normally you +want to use the lowest quality setting (smallest file) that decompresses into +something visually indistinguishable from the original image. For this +purpose the quality setting should generally be between 50 and 95 (the default +is 75) for photographic images. If you see defects at +.B \-quality +75, then go up 5 or 10 counts at a time until you are happy with the output +image. (The optimal setting will vary from one image to another.) +.PP +.B \-quality +100 will generate a quantization table of all 1's, minimizing loss in the +quantization step (but there is still information loss in subsampling, as well +as roundoff error.) For most images, specifying a quality value above +about 95 will increase the size of the compressed file dramatically, and while +the quality gain from these higher quality values is measurable (using metrics +such as PSNR or SSIM), it is rarely perceivable by human vision. +.PP +In the other direction, quality values below 50 will produce very small files +of low image quality. Settings around 5 to 10 might be useful in preparing an +index of a large image library, for example. Try +.B \-quality +2 (or so) for some amusing Cubist effects. (Note: quality +values below about 25 generate 2-byte quantization tables, which are +considered optional in the JPEG standard. +.B cjpeg +emits a warning message when you give such a quality value, because some +other JPEG programs may be unable to decode the resulting file. Use +.B \-baseline +if you need to ensure compatibility at low quality values.) +.PP +The \fB-quality\fR option has been extended in this version of \fBcjpeg\fR to +support separate quality settings for luminance and chrominance (or, in +general, separate settings for every quantization table slot.) The principle +is the same as chrominance subsampling: since the human eye is more sensitive +to spatial changes in brightness than spatial changes in color, the chrominance +components can be quantized more than the luminance components without +incurring any visible image quality loss. However, unlike subsampling, this +feature reduces data in the frequency domain instead of the spatial domain, +which allows for more fine-grained control. This option is useful in +quality-sensitive applications, for which the artifacts generated by +subsampling may be unacceptable. +.PP +The \fB-quality\fR option accepts a comma-separated list of parameters, which +respectively refer to the quality levels that should be assigned to the +quantization table slots. If there are more q-table slots than parameters, +then the last parameter is replicated. Thus, if only one quality parameter is +given, this is used for both luminance and chrominance (slots 0 and 1, +respectively), preserving the legacy behavior of cjpeg v6b and prior. +More (or customized) quantization tables can be set with the \fB-qtables\fR +option and assigned to components with the \fB-qslots\fR option (see the +"wizard" switches below.) +.PP +JPEG files generated with separate luminance and chrominance quality are fully +compliant with standard JPEG decoders. +.PP +.BR CAUTION: +For this setting to be useful, be sure to pass an argument of \fB-sample 1x1\fR +to \fBcjpeg\fR to disable chrominance subsampling. Otherwise, the default +subsampling level (2x2, AKA "4:2:0") will be used. +.PP +The +.B \-progressive +switch creates a "progressive JPEG" file. In this type of JPEG file, the data +is stored in multiple scans of increasing quality. If the file is being +transmitted over a slow communications link, the decoder can use the first +scan to display a low-quality image very quickly, and can then improve the +display with each subsequent scan. The final image is exactly equivalent to a +standard JPEG file of the same quality setting, and the total file size is +about the same --- often a little smaller. +.PP +Switches for advanced users: +.TP +.B \-arithmetic +Use arithmetic coding. +.B Caution: +arithmetic coded JPEG is not yet widely implemented, so many decoders will be +unable to view an arithmetic coded JPEG file at all. +.TP +.B \-dct int +Use integer DCT method (default). +.TP +.B \-dct fast +Use fast integer DCT (less accurate). +In libjpeg-turbo, the fast method is generally about 5-15% faster than the int +method when using the x86/x86-64 SIMD extensions (results may vary with other +SIMD implementations, or when using libjpeg-turbo without SIMD extensions.) +For quality levels of 90 and below, there should be little or no perceptible +difference between the two algorithms. For quality levels above 90, however, +the difference between the fast and the int methods becomes more pronounced. +With quality=97, for instance, the fast method incurs generally about a 1-3 dB +loss (in PSNR) relative to the int method, but this can be larger for some +images. Do not use the fast method with quality levels above 97. The +algorithm often degenerates at quality=98 and above and can actually produce a +more lossy image than if lower quality levels had been used. Also, in +libjpeg-turbo, the fast method is not fully accelerated for quality levels +above 97, so it will be slower than the int method. +.TP +.B \-dct float +Use floating-point DCT method. +The float method is mainly a legacy feature. It does not produce significantly +more accurate results than the int method, and it is much slower. The float +method may also give different results on different machines due to varying +roundoff behavior, whereas the integer methods should give the same results on +all machines. +.TP +.BI \-restart " N" +Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is +attached to the number. +.B \-restart 0 +(the default) means no restart markers. +.TP +.BI \-smooth " N" +Smooth the input image to eliminate dithering noise. N, ranging from 1 to +100, indicates the strength of smoothing. 0 (the default) means no smoothing. +.TP +.BI \-maxmemory " N" +Set limit for amount of memory to use in processing large images. Value is +in thousands of bytes, or millions of bytes if "M" is attached to the +number. For example, +.B \-max 4m +selects 4000000 bytes. If more space is needed, temporary files will be used. +.TP +.BI \-outfile " name" +Send output image to the named file, not to standard output. +.TP +.BI \-memdst +Compress to memory instead of a file. This feature was implemented mainly as a +way of testing the in-memory destination manager (jpeg_mem_dest()), but it is +also useful for benchmarking, since it reduces the I/O overhead. +.TP +.B \-verbose +Enable debug printout. More +.BR \-v 's +give more output. Also, version information is printed at startup. +.TP +.B \-debug +Same as +.BR \-verbose . +.TP +.B \-version +Print version information and exit. +.PP +The +.B \-restart +option inserts extra markers that allow a JPEG decoder to resynchronize after +a transmission error. Without restart markers, any damage to a compressed +file will usually ruin the image from the point of the error to the end of the +image; with restart markers, the damage is usually confined to the portion of +the image up to the next restart marker. Of course, the restart markers +occupy extra space. We recommend +.B \-restart 1 +for images that will be transmitted across unreliable networks such as Usenet. +.PP +The +.B \-smooth +option filters the input to eliminate fine-scale noise. This is often useful +when converting dithered images to JPEG: a moderate smoothing factor of 10 to +50 gets rid of dithering patterns in the input file, resulting in a smaller +JPEG file and a better-looking image. Too large a smoothing factor will +visibly blur the image, however. +.PP +Switches for wizards: +.TP +.B \-baseline +Force baseline-compatible quantization tables to be generated. This clamps +quantization values to 8 bits even at low quality settings. (This switch is +poorly named, since it does not ensure that the output is actually baseline +JPEG. For example, you can use +.B \-baseline +and +.B \-progressive +together.) +.TP +.BI \-qtables " file" +Use the quantization tables given in the specified text file. +.TP +.BI \-qslots " N[,...]" +Select which quantization table to use for each color component. +.TP +.BI \-sample " HxV[,...]" +Set JPEG sampling factors for each color component. +.TP +.BI \-scans " file" +Use the scan script given in the specified text file. +.PP +The "wizard" switches are intended for experimentation with JPEG. If you +don't know what you are doing, \fBdon't use them\fR. These switches are +documented further in the file wizard.txt. +.SH EXAMPLES +.LP +This example compresses the PPM file foo.ppm with a quality factor of +60 and saves the output as foo.jpg: +.IP +.B cjpeg \-quality +.I 60 foo.ppm +.B > +.I foo.jpg +.SH HINTS +Color GIF files are not the ideal input for JPEG; JPEG is really intended for +compressing full-color (24-bit) images. In particular, don't try to convert +cartoons, line drawings, and other images that have only a few distinct +colors. GIF works great on these, JPEG does not. If you want to convert a +GIF to JPEG, you should experiment with +.BR cjpeg 's +.B \-quality +and +.B \-smooth +options to get a satisfactory conversion. +.B \-smooth 10 +or so is often helpful. +.PP +Avoid running an image through a series of JPEG compression/decompression +cycles. Image quality loss will accumulate; after ten or so cycles the image +may be noticeably worse than it was after one cycle. It's best to use a +lossless format while manipulating an image, then convert to JPEG format when +you are ready to file the image away. +.PP +The +.B \-optimize +option to +.B cjpeg +is worth using when you are making a "final" version for posting or archiving. +It's also a win when you are using low quality settings to make very small +JPEG files; the percentage improvement is often a lot more than it is on +larger files. (At present, +.B \-optimize +mode is always selected when generating progressive JPEG files.) +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR djpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +.BR ppm (5), +.BR pgm (5) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo, to wordsmith certain sections, and to describe +features not present in libjpeg. +.SH ISSUES +Support for GIF input files was removed in cjpeg v6b due to concerns over +the Unisys LZW patent. Although this patent expired in 2006, cjpeg still +lacks GIF support, for these historical reasons. (Conversion of GIF files to +JPEG is usually a bad idea anyway, since GIF is a 256-color format.) +.PP +Not all variants of BMP and Targa file formats are supported. +.PP +The +.B \-targa +switch is not a bug, it's a feature. (It would be a bug if the Targa format +designers had not been clueless.) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cjpeg.c glmark2-2021.02/android/jni/src/libjpeg-turbo/cjpeg.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/cjpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/cjpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,644 @@ +/* + * cjpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2013-2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for the JPEG compressor. + * It should work on any system with Unix- or MS-DOS-style command lines. + * + * Two different command line styles are permitted, depending on the + * compile-time switch TWO_FILE_COMMANDLINE: + * cjpeg [options] inputfile outputfile + * cjpeg [options] [inputfile] + * In the second style, output is always to standard output, which you'd + * normally redirect to a file or pipe to some other program. Input is + * either from a named file or from standard input (typically redirected). + * The second style is convenient on Unix but is unhelpful on systems that + * don't support pipes. Also, you MUST use the first style if your system + * doesn't do binary I/O to stdin/stdout. + * To simplify script writing, the "-outfile" switch is provided. The syntax + * cjpeg [options] -outfile outputfile inputfile + * works regardless of which command line style is used. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* Create the add-on message string table. */ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * This routine determines what format the input file is, + * and selects the appropriate input-reading module. + * + * To determine which family of input formats the file belongs to, + * we may look only at the first byte of the file, since C does not + * guarantee that more than one character can be pushed back with ungetc. + * Looking at additional bytes would require one of these approaches: + * 1) assume we can fseek() the input file (fails for piped input); + * 2) assume we can push back more than one character (works in + * some C implementations, but unportable); + * 3) provide our own buffering (breaks input readers that want to use + * stdio directly, such as the RLE library); + * or 4) don't put back the data, and modify the input_init methods to assume + * they start reading after the start of file (also breaks RLE library). + * #1 is attractive for MS-DOS but is untenable on Unix. + * + * The most portable solution for file types that can't be identified by their + * first byte is to make the user tell us what they are. This is also the + * only approach for "raw" file types that contain only arbitrary values. + * We presently apply this method for Targa files. Most of the time Targa + * files start with 0x00, so we recognize that case. Potentially, however, + * a Targa file could start with any byte value (byte 0 is the length of the + * seldom-used ID field), so we provide a switch to force Targa input mode. + */ + +static boolean is_targa; /* records user -targa switch */ + + +LOCAL(cjpeg_source_ptr) +select_file_type (j_compress_ptr cinfo, FILE *infile) +{ + int c; + + if (is_targa) { +#ifdef TARGA_SUPPORTED + return jinit_read_targa(cinfo); +#else + ERREXIT(cinfo, JERR_TGA_NOTCOMP); +#endif + } + + if ((c = getc(infile)) == EOF) + ERREXIT(cinfo, JERR_INPUT_EMPTY); + if (ungetc(c, infile) == EOF) + ERREXIT(cinfo, JERR_UNGETC_FAILED); + + switch (c) { +#ifdef BMP_SUPPORTED + case 'B': + return jinit_read_bmp(cinfo); +#endif +#ifdef GIF_SUPPORTED + case 'G': + return jinit_read_gif(cinfo); +#endif +#ifdef PPM_SUPPORTED + case 'P': + return jinit_read_ppm(cinfo); +#endif +#ifdef RLE_SUPPORTED + case 'R': + return jinit_read_rle(cinfo); +#endif +#ifdef TARGA_SUPPORTED + case 0x00: + return jinit_read_targa(cinfo); +#endif + default: + ERREXIT(cinfo, JERR_UNKNOWN_FORMAT); + break; + } + + return NULL; /* suppress compiler warnings */ +} + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +boolean memdst; /* for -memdst switch */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -quality N[,...] Compression quality (0..100; 5-95 is most useful range,\n"); + fprintf(stderr, " default is 75)\n"); + fprintf(stderr, " -grayscale Create monochrome JPEG file\n"); + fprintf(stderr, " -rgb Create RGB JPEG file\n"); +#ifdef ENTROPY_OPT_SUPPORTED + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); +#endif +#ifdef C_PROGRESSIVE_SUPPORTED + fprintf(stderr, " -progressive Create progressive JPEG file\n"); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n"); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : "")); +#endif + fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); +#ifdef INPUT_SMOOTHING_SUPPORTED + fprintf(stderr, " -smooth N Smooth dithered input (N=1..100 is strength)\n"); +#endif + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + fprintf(stderr, " -memdst Compress to memory instead of file (useful for benchmarking)\n"); +#endif + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + fprintf(stderr, "Switches for wizards:\n"); + fprintf(stderr, " -baseline Force baseline quantization tables\n"); + fprintf(stderr, " -qtables file Use quantization tables given in file\n"); + fprintf(stderr, " -qslots N[,...] Set component quantization tables\n"); + fprintf(stderr, " -sample HxV[,...] Set component sampling factors\n"); +#ifdef C_MULTISCAN_FILES_SUPPORTED + fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); +#endif + exit(EXIT_FAILURE); +} + + +LOCAL(int) +parse_switches (j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + boolean force_baseline; + boolean simple_progressive; + char *qualityarg = NULL; /* saves -quality parm if any */ + char *qtablefile = NULL; /* saves -qtables filename if any */ + char *qslotsarg = NULL; /* saves -qslots parm if any */ + char *samplearg = NULL; /* saves -sample parm if any */ + char *scansarg = NULL; /* saves -scans parm if any */ + + /* Set up default JPEG parameters. */ + + force_baseline = FALSE; /* by default, allow 16-bit quantizers */ + simple_progressive = FALSE; + is_targa = FALSE; + outfilename = NULL; + memdst = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "arithmetic", 1)) { + /* Use arithmetic coding. */ +#ifdef C_ARITH_CODING_SUPPORTED + cinfo->arith_code = TRUE; +#else + fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "baseline", 1)) { + /* Force baseline-compatible output (8-bit quantizer values). */ + force_baseline = TRUE; + + } else if (keymatch(arg, "dct", 2)) { + /* Select DCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force a monochrome JPEG file to be generated. */ + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + + } else if (keymatch(arg, "rgb", 3)) { + /* Force an RGB JPEG file to be generated. */ + jpeg_set_colorspace(cinfo, JCS_RGB); + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { + /* Enable entropy parm optimization. */ +#ifdef ENTROPY_OPT_SUPPORTED + cinfo->optimize_coding = TRUE; +#else + fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "progressive", 1)) { + /* Select simple progressive mode. */ +#ifdef C_PROGRESSIVE_SUPPORTED + simple_progressive = TRUE; + /* We must postpone execution until num_components is known. */ +#else + fprintf(stderr, "%s: sorry, progressive output was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "memdst", 2)) { + /* Use in-memory destination manager */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + memdst = TRUE; +#else + fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "quality", 1)) { + /* Quality ratings (quantization table scaling factors). */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qualityarg = argv[argn]; + + } else if (keymatch(arg, "qslots", 2)) { + /* Quantization table slot numbers. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qslotsarg = argv[argn]; + /* Must delay setting qslots until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default quant table numbers. + */ + + } else if (keymatch(arg, "qtables", 2)) { + /* Quantization tables fetched from file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qtablefile = argv[argn]; + /* We postpone actually reading the file in case -quality comes later. */ + + } else if (keymatch(arg, "restart", 1)) { + /* Restart interval in MCU rows (or in MCUs with 'b'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (lval < 0 || lval > 65535L) + usage(); + if (ch == 'b' || ch == 'B') { + cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ + } else { + cinfo->restart_in_rows = (int) lval; + /* restart_interval will be computed during startup */ + } + + } else if (keymatch(arg, "sample", 2)) { + /* Set sampling factors. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + samplearg = argv[argn]; + /* Must delay setting sample factors until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default sampling factors. + */ + + } else if (keymatch(arg, "scans", 4)) { + /* Set scan script. */ +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + scansarg = argv[argn]; + /* We must postpone reading the file in case -progressive appears. */ +#else + fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "smooth", 2)) { + /* Set input smoothing factor. */ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + if (val < 0 || val > 100) + usage(); + cinfo->smoothing_factor = val; + + } else if (keymatch(arg, "targa", 1)) { + /* Input file is Targa format. */ + is_targa = TRUE; + + } else { + usage(); /* bogus switch */ + } + } + + /* Post-switch-scanning cleanup */ + + if (for_real) { + + /* Set quantization tables for selected quality. */ + /* Some or all may be overridden if -qtables is present. */ + if (qualityarg != NULL) /* process -quality if it was present */ + if (! set_quality_ratings(cinfo, qualityarg, force_baseline)) + usage(); + + if (qtablefile != NULL) /* process -qtables if it was present */ + if (! read_quant_tables(cinfo, qtablefile, force_baseline)) + usage(); + + if (qslotsarg != NULL) /* process -qslots if it was present */ + if (! set_quant_slots(cinfo, qslotsarg)) + usage(); + + if (samplearg != NULL) /* process -sample if it was present */ + if (! set_sample_factors(cinfo, samplearg)) + usage(); + +#ifdef C_PROGRESSIVE_SUPPORTED + if (simple_progressive) /* process -progressive; -scans can override */ + jpeg_simple_progression(cinfo); +#endif + +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (scansarg != NULL) /* process -scans if it was present */ + if (! read_scan_script(cinfo, scansarg)) + usage(); +#endif + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + int file_index; + cjpeg_source_ptr src_mgr; + FILE *input_file; + FILE *output_file = NULL; + unsigned char *outbuffer = NULL; + unsigned long outsize = 0; + JDIMENSION num_scanlines; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "cjpeg"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG compression object with default error handling. */ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Initialize JPEG parameters. + * Much of this may be overridden later. + * In particular, we don't yet know the input file's color space, + * but we need to provide some value for jpeg_set_defaults() to work. + */ + + cinfo.in_color_space = JCS_RGB; /* arbitrary guess */ + jpeg_set_defaults(&cinfo); + + /* Scan command line to find file names. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + */ + + file_index = parse_switches(&cinfo, argc, argv, 0, FALSE); + +#ifdef TWO_FILE_COMMANDLINE + if (!memdst) { + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + input_file = read_stdin(); + } + + /* Open the output file. */ + if (outfilename != NULL) { + if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else if (!memdst) { + /* default output file is stdout */ + output_file = write_stdout(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Figure out the input file format, and set up to read it. */ + src_mgr = select_file_type(&cinfo, input_file); + src_mgr->input_file = input_file; + + /* Read the input file header to obtain file size & colorspace. */ + (*src_mgr->start_input) (&cinfo, src_mgr); + + /* Now that we know input colorspace, fix colorspace-dependent defaults */ + jpeg_default_colorspace(&cinfo); + + /* Adjust default compression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); + + /* Specify data destination for compression */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + if (memdst) + jpeg_mem_dest(&cinfo, &outbuffer, &outsize); + else +#endif + jpeg_stdio_dest(&cinfo, output_file); + + /* Start compressor */ + jpeg_start_compress(&cinfo, TRUE); + + /* Process data */ + while (cinfo.next_scanline < cinfo.image_height) { + num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr); + (void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines); + } + + /* Finish compression and release memory */ + (*src_mgr->finish_input) (&cinfo, src_mgr); + jpeg_finish_compress(&cinfo); + jpeg_destroy_compress(&cinfo); + + /* Close files, if we opened them */ + if (input_file != stdin) + fclose(input_file); + if (output_file != stdout && output_file != NULL) + fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + if (memdst) { + fprintf(stderr, "Compressed size: %lu bytes\n", outsize); + if (outbuffer != NULL) + free(outbuffer); + } + + /* All done. */ + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/coderules.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/coderules.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/coderules.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/coderules.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,78 @@ +IJG JPEG LIBRARY: CODING RULES + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1991-1996, Thomas G. Lane. +It was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo. +For conditions of distribution and use, see the accompanying README.ijg file. + + +Since numerous people will be contributing code and bug fixes, it's important +to establish a common coding style. The goal of using similar coding styles +is much more important than the details of just what that style is. + +In general we follow the recommendations of "Recommended C Style and Coding +Standards" revision 6.1 (Cannon et al. as modified by Spencer, Keppel and +Brader). This document is available in the IJG FTP archive (see +jpeg/doc/cstyle.ms.tbl.Z, or cstyle.txt.Z for those without nroff/tbl). + +Block comments should be laid out thusly: + +/* + * Block comments in this style. + */ + +We indent statements in K&R style, e.g., + if (test) { + then-part; + } else { + else-part; + } +with two spaces per indentation level. (This indentation convention is +handled automatically by GNU Emacs and many other text editors.) + +Multi-word names should be written in lower case with underscores, e.g., +multi_word_name (not multiWordName). Preprocessor symbols and enum constants +are similar but upper case (MULTI_WORD_NAME). Names should be unique within +the first fifteen characters. + +Note that each function definition must begin with GLOBAL(type), LOCAL(type), +or METHODDEF(type). These macros expand to "static type" or just "type" as +appropriate. They provide a readable indication of the routine's usage and +can readily be changed for special needs. (For instance, special linkage +keywords can be inserted for use in Windows DLLs.) + +A similar solution is used for external function declarations (see the EXTERN +macro.) + + +The JPEG library is intended to be used within larger programs. Furthermore, +we want it to be reentrant so that it can be used by applications that process +multiple images concurrently. The following rules support these requirements: + +1. Avoid direct use of file I/O, "malloc", error report printouts, etc; +pass these through the common routines provided. + +2. Minimize global namespace pollution. Functions should be declared static +wherever possible. (Note that our method-based calling conventions help this +a lot: in many modules only the initialization function will ever need to be +called directly, so only that function need be externally visible.) All +global function names should begin with "jpeg_". + +3. Don't use global variables; anything that must be used in another module +should be in the common data structures. + +4. Don't use static variables except for read-only constant tables. Variables +that should be private to a module can be placed into private structures (see +the system architecture document, structure.txt). + +5. Source file names should begin with "j" for files that are part of the +library proper; source files that are not part of the library, such as cjpeg.c +and djpeg.c, do not begin with "j". Keep compression and decompression code in +separate source files --- some applications may want only one half of the +library. + +Note: these rules (particularly #4) are not followed religiously in the +modules that are used in cjpeg/djpeg but are not part of the JPEG library +proper. Those modules are not really intended to be used in other +applications. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/config.h glmark2-2021.02/android/jni/src/libjpeg-turbo/config.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/config.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/config.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,137 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Build number */ -#define BUILD "20120626" - -/* Support arithmetic encoding */ -#define C_ARITH_CODING_SUPPORTED 1 - -/* Support arithmetic decoding */ -#define D_ARITH_CODING_SUPPORTED 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_JNI_H */ - -/* Define to 1 if you have the `memcpy' function. */ -#define HAVE_MEMCPY 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `memset' function. */ -#define HAVE_MEMSET 1 - -/* Define if your compiler supports prototypes */ -#define HAVE_PROTOTYPES 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned char'. */ -#define HAVE_UNSIGNED_CHAR 1 - -/* Define to 1 if the system has the type `unsigned short'. */ -#define HAVE_UNSIGNED_SHORT 1 - -/* Compiler does not support pointers to undefined structures. */ -/* #undef INCOMPLETE_TYPES_BROKEN */ - -/* How to obtain function inlining. */ -#define INLINE __attribute__((always_inline)) - -/* libjpeg API version */ -#define JPEG_LIB_VERSION 62 - -/* libjpeg-turbo version */ -#define LIBJPEG_TURBO_VERSION 1.2.0 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Define if you have BSD-like bzero and bcopy */ -/* #undef NEED_BSD_STRINGS */ - -/* Define if you need short function names */ -/* #undef NEED_SHORT_EXTERNAL_NAMES */ - -/* Define if you have sys/types.h */ -#define NEED_SYS_TYPES_H 1 - -/* Name of package */ -#define PACKAGE "libjpeg-turbo" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "libjpeg-turbo" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libjpeg-turbo 1.2.0" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "libjpeg-turbo" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.2.0" - -/* Define if shift is unsigned */ -/* #undef RIGHT_SHIFT_IS_UNSIGNED */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Version number of package */ -#define VERSION "1.2.0" - -/* Use accelerated SIMD routines. */ -#define WITH_SIMD 1 - -/* Define to 1 if type `char' is unsigned and you are not using gcc. */ -#ifndef __CHAR_UNSIGNED__ -/* # undef __CHAR_UNSIGNED__ */ -#endif - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/configure.ac glmark2-2021.02/android/jni/src/libjpeg-turbo/configure.ac --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/configure.ac 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/configure.ac 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,595 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.56]) +AC_INIT([libjpeg-turbo], [1.4.90]) + +AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2]) +AC_PREFIX_DEFAULT(/opt/libjpeg-turbo) + +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +# Checks for programs. +SAVED_CFLAGS=${CFLAGS} +SAVED_CPPFLAGS=${CPPFLAGS} +AC_PROG_CPP +AC_PROG_CC +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) +AM_PROG_AS +AM_PROG_CC_C_O +AC_PROG_INSTALL +AC_PROG_LIBTOOL +AC_PROG_LN_S + +AC_ARG_WITH([build-date], [Use custom build string to enable reproducible builds (default: YYMMDD)], + [BUILD="$with_build_date"], + [BUILD=`date +%Y%m%d`]) + +PKG_PROG_PKG_CONFIG + +# When the prefix is /opt/libjpeg-turbo, we assume that an "official" binary is +# being created, and thus we install things into specific locations. + +old_prefix=${prefix} +if test "x$prefix" = "xNONE" -a "x$ac_default_prefix" != "x"; then + prefix=$ac_default_prefix +fi +DATADIR=`eval echo ${datadir}` +DATADIR=`eval echo $DATADIR` +if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then + datadir='${prefix}' +fi +DATADIR=`eval echo ${datarootdir}` +DATADIR=`eval echo $DATADIR` +if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then + datarootdir='${prefix}' +fi +DOCDIR=`eval echo ${docdir}` +DOCDIR=`eval echo $DOCDIR` +if test "$DOCDIR" = "/opt/libjpeg-turbo/doc/libjpeg-turbo"; then + docdir='${datadir}/doc' +fi + +old_exec_prefix=${exec_prefix} +if test "x$exec_prefix" = "xNONE"; then + exec_prefix=${prefix} +fi + +AC_CHECK_SIZEOF(size_t) + +if test "x${libdir}" = 'x${exec_prefix}/lib' -o "x${libdir}" = 'x${prefix}/lib'; then + LIBDIR=`eval echo ${libdir}` + LIBDIR=`eval echo $LIBDIR` + if test "$LIBDIR" = "/opt/libjpeg-turbo/lib"; then + case $host_os in + darwin*) + ;; + *) + if test "${ac_cv_sizeof_size_t}" = "8"; then + libdir='${exec_prefix}/lib64' + elif test "${ac_cv_sizeof_size_t}" = "4"; then + libdir='${exec_prefix}/lib32' + fi + ;; + esac + fi +fi +exec_prefix=${old_exec_prefix} +prefix=${old_prefix} + +# Check whether compiler supports pointers to undefined structures +AC_MSG_CHECKING(whether compiler supports pointers to undefined structures) +AC_TRY_COMPILE([ typedef struct undefined_structure *undef_struct_ptr; ], , + AC_MSG_RESULT(yes), + [AC_MSG_RESULT(no) + AC_DEFINE([INCOMPLETE_TYPES_BROKEN], [1], + [Compiler does not support pointers to undefined structures.])]) + +if test "x${GCC}" = "xyes"; then + if test "x${SAVED_CFLAGS}" = "x"; then + CFLAGS=-O3 + fi + if test "x${SAVED_CPPFLAGS}" = "x"; then + CPPFLAGS=-Wall + fi +fi + +AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) +if test "x${SUNCC}" = "xyes"; then + if test "x${SAVED_CFLAGS}" = "x"; then + CFLAGS=-xO5 + fi +fi + +# Checks for libraries. + +# Checks for header files. +AC_HEADER_STDC +AC_CHECK_HEADERS([stddef.h stdlib.h locale.h string.h]) +AC_CHECK_HEADER([sys/types.h], + AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you need to include to get size_t.])) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_C_CHAR_UNSIGNED +AC_C_INLINE +AC_TYPE_SIZE_T +AC_CHECK_TYPES([unsigned char, unsigned short]) + +AC_MSG_CHECKING([if right shift is signed]) +AC_TRY_RUN( + [#include + int is_shifting_signed (long arg) { + long res = arg >> 4; + + if (res == -0x7F7E80CL) + return 1; /* right shift is signed */ + + /* see if unsigned-shift hack will fix it. */ + /* we can't just test exact value since it depends on width of long... */ + res |= (~0L) << (32-4); + if (res == -0x7F7E80CL) + return 0; /* right shift is unsigned */ + + printf("Right shift isn't acting as I expect it to.\n"); + printf("I fear the JPEG software will not work at all.\n\n"); + return 0; /* try it with unsigned anyway */ + } + int main (void) { + exit(is_shifting_signed(-0x7F7E80B1L)); + }], + [AC_MSG_RESULT(no) + AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1, + [Define if your (broken) compiler shifts signed values as if they were unsigned.])], + [AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(Assuming that right shift is signed on target machine.)]) + +# Checks for library functions. +AC_CHECK_FUNCS([memset memcpy], [], + [AC_DEFINE([NEED_BSD_STRINGS], 1, + [Define if you have BSD-like bzero and bcopy in rather than memset/memcpy in .])]) + +AC_MSG_CHECKING([libjpeg API version]) +AC_ARG_VAR(JPEG_LIB_VERSION, [libjpeg API version (62, 70, or 80)]) +if test "x$JPEG_LIB_VERSION" = "x"; then + AC_ARG_WITH([jpeg7], + AC_HELP_STRING([--with-jpeg7], + [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) + AC_ARG_WITH([jpeg8], + AC_HELP_STRING([--with-jpeg8], + [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) + if test "x${with_jpeg8}" = "xyes"; then + JPEG_LIB_VERSION=80 + else + if test "x${with_jpeg7}" = "xyes"; then + JPEG_LIB_VERSION=70 + else + JPEG_LIB_VERSION=62 + fi + fi +fi +JPEG_LIB_VERSION_DECIMAL=`expr $JPEG_LIB_VERSION / 10`.`expr $JPEG_LIB_VERSION % 10` +AC_SUBST(JPEG_LIB_VERSION_DECIMAL) +AC_MSG_RESULT([$JPEG_LIB_VERSION_DECIMAL]) +AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION], + [libjpeg API version]) + +AC_ARG_VAR(SO_MAJOR_VERSION, + [Major version of the libjpeg-turbo shared library (default is determined by the API version)]) +AC_ARG_VAR(SO_MINOR_VERSION, + [Minor version of the libjpeg-turbo shared library (default is determined by the API version)]) +if test "x$SO_MAJOR_VERSION" = "x"; then + case "$JPEG_LIB_VERSION" in + 62) SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;; + *) SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;; + esac +fi +if test "x$SO_MINOR_VERSION" = "x"; then + case "$JPEG_LIB_VERSION" in + 80) SO_MINOR_VERSION=2 ;; + *) SO_MINOR_VERSION=0 ;; + esac +fi + +RPM_CONFIG_ARGS= + +# Memory source/destination managers +SO_AGE=1 +MEM_SRCDST_FUNCTIONS= +if test "x${with_jpeg8}" != "xyes"; then + AC_MSG_CHECKING([whether to include in-memory source/destination managers]) + AC_ARG_WITH([mem-srcdst], + AC_HELP_STRING([--without-mem-srcdst], + [Do not include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI])) + if test "x$with_mem_srcdst" != "xno"; then + AC_MSG_RESULT(yes) + AC_DEFINE([MEM_SRCDST_SUPPORTED], [1], + [Support in-memory source/destination managers]) + SO_AGE=2 + MEM_SRCDST_FUNCTIONS="global: jpeg_mem_dest; jpeg_mem_src;"; + else + AC_MSG_RESULT(no) + RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-mem-srcdst" + fi +fi + +AC_MSG_CHECKING([libjpeg shared library version]) +AC_MSG_RESULT([$SO_MAJOR_VERSION.$SO_AGE.$SO_MINOR_VERSION]) +LIBTOOL_CURRENT=`expr $SO_MAJOR_VERSION + $SO_AGE` +AC_SUBST(LIBTOOL_CURRENT) +AC_SUBST(SO_MAJOR_VERSION) +AC_SUBST(SO_MINOR_VERSION) +AC_SUBST(SO_AGE) +AC_SUBST(MEM_SRCDST_FUNCTIONS) + +AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION, [$VERSION], [libjpeg-turbo version]) + +VERSION_SCRIPT=yes +AC_ARG_ENABLE([ld-version-script], + AS_HELP_STRING([--disable-ld-version-script], + [Disable linker version script for libjpeg-turbo (default is to use linker version script if the linker supports it)]), + [VERSION_SCRIPT=$enableval], []) + +AC_MSG_CHECKING([whether the linker supports version scripts]) +SAVED_LDFLAGS="$LDFLAGS" +LDFLAGS="$LDFLAGS -Wl,--version-script,conftest.map" +cat > conftest.map < +.I foo.bmp +.SH HINTS +To get a quick preview of an image, use the +.B \-grayscale +and/or +.B \-scale +switches. +.B \-grayscale \-scale 1/8 +is the fastest case. +.PP +Several options are available that trade off image quality to gain speed. +.B \-fast +turns on the recommended settings. +.PP +.B \-dct fast +and/or +.B \-nosmooth +gain speed at a small sacrifice in quality. +When producing a color-quantized image, +.B \-onepass \-dither ordered +is fast but much lower quality than the default behavior. +.B \-dither none +may give acceptable results in two-pass mode, but is seldom tolerable in +one-pass mode. +.PP +If you are fortunate enough to have very fast floating point hardware, +\fB\-dct float\fR may be even faster than \fB\-dct fast\fR. But on most +machines \fB\-dct float\fR is slower than \fB\-dct int\fR; in this case it is +not worth using, because its theoretical accuracy advantage is too small to be +significant in practice. +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR cjpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +.BR ppm (5), +.BR pgm (5) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo, to wordsmith certain sections, and to describe +features not present in libjpeg. +.SH ISSUES +Support for compressed GIF output files was removed in djpeg v6b due to +concerns over the Unisys LZW patent. Although this patent expired in 2006, +djpeg still lacks compressed GIF support, for these historical reasons. +(Conversion of JPEG files to GIF is usually a bad idea anyway, since GIF is a +256-color format.) The uncompressed GIF files that djpeg generates are larger +than they should be, but they are readable by standard GIF decoders. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/djpeg.c glmark2-2021.02/android/jni/src/libjpeg-turbo/djpeg.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/djpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/djpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,781 @@ +/* + * djpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2013 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010-2011, 2013-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for the JPEG decompressor. + * It should work on any system with Unix- or MS-DOS-style command lines. + * + * Two different command line styles are permitted, depending on the + * compile-time switch TWO_FILE_COMMANDLINE: + * djpeg [options] inputfile outputfile + * djpeg [options] [inputfile] + * In the second style, output is always to standard output, which you'd + * normally redirect to a file or pipe to some other program. Input is + * either from a named file or from standard input (typically redirected). + * The second style is convenient on Unix but is unhelpful on systems that + * don't support pipes. Also, you MUST use the first style if your system + * doesn't do binary I/O to stdin/stdout. + * To simplify script writing, the "-outfile" switch is provided. The syntax + * djpeg [options] -outfile outputfile inputfile + * works regardless of which command line style is used. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" +#include "wrppm.h" + +#include /* to declare isprint() */ + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* Create the add-on message string table. */ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * This list defines the known output image formats + * (not all of which need be supported by a given version). + * You can change the default output format by defining DEFAULT_FMT; + * indeed, you had better do so if you undefine PPM_SUPPORTED. + */ + +typedef enum { + FMT_BMP, /* BMP format (Windows flavor) */ + FMT_GIF, /* GIF format */ + FMT_OS2, /* BMP format (OS/2 flavor) */ + FMT_PPM, /* PPM/PGM (PBMPLUS formats) */ + FMT_RLE, /* RLE format */ + FMT_TARGA, /* Targa format */ + FMT_TIFF /* TIFF format */ +} IMAGE_FORMATS; + +#ifndef DEFAULT_FMT /* so can override from CFLAGS in Makefile */ +#define DEFAULT_FMT FMT_PPM +#endif + +static IMAGE_FORMATS requested_fmt; + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +boolean memsrc; /* for -memsrc switch */ +boolean skip, crop; +JDIMENSION skip_start, skip_end; +JDIMENSION crop_x, crop_y, crop_width, crop_height; +#define INPUT_BUF_SIZE 4096 + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -colors N Reduce image to no more than N colors\n"); + fprintf(stderr, " -fast Fast, low-quality processing\n"); + fprintf(stderr, " -grayscale Force grayscale output\n"); + fprintf(stderr, " -rgb Force RGB output\n"); + fprintf(stderr, " -rgb565 Force RGB565 output\n"); +#ifdef IDCT_SCALING_SUPPORTED + fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n"); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -bmp Select BMP output format (Windows style)%s\n", + (DEFAULT_FMT == FMT_BMP ? " (default)" : "")); +#endif +#ifdef GIF_SUPPORTED + fprintf(stderr, " -gif Select GIF output format%s\n", + (DEFAULT_FMT == FMT_GIF ? " (default)" : "")); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -os2 Select BMP output format (OS/2 style)%s\n", + (DEFAULT_FMT == FMT_OS2 ? " (default)" : "")); +#endif +#ifdef PPM_SUPPORTED + fprintf(stderr, " -pnm Select PBMPLUS (PPM/PGM) output format%s\n", + (DEFAULT_FMT == FMT_PPM ? " (default)" : "")); +#endif +#ifdef RLE_SUPPORTED + fprintf(stderr, " -rle Select Utah RLE output format%s\n", + (DEFAULT_FMT == FMT_RLE ? " (default)" : "")); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Select Targa output format%s\n", + (DEFAULT_FMT == FMT_TARGA ? " (default)" : "")); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : "")); +#endif + fprintf(stderr, " -dither fs Use F-S dithering (default)\n"); + fprintf(stderr, " -dither none Don't use dithering in quantization\n"); + fprintf(stderr, " -dither ordered Use ordered dither (medium speed, quality)\n"); +#ifdef QUANT_2PASS_SUPPORTED + fprintf(stderr, " -map FILE Map to colors used in named image file\n"); +#endif + fprintf(stderr, " -nosmooth Don't use high-quality upsampling\n"); +#ifdef QUANT_1PASS_SUPPORTED + fprintf(stderr, " -onepass Use 1-pass quantization (fast, low quality)\n"); +#endif + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + fprintf(stderr, " -memsrc Load input file into memory before decompressing\n"); +#endif + + fprintf(stderr, " -skip Y0,Y1 Decompress all rows except those between Y0 and Y1 (inclusive)\n"); + fprintf(stderr, " -crop WxH+X+Y Decompress only a rectangular subregion of the image\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + exit(EXIT_FAILURE); +} + + +LOCAL(int) +parse_switches (j_decompress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + + /* Set up default JPEG parameters. */ + requested_fmt = DEFAULT_FMT; /* set default output file format */ + outfilename = NULL; + memsrc = FALSE; + skip = FALSE; + crop = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "bmp", 1)) { + /* BMP output format. */ + requested_fmt = FMT_BMP; + + } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) || + keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) { + /* Do color quantization. */ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + cinfo->desired_number_of_colors = val; + cinfo->quantize_colors = TRUE; + + } else if (keymatch(arg, "dct", 2)) { + /* Select IDCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "dither", 2)) { + /* Select dithering algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "fs", 2)) { + cinfo->dither_mode = JDITHER_FS; + } else if (keymatch(argv[argn], "none", 2)) { + cinfo->dither_mode = JDITHER_NONE; + } else if (keymatch(argv[argn], "ordered", 2)) { + cinfo->dither_mode = JDITHER_ORDERED; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "fast", 1)) { + /* Select recommended processing options for quick-and-dirty output. */ + cinfo->two_pass_quantize = FALSE; + cinfo->dither_mode = JDITHER_ORDERED; + if (! cinfo->quantize_colors) /* don't override an earlier -colors */ + cinfo->desired_number_of_colors = 216; + cinfo->dct_method = JDCT_FASTEST; + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "gif", 1)) { + /* GIF output format. */ + requested_fmt = FMT_GIF; + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force monochrome output. */ + cinfo->out_color_space = JCS_GRAYSCALE; + + } else if (keymatch(arg, "rgb", 2)) { + /* Force RGB output. */ + cinfo->out_color_space = JCS_RGB; + + } else if (keymatch(arg, "rgb565", 2)) { + /* Force RGB565 output. */ + cinfo->out_color_space = JCS_RGB565; + + } else if (keymatch(arg, "map", 3)) { + /* Quantize to a color map taken from an input file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (for_real) { /* too expensive to do twice! */ +#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */ + FILE *mapfile; + + if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + read_color_map(cinfo, mapfile); + fclose(mapfile); + cinfo->quantize_colors = TRUE; +#else + ERREXIT(cinfo, JERR_NOT_COMPILED); +#endif + } + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "nosmooth", 3)) { + /* Suppress fancy upsampling */ + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "onepass", 3)) { + /* Use fast one-pass quantization. */ + cinfo->two_pass_quantize = FALSE; + + } else if (keymatch(arg, "os2", 3)) { + /* BMP output format (OS/2 flavor). */ + requested_fmt = FMT_OS2; + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "memsrc", 2)) { + /* Use in-memory source manager */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + memsrc = TRUE; +#else + fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) { + /* PPM/PGM output format. */ + requested_fmt = FMT_PPM; + + } else if (keymatch(arg, "rle", 1)) { + /* RLE output format. */ + requested_fmt = FMT_RLE; + + } else if (keymatch(arg, "scale", 2)) { + /* Scale the output image by a fraction M/N. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%u/%u", + &cinfo->scale_num, &cinfo->scale_denom) != 2) + usage(); + + } else if (keymatch(arg, "skip", 2)) { + if (++argn >= argc) + usage(); + if (sscanf(argv[argn], "%u,%u", &skip_start, &skip_end) != 2 || + skip_start > skip_end) + usage(); + skip = TRUE; + + } else if (keymatch(arg, "crop", 2)) { + char c; + if (++argn >= argc) + usage(); + if (sscanf(argv[argn], "%u%c%u+%u+%u", &crop_width, &c, &crop_height, + &crop_x, &crop_y) != 5 || + (c != 'X' && c != 'x') || crop_width < 1 || crop_height < 1) + usage(); + crop = TRUE; + + } else if (keymatch(arg, "targa", 1)) { + /* Targa output format. */ + requested_fmt = FMT_TARGA; + + } else { + usage(); /* bogus switch */ + } + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * Marker processor for COM and interesting APPn markers. + * This replaces the library's built-in processor, which just skips the marker. + * We want to print out the marker as text, to the extent possible. + * Note this code relies on a non-suspending data source. + */ + +LOCAL(unsigned int) +jpeg_getc (j_decompress_ptr cinfo) +/* Read next byte */ +{ + struct jpeg_source_mgr *datasrc = cinfo->src; + + if (datasrc->bytes_in_buffer == 0) { + if (! (*datasrc->fill_input_buffer) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + } + datasrc->bytes_in_buffer--; + return GETJOCTET(*datasrc->next_input_byte++); +} + + +METHODDEF(boolean) +print_text_marker (j_decompress_ptr cinfo) +{ + boolean traceit = (cinfo->err->trace_level >= 1); + long length; + unsigned int ch; + unsigned int lastch = 0; + + length = jpeg_getc(cinfo) << 8; + length += jpeg_getc(cinfo); + length -= 2; /* discount the length word itself */ + + if (traceit) { + if (cinfo->unread_marker == JPEG_COM) + fprintf(stderr, "Comment, length %ld:\n", (long) length); + else /* assume it is an APPn otherwise */ + fprintf(stderr, "APP%d, length %ld:\n", + cinfo->unread_marker - JPEG_APP0, (long) length); + } + + while (--length >= 0) { + ch = jpeg_getc(cinfo); + if (traceit) { + /* Emit the character in a readable form. + * Nonprintables are converted to \nnn form, + * while \ is converted to \\. + * Newlines in CR, CR/LF, or LF form will be printed as one newline. + */ + if (ch == '\r') { + fprintf(stderr, "\n"); + } else if (ch == '\n') { + if (lastch != '\r') + fprintf(stderr, "\n"); + } else if (ch == '\\') { + fprintf(stderr, "\\\\"); + } else if (isprint(ch)) { + putc(ch, stderr); + } else { + fprintf(stderr, "\\%03o", ch); + } + lastch = ch; + } + } + + if (traceit) + fprintf(stderr, "\n"); + + return TRUE; +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_decompress_struct cinfo; + struct jpeg_error_mgr jerr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + int file_index; + djpeg_dest_ptr dest_mgr = NULL; + FILE *input_file; + FILE *output_file; + unsigned char *inbuffer = NULL; + unsigned long insize = 0; + JDIMENSION num_scanlines; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "djpeg"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG decompression object with default error handling. */ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Insert custom marker processor for COM and APP12. + * APP12 is used by some digital camera makers for textual info, + * so we provide the ability to display it as text. + * If you like, additional APPn marker types can be selected for display, + * but don't try to override APP0 or APP14 this way (see libjpeg.txt). + */ + jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker); + jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker); + + /* Scan command line to find file names. */ + /* It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + * (Exception: tracing level set here controls verbosity for COM markers + * found during jpeg_read_header...) + */ + + file_index = parse_switches(&cinfo, argc, argv, 0, FALSE); + +#ifdef TWO_FILE_COMMANDLINE + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + input_file = read_stdin(); + } + + /* Open the output file. */ + if (outfilename != NULL) { + if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else { + /* default output file is stdout */ + output_file = write_stdout(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Specify data source for decompression */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + if (memsrc) { + size_t nbytes; + do { + inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE); + if (inbuffer == NULL) { + fprintf(stderr, "%s: memory allocation failure\n", progname); + exit(EXIT_FAILURE); + } + nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE); + if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) { + if (file_index < argc) + fprintf(stderr, "%s: can't read from %s\n", progname, + argv[file_index]); + else + fprintf(stderr, "%s: can't read from stdin\n", progname); + } + insize += (unsigned long)nbytes; + } while (nbytes == INPUT_BUF_SIZE); + fprintf(stderr, "Compressed size: %lu bytes\n", insize); + jpeg_mem_src(&cinfo, inbuffer, insize); + } else +#endif + jpeg_stdio_src(&cinfo, input_file); + + /* Read file header, set default decompression parameters */ + (void) jpeg_read_header(&cinfo, TRUE); + + /* Adjust default decompression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); + + /* Initialize the output module now to let it override any crucial + * option settings (for instance, GIF wants to force color quantization). + */ + switch (requested_fmt) { +#ifdef BMP_SUPPORTED + case FMT_BMP: + dest_mgr = jinit_write_bmp(&cinfo, FALSE); + break; + case FMT_OS2: + dest_mgr = jinit_write_bmp(&cinfo, TRUE); + break; +#endif +#ifdef GIF_SUPPORTED + case FMT_GIF: + dest_mgr = jinit_write_gif(&cinfo); + break; +#endif +#ifdef PPM_SUPPORTED + case FMT_PPM: + dest_mgr = jinit_write_ppm(&cinfo); + break; +#endif +#ifdef RLE_SUPPORTED + case FMT_RLE: + dest_mgr = jinit_write_rle(&cinfo); + break; +#endif +#ifdef TARGA_SUPPORTED + case FMT_TARGA: + dest_mgr = jinit_write_targa(&cinfo); + break; +#endif + default: + ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT); + break; + } + dest_mgr->output_file = output_file; + + /* Start decompressor */ + (void) jpeg_start_decompress(&cinfo); + + /* Skip rows */ + if (skip) { + JDIMENSION tmp; + + /* Check for valid skip_end. We cannot check this value until after + * jpeg_start_decompress() is called. Note that we have already verified + * that skip_start <= skip_end. + */ + if (skip_end > cinfo.output_height - 1) { + fprintf(stderr, "%s: skip region exceeds image height %d\n", progname, + cinfo.output_height); + exit(EXIT_FAILURE); + } + + /* Write output file header. This is a hack to ensure that the destination + * manager creates an output image of the proper size. + */ + tmp = cinfo.output_height; + cinfo.output_height -= (skip_end - skip_start + 1); + (*dest_mgr->start_output) (&cinfo, dest_mgr); + cinfo.output_height = tmp; + + /* Process data */ + while (cinfo.output_scanline < skip_start) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + jpeg_skip_scanlines(&cinfo, skip_end - skip_start + 1); + while (cinfo.output_scanline < cinfo.output_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + + /* Decompress a subregion */ + } else if (crop) { + JDIMENSION tmp; + + /* Check for valid crop dimensions. We cannot check these values until + * after jpeg_start_decompress() is called. + */ + if (crop_x + crop_width > cinfo.output_width || + crop_y + crop_height > cinfo.output_height) { + fprintf(stderr, "%s: crop dimensions exceed image dimensions %d x %d\n", + progname, cinfo.output_width, cinfo.output_height); + exit(EXIT_FAILURE); + } + + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + ((ppm_dest_ptr) dest_mgr)->buffer_width = cinfo.output_width * + cinfo.out_color_components * + sizeof(JSAMPLE); + + /* Write output file header. This is a hack to ensure that the destination + * manager creates an output image of the proper size. + */ + tmp = cinfo.output_height; + cinfo.output_height = crop_height; + (*dest_mgr->start_output) (&cinfo, dest_mgr); + cinfo.output_height = tmp; + + /* Process data */ + jpeg_skip_scanlines(&cinfo, crop_y); + while (cinfo.output_scanline < crop_y + crop_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + jpeg_skip_scanlines(&cinfo, cinfo.output_height - crop_y - crop_height); + + /* Normal full-image decompress */ + } else { + /* Write output file header */ + (*dest_mgr->start_output) (&cinfo, dest_mgr); + + /* Process data */ + while (cinfo.output_scanline < cinfo.output_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + } + +#ifdef PROGRESS_REPORT + /* Hack: count final pass as done in case finish_output does an extra pass. + * The library won't have updated completed_passes. + */ + progress.pub.completed_passes = progress.pub.total_passes; +#endif + + /* Finish decompression and release memory. + * I must do it in this order because output module has allocated memory + * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory. + */ + (*dest_mgr->finish_output) (&cinfo, dest_mgr); + (void) jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + + /* Close files, if we opened them */ + if (input_file != stdin) + fclose(input_file); + if (output_file != stdout) + fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + if (memsrc && inbuffer != NULL) + free(inbuffer); + + /* All done. */ + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/doxygen.config glmark2-2021.02/android/jni/src/libjpeg-turbo/doxygen.config --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/doxygen.config 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/doxygen.config 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,16 @@ +PROJECT_NAME = TurboJPEG +PROJECT_NUMBER = 1.5 +OUTPUT_DIRECTORY = doc/ +USE_WINDOWS_ENCODING = NO +OPTIMIZE_OUTPUT_FOR_C = YES +WARN_NO_PARAMDOC = YES +GENERATE_LATEX = NO +FILE_PATTERNS = turbojpeg.h +HIDE_UNDOC_MEMBERS = YES +VERBATIM_HEADERS = NO +EXTRACT_STATIC = YES +JAVADOC_AUTOBRIEF = YES +MAX_INITIALIZER_LINES = 0 +ALWAYS_DETAILED_SEC = YES +HTML_TIMESTAMP = NO +HTML_EXTRA_STYLESHEET = doxygen-extra.css diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/doxygen-extra.css glmark2-2021.02/android/jni/src/libjpeg-turbo/doxygen-extra.css --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/doxygen-extra.css 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/doxygen-extra.css 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3 @@ +code { + color: #4665A2; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/example.c glmark2-2021.02/android/jni/src/libjpeg-turbo/example.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/example.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/example.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,433 @@ +/* + * example.c + * + * This file illustrates how to use the IJG code as a subroutine library + * to read or write JPEG image files. You should look at this code in + * conjunction with the documentation file libjpeg.txt. + * + * This code will not do anything useful as-is, but it may be helpful as a + * skeleton for constructing routines that call the JPEG library. + * + * We present these routines in the same coding style used in the JPEG code + * (ANSI function definitions, etc); but you are of course free to code your + * routines in a different style if you prefer. + */ + +#include + +/* + * Include file for users of JPEG library. + * You will need to have included system headers that define at least + * the typedefs FILE and size_t before you can include jpeglib.h. + * (stdio.h is sufficient on ANSI-conforming systems.) + * You may also wish to include "jerror.h". + */ + +#include "jpeglib.h" + +/* + * is used for the optional error recovery mechanism shown in + * the second part of the example. + */ + +#include + + + +/******************** JPEG COMPRESSION SAMPLE INTERFACE *******************/ + +/* This half of the example shows how to feed data into the JPEG compressor. + * We present a minimal version that does not worry about refinements such + * as error recovery (the JPEG code will just exit() if it gets an error). + */ + + +/* + * IMAGE DATA FORMATS: + * + * The standard input image format is a rectangular array of pixels, with + * each pixel having the same number of "component" values (color channels). + * Each pixel row is an array of JSAMPLEs (which typically are unsigned chars). + * If you are working with color data, then the color values for each pixel + * must be adjacent in the row; for example, R,G,B,R,G,B,R,G,B,... for 24-bit + * RGB color. + * + * For this example, we'll assume that this data structure matches the way + * our application has stored the image in memory, so we can just pass a + * pointer to our image buffer. In particular, let's say that the image is + * RGB color and is described by: + */ + +extern JSAMPLE *image_buffer; /* Points to large array of R,G,B-order data */ +extern int image_height; /* Number of rows in image */ +extern int image_width; /* Number of columns in image */ + + +/* + * Sample routine for JPEG compression. We assume that the target file name + * and a compression quality factor are passed in. + */ + +GLOBAL(void) +write_JPEG_file (char *filename, int quality) +{ + /* This struct contains the JPEG compression parameters and pointers to + * working space (which is allocated as needed by the JPEG library). + * It is possible to have several such structures, representing multiple + * compression/decompression processes, in existence at once. We refer + * to any one struct (and its associated working data) as a "JPEG object". + */ + struct jpeg_compress_struct cinfo; + /* This struct represents a JPEG error handler. It is declared separately + * because applications often want to supply a specialized error handler + * (see the second half of this file for an example). But here we just + * take the easy way out and use the standard error handler, which will + * print a message on stderr and call exit() if compression fails. + * Note that this struct must live as long as the main JPEG parameter + * struct, to avoid dangling-pointer problems. + */ + struct jpeg_error_mgr jerr; + /* More stuff */ + FILE *outfile; /* target file */ + JSAMPROW row_pointer[1]; /* pointer to JSAMPLE row[s] */ + int row_stride; /* physical row width in image buffer */ + + /* Step 1: allocate and initialize JPEG compression object */ + + /* We have to set up the error handler first, in case the initialization + * step fails. (Unlikely, but it could happen if you are out of memory.) + * This routine fills in the contents of struct jerr, and returns jerr's + * address which we place into the link field in cinfo. + */ + cinfo.err = jpeg_std_error(&jerr); + /* Now we can initialize the JPEG compression object. */ + jpeg_create_compress(&cinfo); + + /* Step 2: specify data destination (eg, a file) */ + /* Note: steps 2 and 3 can be done in either order. */ + + /* Here we use the library-supplied code to send compressed data to a + * stdio stream. You can also write your own code to do something else. + * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that + * requires it in order to write binary files. + */ + if ((outfile = fopen(filename, "wb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_dest(&cinfo, outfile); + + /* Step 3: set parameters for compression */ + + /* First we supply a description of the input image. + * Four fields of the cinfo struct must be filled in: + */ + cinfo.image_width = image_width; /* image width and height, in pixels */ + cinfo.image_height = image_height; + cinfo.input_components = 3; /* # of color components per pixel */ + cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ + /* Now use the library's routine to set default compression parameters. + * (You must set at least cinfo.in_color_space before calling this, + * since the defaults depend on the source color space.) + */ + jpeg_set_defaults(&cinfo); + /* Now you can set any non-default parameters you wish to. + * Here we just illustrate the use of quality (quantization table) scaling: + */ + jpeg_set_quality(&cinfo, quality, TRUE /* limit to baseline-JPEG values */); + + /* Step 4: Start compressor */ + + /* TRUE ensures that we will write a complete interchange-JPEG file. + * Pass TRUE unless you are very sure of what you're doing. + */ + jpeg_start_compress(&cinfo, TRUE); + + /* Step 5: while (scan lines remain to be written) */ + /* jpeg_write_scanlines(...); */ + + /* Here we use the library's state variable cinfo.next_scanline as the + * loop counter, so that we don't have to keep track ourselves. + * To keep things simple, we pass one scanline per call; you can pass + * more if you wish, though. + */ + row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */ + + while (cinfo.next_scanline < cinfo.image_height) { + /* jpeg_write_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could pass + * more than one scanline at a time if that's more convenient. + */ + row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; + (void) jpeg_write_scanlines(&cinfo, row_pointer, 1); + } + + /* Step 6: Finish compression */ + + jpeg_finish_compress(&cinfo); + /* After finish_compress, we can close the output file. */ + fclose(outfile); + + /* Step 7: release JPEG compression object */ + + /* This is an important step since it will release a good deal of memory. */ + jpeg_destroy_compress(&cinfo); + + /* And we're done! */ +} + + +/* + * SOME FINE POINTS: + * + * In the above loop, we ignored the return value of jpeg_write_scanlines, + * which is the number of scanlines actually written. We could get away + * with this because we were only relying on the value of cinfo.next_scanline, + * which will be incremented correctly. If you maintain additional loop + * variables then you should be careful to increment them properly. + * Actually, for output to a stdio stream you needn't worry, because + * then jpeg_write_scanlines will write all the lines passed (or else exit + * with a fatal error). Partial writes can only occur if you use a data + * destination module that can demand suspension of the compressor. + * (If you don't know what that's for, you don't need it.) + * + * If the compressor requires full-image buffers (for entropy-coding + * optimization or a multi-scan JPEG file), it will create temporary + * files for anything that doesn't fit within the maximum-memory setting. + * (Note that temp files are NOT needed if you use the default parameters.) + * On some systems you may need to set up a signal handler to ensure that + * temporary files are deleted if the program is interrupted. See libjpeg.txt. + * + * Scanlines MUST be supplied in top-to-bottom order if you want your JPEG + * files to be compatible with everyone else's. If you cannot readily read + * your data in that order, you'll need an intermediate array to hold the + * image. See rdtarga.c or rdbmp.c for examples of handling bottom-to-top + * source data using the JPEG code's internal virtual-array mechanisms. + */ + + + +/******************** JPEG DECOMPRESSION SAMPLE INTERFACE *******************/ + +/* This half of the example shows how to read data from the JPEG decompressor. + * It's a bit more refined than the above, in that we show: + * (a) how to modify the JPEG library's standard error-reporting behavior; + * (b) how to allocate workspace using the library's memory manager. + * + * Just to make this example a little different from the first one, we'll + * assume that we do not intend to put the whole image into an in-memory + * buffer, but to send it line-by-line someplace else. We need a one- + * scanline-high JSAMPLE array as a work buffer, and we will let the JPEG + * memory manager allocate it for us. This approach is actually quite useful + * because we don't need to remember to deallocate the buffer separately: it + * will go away automatically when the JPEG object is cleaned up. + */ + + +/* + * ERROR HANDLING: + * + * The JPEG library's standard error handler (jerror.c) is divided into + * several "methods" which you can override individually. This lets you + * adjust the behavior without duplicating a lot of code, which you might + * have to update with each future release. + * + * Our example here shows how to override the "error_exit" method so that + * control is returned to the library's caller when a fatal error occurs, + * rather than calling exit() as the standard error_exit method does. + * + * We use C's setjmp/longjmp facility to return control. This means that the + * routine which calls the JPEG library must first execute a setjmp() call to + * establish the return point. We want the replacement error_exit to do a + * longjmp(). But we need to make the setjmp buffer accessible to the + * error_exit routine. To do this, we make a private extension of the + * standard JPEG error handler object. (If we were using C++, we'd say we + * were making a subclass of the regular error handler.) + * + * Here's the extended error handler struct: + */ + +struct my_error_mgr { + struct jpeg_error_mgr pub; /* "public" fields */ + + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +typedef struct my_error_mgr *my_error_ptr; + +/* + * Here's the routine that will replace the standard error_exit method: + */ + +METHODDEF(void) +my_error_exit (j_common_ptr cinfo) +{ + /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */ + my_error_ptr myerr = (my_error_ptr) cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose. */ + (*cinfo->err->output_message) (cinfo); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} + + +/* + * Sample routine for JPEG decompression. We assume that the source file name + * is passed in. We want to return 1 on success, 0 on error. + */ + + +GLOBAL(int) +read_JPEG_file (char *filename) +{ + /* This struct contains the JPEG decompression parameters and pointers to + * working space (which is allocated as needed by the JPEG library). + */ + struct jpeg_decompress_struct cinfo; + /* We use our private extension JPEG error handler. + * Note that this struct must live as long as the main JPEG parameter + * struct, to avoid dangling-pointer problems. + */ + struct my_error_mgr jerr; + /* More stuff */ + FILE *infile; /* source file */ + JSAMPARRAY buffer; /* Output row buffer */ + int row_stride; /* physical row width in output buffer */ + + /* In this example we want to open the input file before doing anything else, + * so that the setjmp() error recovery below can assume the file is open. + * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that + * requires it in order to read binary files. + */ + + if ((infile = fopen(filename, "rb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + return 0; + } + + /* Step 1: allocate and initialize JPEG decompression object */ + + /* We set up the normal JPEG error routines, then override error_exit. */ + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object, close the input file, and return. + */ + jpeg_destroy_decompress(&cinfo); + fclose(infile); + return 0; + } + /* Now we can initialize the JPEG decompression object. */ + jpeg_create_decompress(&cinfo); + + /* Step 2: specify data source (eg, a file) */ + + jpeg_stdio_src(&cinfo, infile); + + /* Step 3: read file parameters with jpeg_read_header() */ + + (void) jpeg_read_header(&cinfo, TRUE); + /* We can ignore the return value from jpeg_read_header since + * (a) suspension is not possible with the stdio data source, and + * (b) we passed TRUE to reject a tables-only JPEG file as an error. + * See libjpeg.txt for more info. + */ + + /* Step 4: set parameters for decompression */ + + /* In this example, we don't need to change any of the defaults set by + * jpeg_read_header(), so we do nothing here. + */ + + /* Step 5: Start decompressor */ + + (void) jpeg_start_decompress(&cinfo); + /* We can ignore the return value since suspension is not possible + * with the stdio data source. + */ + + /* We may need to do some setup of our own at this point before reading + * the data. After jpeg_start_decompress() we have the correct scaled + * output image dimensions available, as well as the output colormap + * if we asked for color quantization. + * In this example, we need to make an output work buffer of the right size. + */ + /* JSAMPLEs per row in output buffer */ + row_stride = cinfo.output_width * cinfo.output_components; + /* Make a one-row-high sample array that will go away when done with image */ + buffer = (*cinfo.mem->alloc_sarray) + ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1); + + /* Step 6: while (scan lines remain to be read) */ + /* jpeg_read_scanlines(...); */ + + /* Here we use the library's state variable cinfo.output_scanline as the + * loop counter, so that we don't have to keep track ourselves. + */ + while (cinfo.output_scanline < cinfo.output_height) { + /* jpeg_read_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could ask for + * more than one scanline at a time if that's more convenient. + */ + (void) jpeg_read_scanlines(&cinfo, buffer, 1); + /* Assume put_scanline_someplace wants a pointer and sample count. */ + put_scanline_someplace(buffer[0], row_stride); + } + + /* Step 7: Finish decompression */ + + (void) jpeg_finish_decompress(&cinfo); + /* We can ignore the return value since suspension is not possible + * with the stdio data source. + */ + + /* Step 8: Release JPEG decompression object */ + + /* This is an important step since it will release a good deal of memory. */ + jpeg_destroy_decompress(&cinfo); + + /* After finish_decompress, we can close the input file. + * Here we postpone it until after no more JPEG errors are possible, + * so as to simplify the setjmp error logic above. (Actually, I don't + * think that jpeg_destroy can do an error exit, but why assume anything...) + */ + fclose(infile); + + /* At this point you may want to check to see whether any corrupt-data + * warnings occurred (test whether jerr.pub.num_warnings is nonzero). + */ + + /* And we're done! */ + return 1; +} + + +/* + * SOME FINE POINTS: + * + * In the above code, we ignored the return value of jpeg_read_scanlines, + * which is the number of scanlines actually read. We could get away with + * this because we asked for only one line at a time and we weren't using + * a suspending data source. See libjpeg.txt for more info. + * + * We cheated a bit by calling alloc_sarray() after jpeg_start_decompress(); + * we should have done it beforehand to ensure that the space would be + * counted against the JPEG max_memory setting. In some systems the above + * code would risk an out-of-memory error. However, in general we don't + * know the output image dimensions before jpeg_start_decompress(), unless we + * call jpeg_calc_output_dimensions(). See libjpeg.txt for more about this. + * + * Scanlines are returned in the same order as they appear in the JPEG file, + * which is standardly top-to-bottom. If you must emit data bottom-to-top, + * you can use one of the virtual arrays provided by the JPEG memory manager + * to invert the data. See wrbmp.c for an example. + * + * As with compression, some operating modes may require temporary files. + * On some systems you may need to set up a signal handler to ensure that + * temporary files are deleted if the program is interrupted. See libjpeg.txt. + */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jaricom.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jaricom.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jaricom.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jaricom.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jaricom.c * + * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains probability estimation tables for common use in * arithmetic entropy encoding and decoding routines. @@ -18,7 +21,7 @@ #include "jpeglib.h" /* The following #define specifies the packing of the four components - * into the compact INT32 representation. + * into the compact JLONG representation. * Note that this formula must match the actual arithmetic encoder * and decoder implementation. The implementation has to be changed * if this formula is changed. @@ -26,9 +29,9 @@ * implementation (jbig_tab.c). */ -#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b) +#define V(i,a,b,c,d) (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b) -const INT32 jpeg_aritab[113+1] = { +const JLONG jpeg_aritab[113+1] = { /* * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcapimin.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcapimin.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcapimin.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcapimin.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jcapimin.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the compression half * of the JPEG library. These are the "minimum" API routines that may be @@ -33,12 +36,12 @@ int i; /* Guard against version mismatches between library and caller. */ - cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ + cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ if (version != JPEG_LIB_VERSION) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); - if (structsize != SIZEOF(struct jpeg_compress_struct)) - ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) SIZEOF(struct jpeg_compress_struct), (int) structsize); + if (structsize != sizeof(struct jpeg_compress_struct)) + ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, + (int) sizeof(struct jpeg_compress_struct), (int) structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -47,9 +50,9 @@ * complain here. */ { - struct jpeg_error_mgr * err = cinfo->err; - void * client_data = cinfo->client_data; /* ignore Purify complaint here */ - MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct)); + struct jpeg_error_mgr *err = cinfo->err; + void *client_data = cinfo->client_data; /* ignore Purify complaint here */ + MEMZERO(cinfo, sizeof(struct jpeg_compress_struct)); cinfo->err = err; cinfo->client_data = client_data; } @@ -85,7 +88,7 @@ cinfo->script_space = NULL; - cinfo->input_gamma = 1.0; /* in case application forgets */ + cinfo->input_gamma = 1.0; /* in case application forgets */ /* OK, I'm ready */ cinfo->global_state = CSTATE_START; @@ -131,8 +134,8 @@ jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress) { int i; - JQUANT_TBL * qtbl; - JHUFF_TBL * htbl; + JQUANT_TBL *qtbl; + JHUFF_TBL *htbl; for (i = 0; i < NUM_QUANT_TBLS; i++) { if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL) @@ -173,15 +176,15 @@ (*cinfo->master->prepare_for_pass) (cinfo); for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) { if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) iMCU_row; - cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long) iMCU_row; + cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows; + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); } /* We bypass the main controller and invoke coef controller directly; * all work is being done from the coefficient buffer. */ if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL)) - ERREXIT(cinfo, JERR_CANT_SUSPEND); + ERREXIT(cinfo, JERR_CANT_SUSPEND); } (*cinfo->master->finish_pass) (cinfo); } @@ -202,9 +205,9 @@ GLOBAL(void) jpeg_write_marker (j_compress_ptr cinfo, int marker, - const JOCTET *dataptr, unsigned int datalen) + const JOCTET *dataptr, unsigned int datalen) { - JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val)); + void (*write_marker_byte) (j_compress_ptr info, int val); if (cinfo->next_scanline != 0 || (cinfo->global_state != CSTATE_SCANNING && @@ -213,7 +216,7 @@ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); (*cinfo->marker->write_marker_header) (cinfo, marker, datalen); - write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */ + write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */ while (datalen--) { (*write_marker_byte) (cinfo, *dataptr); dataptr++; @@ -248,14 +251,14 @@ * To produce a pair of files containing abbreviated tables and abbreviated * image data, one would proceed as follows: * - * initialize JPEG object - * set JPEG parameters - * set destination to table file - * jpeg_write_tables(cinfo); - * set destination to image file - * jpeg_start_compress(cinfo, FALSE); - * write data... - * jpeg_finish_compress(cinfo); + * initialize JPEG object + * set JPEG parameters + * set destination to table file + * jpeg_write_tables(cinfo); + * set destination to image file + * jpeg_start_compress(cinfo, FALSE); + * write data... + * jpeg_finish_compress(cinfo); * * jpeg_write_tables has the side effect of marking all tables written * (same as jpeg_suppress_tables(..., TRUE)). Thus a subsequent start_compress diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcapistd.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcapistd.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcapistd.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcapistd.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1994-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the compression half * of the JPEG library. These are the "standard" API routines that are @@ -41,7 +42,7 @@ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); if (write_all_tables) - jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ + jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ /* (Re)initialize error mgr and destination modules */ (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); @@ -75,7 +76,7 @@ GLOBAL(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION num_lines) + JDIMENSION num_lines) { JDIMENSION row_ctr, rows_left; @@ -118,7 +119,7 @@ GLOBAL(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION num_lines) + JDIMENSION num_lines) { JDIMENSION lines_per_iMCU_row; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcarith.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcarith.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcarith.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcarith.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcarith.c * + * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains portable arithmetic entropy encoding routines for JPEG * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). @@ -23,10 +26,10 @@ typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ - INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */ - INT32 a; /* A register, normalized size of coding interval */ - INT32 sc; /* counter for stacked 0xFF values which might overflow */ - INT32 zc; /* counter for pending 0x00 output values which might * + JLONG c; /* C register, base of coding interval, layout as in sec. D.1.3 */ + JLONG a; /* A register, normalized size of coding interval */ + JLONG sc; /* counter for stacked 0xFF values which might overflow */ + JLONG zc; /* counter for pending 0x00 output values which might * * be discarded at the end ("Pacman" termination) */ int ct; /* bit shift counter, determines when next byte will be written */ int buffer; /* buffer for most recent output byte != 0xFF */ @@ -34,18 +37,18 @@ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to statistics areas (these workspaces have image lifespan) */ - unsigned char * dc_stats[NUM_ARITH_TBLS]; - unsigned char * ac_stats[NUM_ARITH_TBLS]; + unsigned char *dc_stats[NUM_ARITH_TBLS]; + unsigned char *ac_stats[NUM_ARITH_TBLS]; /* Statistics bin for coding with fixed probability 0.5 */ unsigned char fixed_bin[4]; } arith_entropy_encoder; -typedef arith_entropy_encoder * arith_entropy_ptr; +typedef arith_entropy_encoder *arith_entropy_ptr; /* The following two definitions specify the allocation chunk size * for the statistics area. @@ -95,20 +98,20 @@ #define CALCULATE_SPECTRAL_CONDITIONING */ -/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. - * We assume that int right shift is unsigned if INT32 right shift is, +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. + * We assume that int right shift is unsigned if JLONG right shift is, * which should be safe. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS int ishift_temp; +#define ISHIFT_TEMPS int ishift_temp; #define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif @@ -116,7 +119,7 @@ emit_byte (int val, j_compress_ptr cinfo) /* Write next output byte; we do not support suspension in this module. */ { - struct jpeg_destination_mgr * dest = cinfo->dest; + struct jpeg_destination_mgr *dest = cinfo->dest; *dest->next_output_byte++ = (JOCTET) val; if (--dest->free_in_buffer == 0) @@ -133,7 +136,7 @@ finish_pass (j_compress_ptr cinfo) { arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; - INT32 temp; + JLONG temp; /* Section D.1.8: Termination of encoding */ @@ -149,11 +152,11 @@ /* One final overflow has to be handled */ if (e->buffer >= 0) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); emit_byte(e->buffer + 1, cinfo); if (e->buffer + 1 == 0xFF) - emit_byte(0x00, cinfo); + emit_byte(0x00, cinfo); } e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ e->sc = 0; @@ -162,17 +165,17 @@ ++e->zc; else if (e->buffer >= 0) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); emit_byte(e->buffer, cinfo); } if (e->sc) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); do { - emit_byte(0xFF, cinfo); - emit_byte(0x00, cinfo); + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); } while (--e->sc); } } @@ -187,7 +190,7 @@ if (e->c & 0x7F800L) { emit_byte((e->c >> 11) & 0xFF, cinfo); if (((e->c >> 11) & 0xFF) == 0xFF) - emit_byte(0x00, cinfo); + emit_byte(0x00, cinfo); } } } @@ -216,20 +219,20 @@ */ LOCAL(void) -arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) +arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) { register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; register unsigned char nl, nm; - register INT32 qe, temp; + register JLONG qe, temp; register int sv; /* Fetch values from our compact representation of Table D.2: * Qe values and probability estimation state machine */ sv = *st; - qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Encode & estimation procedures per sections D.1.4 & D.1.5 */ e->a -= qe; @@ -243,7 +246,7 @@ e->c += e->a; e->a = qe; } - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ } else { /* Encode the more probable symbol */ if (e->a >= 0x8000L) @@ -255,7 +258,7 @@ e->c += e->a; e->a = qe; } - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } /* Renormalization & data output per section D.1.6 */ @@ -266,43 +269,43 @@ /* Another byte is ready for output */ temp = e->c >> 19; if (temp > 0xFF) { - /* Handle overflow over all stacked 0xFF bytes */ - if (e->buffer >= 0) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - emit_byte(e->buffer + 1, cinfo); - if (e->buffer + 1 == 0xFF) - emit_byte(0x00, cinfo); - } - e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ - e->sc = 0; - /* Note: The 3 spacer bits in the C register guarantee - * that the new buffer byte can't be 0xFF here - * (see page 160 in the P&M JPEG book). */ - e->buffer = temp & 0xFF; /* new output byte, might overflow later */ + /* Handle overflow over all stacked 0xFF bytes */ + if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer + 1, cinfo); + if (e->buffer + 1 == 0xFF) + emit_byte(0x00, cinfo); + } + e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ + e->sc = 0; + /* Note: The 3 spacer bits in the C register guarantee + * that the new buffer byte can't be 0xFF here + * (see page 160 in the P&M JPEG book). */ + e->buffer = temp & 0xFF; /* new output byte, might overflow later */ } else if (temp == 0xFF) { - ++e->sc; /* stack 0xFF byte (which might overflow later) */ + ++e->sc; /* stack 0xFF byte (which might overflow later) */ } else { - /* Output all stacked 0xFF bytes, they will not overflow any more */ - if (e->buffer == 0) - ++e->zc; - else if (e->buffer >= 0) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - emit_byte(e->buffer, cinfo); - } - if (e->sc) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - do { - emit_byte(0xFF, cinfo); - emit_byte(0x00, cinfo); - } while (--e->sc); - } - e->buffer = temp & 0xFF; /* new output byte (can still overflow) */ + /* Output all stacked 0xFF bytes, they will not overflow any more */ + if (e->buffer == 0) + ++e->zc; + else if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer, cinfo); + } + if (e->sc) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + do { + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); + } while (--e->sc); + } + e->buffer = temp & 0xFF; /* new output byte (can still overflow) */ } e->c &= 0x7FFFFL; e->ct += 8; @@ -320,7 +323,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; finish_pass(cinfo); @@ -398,45 +401,45 @@ /* Figure F.4: Encode_DC_DIFF */ if ((v = m - entropy->last_dc_val[ci]) == 0) { arith_encode(cinfo, st, 0); - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ } else { entropy->last_dc_val[ci] = m; arith_encode(cinfo, st, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ - st += 2; /* Table F.4: SP = S0 + 2 */ - entropy->dc_context[ci] = 4; /* small positive diff category */ + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ } else { - v = -v; - arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ - st += 3; /* Table F.4: SN = S0 + 3 */ - entropy->dc_context[ci] = 8; /* small negative diff category */ + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ } /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] += 8; /* large diff category */ + entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } } @@ -491,21 +494,21 @@ /* Figure F.5: Encode_AC_Coefficients */ for (k = cinfo->Ss; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ for (;;) { if ((v = (*block)[jpeg_natural_order[k]]) >= 0) { - if (v >>= cinfo->Al) { - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 0); - break; - } + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + break; + } } else { - v = -v; - if (v >>= cinfo->Al) { - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 1); - break; - } + v = -v; + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + break; + } } arith_encode(cinfo, st + 1, 0); st += 3; k++; } @@ -517,15 +520,15 @@ m = 1; v2 = v; if (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } } arith_encode(cinfo, st, 0); @@ -566,7 +569,7 @@ entropy->restarts_to_go--; } - st = entropy->fixed_bin; /* use fixed probability estimation */ + st = entropy->fixed_bin; /* use fixed probability estimation */ Al = cinfo->Al; /* Encode the MCU data blocks */ @@ -635,29 +638,29 @@ for (k = cinfo->Ss; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); if (k > kex) - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ for (;;) { if ((v = (*block)[jpeg_natural_order[k]]) >= 0) { - if (v >>= cinfo->Al) { - if (v >> 1) /* previously nonzero coef */ - arith_encode(cinfo, st + 2, (v & 1)); - else { /* newly nonzero coef */ - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 0); - } - break; - } + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + } + break; + } } else { - v = -v; - if (v >>= cinfo->Al) { - if (v >> 1) /* previously nonzero coef */ - arith_encode(cinfo, st + 2, (v & 1)); - else { /* newly nonzero coef */ - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 1); - } - break; - } + v = -v; + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + } + break; + } } arith_encode(cinfo, st + 1, 0); st += 3; k++; } @@ -680,7 +683,7 @@ encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl, k, ke; @@ -713,45 +716,45 @@ /* Figure F.4: Encode_DC_DIFF */ if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) { arith_encode(cinfo, st, 0); - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ } else { entropy->last_dc_val[ci] = (*block)[0]; arith_encode(cinfo, st, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ - st += 2; /* Table F.4: SP = S0 + 2 */ - entropy->dc_context[ci] = 4; /* small positive diff category */ + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ } else { - v = -v; - arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ - st += 3; /* Table F.4: SN = S0 + 3 */ - entropy->dc_context[ci] = 8; /* small negative diff category */ + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ } /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] += 8; /* large diff category */ + entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */ @@ -765,43 +768,43 @@ /* Figure F.5: Encode_AC_Coefficients */ for (k = 1; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ while ((v = (*block)[jpeg_natural_order[k]]) == 0) { - arith_encode(cinfo, st + 1, 0); st += 3; k++; + arith_encode(cinfo, st + 1, 0); st += 3; k++; } arith_encode(cinfo, st + 1, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, entropy->fixed_bin, 0); + arith_encode(cinfo, entropy->fixed_bin, 0); } else { - v = -v; - arith_encode(cinfo, entropy->fixed_bin, 1); + v = -v; + arith_encode(cinfo, entropy->fixed_bin, 1); } st += 2; /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - if (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + if (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } } arith_encode(cinfo, st, 0); /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } /* Encode EOB decision only if k <= DCTSIZE2 - 1 */ if (k <= DCTSIZE2 - 1) { @@ -823,7 +826,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (gather_statistics) /* Make sure to avoid that in the master control logic! @@ -838,14 +841,14 @@ if (cinfo->progressive_mode) { if (cinfo->Ah == 0) { if (cinfo->Ss == 0) - entropy->pub.encode_mcu = encode_mcu_DC_first; + entropy->pub.encode_mcu = encode_mcu_DC_first; else - entropy->pub.encode_mcu = encode_mcu_AC_first; + entropy->pub.encode_mcu = encode_mcu_AC_first; } else { if (cinfo->Ss == 0) - entropy->pub.encode_mcu = encode_mcu_DC_refine; + entropy->pub.encode_mcu = encode_mcu_DC_refine; else - entropy->pub.encode_mcu = encode_mcu_AC_refine; + entropy->pub.encode_mcu = encode_mcu_AC_refine; } } else entropy->pub.encode_mcu = encode_mcu; @@ -857,10 +860,10 @@ if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) { tbl = compptr->dc_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; @@ -870,15 +873,15 @@ if (cinfo->progressive_mode == 0 || cinfo->Se) { tbl = compptr->ac_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); #ifdef CALCULATE_SPECTRAL_CONDITIONING if (cinfo->progressive_mode) - /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ - cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); + /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ + cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); #endif } } @@ -909,7 +912,7 @@ entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(arith_entropy_encoder)); + sizeof(arith_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass; entropy->pub.finish_pass = finish_pass; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccoefct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jccoefct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccoefct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jccoefct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jccoefct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the coefficient buffer controller for compression. * This controller is the top level of the JPEG compressor proper. @@ -34,19 +37,16 @@ typedef struct { struct jpeg_c_coef_controller pub; /* public fields */ - JDIMENSION iMCU_row_num; /* iMCU row # within image */ - JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ + JDIMENSION iMCU_row_num; /* iMCU row # within image */ + JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ /* For single-pass compression, it's sufficient to buffer just one MCU * (although this may prove a bit slow in practice). We allocate a * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each - * MCU constructed and sent. (On 80x86, the workspace is FAR even though - * it's not really very big; this is to keep the module interfaces unchanged - * when a large coefficient buffer is necessary.) - * In multi-pass modes, this array points to the current MCU's blocks - * within the virtual arrays. + * MCU constructed and sent. In multi-pass modes, this array points to the + * current MCU's blocks within the virtual arrays. */ JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU]; @@ -54,17 +54,17 @@ jvirt_barray_ptr whole_image[MAX_COMPONENTS]; } my_coef_controller; -typedef my_coef_controller * my_coef_ptr; +typedef my_coef_controller *my_coef_ptr; /* Forward declarations */ METHODDEF(boolean) compress_data - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); #ifdef FULL_COEF_BUFFER_SUPPORTED METHODDEF(boolean) compress_first_pass - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); METHODDEF(boolean) compress_output - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); #endif @@ -143,7 +143,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, bi, ci, yindex, yoffset, blockcnt; @@ -154,7 +154,7 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col; - MCU_col_num++) { + MCU_col_num++) { /* Determine where data comes from in input_buf and do the DCT thing. * Each call on forward_DCT processes a horizontal row of DCT blocks * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks @@ -166,46 +166,46 @@ */ blkn = 0; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - xpos = MCU_col_num * compptr->MCU_sample_width; - ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (coef->iMCU_row_num < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { - (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[compptr->component_index], - coef->MCU_buffer[blkn], - ypos, xpos, (JDIMENSION) blockcnt); - if (blockcnt < compptr->MCU_width) { - /* Create some dummy blocks at the right edge of the image. */ - jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt], - (compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK)); - for (bi = blockcnt; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0]; - } - } - } else { - /* Create a row of dummy blocks at the bottom of the image. */ - jzero_far((void FAR *) coef->MCU_buffer[blkn], - compptr->MCU_width * SIZEOF(JBLOCK)); - for (bi = 0; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0]; - } - } - blkn += compptr->MCU_width; - ypos += DCTSIZE; - } + compptr = cinfo->cur_comp_info[ci]; + blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + xpos = MCU_col_num * compptr->MCU_sample_width; + ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (coef->iMCU_row_num < last_iMCU_row || + yoffset+yindex < compptr->last_row_height) { + (*cinfo->fdct->forward_DCT) (cinfo, compptr, + input_buf[compptr->component_index], + coef->MCU_buffer[blkn], + ypos, xpos, (JDIMENSION) blockcnt); + if (blockcnt < compptr->MCU_width) { + /* Create some dummy blocks at the right edge of the image. */ + jzero_far((void *) coef->MCU_buffer[blkn + blockcnt], + (compptr->MCU_width - blockcnt) * sizeof(JBLOCK)); + for (bi = blockcnt; bi < compptr->MCU_width; bi++) { + coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0]; + } + } + } else { + /* Create a row of dummy blocks at the bottom of the image. */ + jzero_far((void *) coef->MCU_buffer[blkn], + compptr->MCU_width * sizeof(JBLOCK)); + for (bi = 0; bi < compptr->MCU_width; bi++) { + coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0]; + } + } + blkn += compptr->MCU_width; + ypos += DCTSIZE; + } } /* Try to write the MCU. In event of a suspension failure, we will * re-DCT the MCU on restart (a bit inefficient, could be fixed...) */ if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -280,17 +280,17 @@ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[ci], thisblockrow, - (JDIMENSION) (block_row * DCTSIZE), - (JDIMENSION) 0, blocks_across); + input_buf[ci], thisblockrow, + (JDIMENSION) (block_row * DCTSIZE), + (JDIMENSION) 0, blocks_across); if (ndummy > 0) { - /* Create dummy blocks at the right edge of the image. */ - thisblockrow += blocks_across; /* => first dummy block */ - jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK)); - lastDC = thisblockrow[-1][0]; - for (bi = 0; bi < ndummy; bi++) { - thisblockrow[bi][0] = lastDC; - } + /* Create dummy blocks at the right edge of the image. */ + thisblockrow += blocks_across; /* => first dummy block */ + jzero_far((void *) thisblockrow, ndummy * sizeof(JBLOCK)); + lastDC = thisblockrow[-1][0]; + for (bi = 0; bi < ndummy; bi++) { + thisblockrow[bi][0] = lastDC; + } } } /* If at end of image, create dummy block rows as needed. @@ -299,22 +299,22 @@ * This squeezes a few more bytes out of the resulting file... */ if (coef->iMCU_row_num == last_iMCU_row) { - blocks_across += ndummy; /* include lower right corner */ + blocks_across += ndummy; /* include lower right corner */ MCUs_across = blocks_across / h_samp_factor; for (block_row = block_rows; block_row < compptr->v_samp_factor; - block_row++) { - thisblockrow = buffer[block_row]; - lastblockrow = buffer[block_row-1]; - jzero_far((void FAR *) thisblockrow, - (size_t) (blocks_across * SIZEOF(JBLOCK))); - for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { - lastDC = lastblockrow[h_samp_factor-1][0]; - for (bi = 0; bi < h_samp_factor; bi++) { - thisblockrow[bi][0] = lastDC; - } - thisblockrow += h_samp_factor; /* advance to next MCU in row */ - lastblockrow += h_samp_factor; - } + block_row++) { + thisblockrow = buffer[block_row]; + lastblockrow = buffer[block_row-1]; + jzero_far((void *) thisblockrow, + (size_t) (blocks_across * sizeof(JBLOCK))); + for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { + lastDC = lastblockrow[h_samp_factor-1][0]; + for (bi = 0; bi < h_samp_factor; bi++) { + thisblockrow[bi][0] = lastDC; + } + thisblockrow += h_samp_factor; /* advance to next MCU in row */ + lastblockrow += h_samp_factor; + } } } } @@ -341,7 +341,7 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN]; @@ -364,25 +364,25 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < compptr->MCU_width; xindex++) { - coef->MCU_buffer[blkn++] = buffer_ptr++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + coef->MCU_buffer[blkn++] = buffer_ptr++; + } + } } /* Try to write the MCU. */ if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -408,7 +408,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_c_coef_controller *) coef; coef->pub.start_pass = start_pass_coef; @@ -421,14 +421,14 @@ jpeg_component_info *compptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) compptr->v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) jround_up((long) compptr->width_in_blocks, + (long) compptr->h_samp_factor), + (JDIMENSION) jround_up((long) compptr->height_in_blocks, + (long) compptr->v_samp_factor), + (JDIMENSION) compptr->v_samp_factor); } #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -440,7 +440,7 @@ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolext.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolext.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,148 @@ +/* + * jccolext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2012, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains input colorspace conversion routines. + */ + + +/* This file is included by jccolor.c */ + + +/* + * Convert some rows of samples to the JPEG colorspace. + * + * Note that we change from the application's interleaved-pixel format + * to our internal noninterleaved, one-plane-per-component format. + * The input buffer is therefore three times as wide as the output buffer. + * + * A starting row offset is provided only for the output buffer. The caller + * can easily adjust the passed input_buf value to accommodate any row + * offset required on that side. + */ + +INLINE +LOCAL(void) +rgb_ycc_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG * ctab = cconvert->rgb_ycc_tab; + register JSAMPROW inptr; + register JSAMPROW outptr0, outptr1, outptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr[RGB_RED]); + g = GETJSAMPLE(inptr[RGB_GREEN]); + b = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations + * must be too; we do not need an explicit range-limiting operation. + * Hence the value being shifted is never negative, and we don't + * need the general RIGHT_SHIFT macro. + */ + /* Y */ + outptr0[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + /* Cb */ + outptr1[col] = (JSAMPLE) + ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) + >> SCALEBITS); + /* Cr */ + outptr2[col] = (JSAMPLE) + ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) + >> SCALEBITS); + } + } +} + + +/**************** Cases other than RGB -> YCbCr **************/ + + +/* + * Convert some rows of samples to the JPEG colorspace. + * This version handles RGB->grayscale conversion, which is the same + * as the RGB->Y portion of RGB->YCbCr. + * We assume rgb_ycc_start has been called (we only use the Y tables). + */ + +INLINE +LOCAL(void) +rgb_gray_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG * ctab = cconvert->rgb_ycc_tab; + register JSAMPROW inptr; + register JSAMPROW outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr = output_buf[0][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr[RGB_RED]); + g = GETJSAMPLE(inptr[RGB_GREEN]); + b = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* Y */ + outptr[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + } + } +} + + +/* + * Convert some rows of samples to the JPEG colorspace. + * This version handles extended RGB->plain RGB conversion + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + register JSAMPROW inptr; + register JSAMPROW outptr0, outptr1, outptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = GETJSAMPLE(inptr[RGB_RED]); + outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]); + outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolext.c.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,114 +0,0 @@ -/* - * jccolext.c - * - * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains input colorspace conversion routines. - */ - - -/* This file is included by jccolor.c */ - - -/* - * Convert some rows of samples to the JPEG colorspace. - * - * Note that we change from the application's interleaved-pixel format - * to our internal noninterleaved, one-plane-per-component format. - * The input buffer is therefore three times as wide as the output buffer. - * - * A starting row offset is provided only for the output buffer. The caller - * can easily adjust the passed input_buf value to accommodate any row - * offset required on that side. - */ - -INLINE -LOCAL(void) -rgb_ycc_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; - register JSAMPROW inptr; - register JSAMPROW outptr0, outptr1, outptr2; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->image_width; - - while (--num_rows >= 0) { - inptr = *input_buf++; - outptr0 = output_buf[0][output_row]; - outptr1 = output_buf[1][output_row]; - outptr2 = output_buf[2][output_row]; - output_row++; - for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); - inptr += RGB_PIXELSIZE; - /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations - * must be too; we do not need an explicit range-limiting operation. - * Hence the value being shifted is never negative, and we don't - * need the general RIGHT_SHIFT macro. - */ - /* Y */ - outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); - /* Cb */ - outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); - /* Cr */ - outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); - } - } -} - - -/**************** Cases other than RGB -> YCbCr **************/ - - -/* - * Convert some rows of samples to the JPEG colorspace. - * This version handles RGB->grayscale conversion, which is the same - * as the RGB->Y portion of RGB->YCbCr. - * We assume rgb_ycc_start has been called (we only use the Y tables). - */ - -INLINE -LOCAL(void) -rgb_gray_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; - register JSAMPROW inptr; - register JSAMPROW outptr; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->image_width; - - while (--num_rows >= 0) { - inptr = *input_buf++; - outptr = output_buf[0][output_row]; - output_row++; - for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); - inptr += RGB_PIXELSIZE; - /* Y */ - outptr[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); - } - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolor.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolor.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jccolor.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jccolor.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jccolor.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009-2012, 2015 D. R. Commander. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains input colorspace conversion routines. */ @@ -14,7 +17,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" /* Private subobject */ @@ -23,10 +26,10 @@ struct jpeg_color_converter pub; /* public fields */ /* Private state for RGB->YCC conversion */ - INT32 * rgb_ycc_tab; /* => table for RGB to YCbCr conversion */ + JLONG *rgb_ycc_tab; /* => table for RGB to YCbCr conversion */ } my_color_converter; -typedef my_color_converter * my_cconvert_ptr; +typedef my_color_converter *my_cconvert_ptr; /**************** RGB -> YCbCr conversion: most common case **************/ @@ -35,9 +38,9 @@ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore - * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2, * rather than CENTERJSAMPLE, for Cb and Cr. This gave equal positive and @@ -59,10 +62,10 @@ * in the tables to save adding them separately in the inner loop. */ -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define CBCR_OFFSET ((INT32) CENTERJSAMPLE << SCALEBITS) -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L< Y section */ -#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ -#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ -#define R_CB_OFF (3*(MAXJSAMPLE+1)) -#define G_CB_OFF (4*(MAXJSAMPLE+1)) -#define B_CB_OFF (5*(MAXJSAMPLE+1)) -#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ -#define G_CR_OFF (6*(MAXJSAMPLE+1)) -#define B_CR_OFF (7*(MAXJSAMPLE+1)) -#define TABLE_SIZE (8*(MAXJSAMPLE+1)) +#define R_Y_OFF 0 /* offset to R => Y section */ +#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ +#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ +#define R_CB_OFF (3*(MAXJSAMPLE+1)) +#define G_CB_OFF (4*(MAXJSAMPLE+1)) +#define B_CB_OFF (5*(MAXJSAMPLE+1)) +#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ +#define G_CR_OFF (6*(MAXJSAMPLE+1)) +#define B_CR_OFF (7*(MAXJSAMPLE+1)) +#define TABLE_SIZE (8*(MAXJSAMPLE+1)) /* Include inline routines for colorspace extensions */ -#include "jccolext.c.inc" +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -96,13 +99,15 @@ #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define rgb_ycc_convert_internal extrgb_ycc_convert_internal #define rgb_gray_convert_internal extrgb_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extrgb_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -110,13 +115,15 @@ #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define rgb_ycc_convert_internal extrgbx_ycc_convert_internal #define rgb_gray_convert_internal extrgbx_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -124,13 +131,15 @@ #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define rgb_ycc_convert_internal extbgr_ycc_convert_internal #define rgb_gray_convert_internal extbgr_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extbgr_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -138,13 +147,15 @@ #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define rgb_ycc_convert_internal extbgrx_ycc_convert_internal #define rgb_gray_convert_internal extbgrx_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -152,13 +163,15 @@ #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define rgb_ycc_convert_internal extxbgr_ycc_convert_internal #define rgb_gray_convert_internal extxbgr_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -166,13 +179,15 @@ #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define rgb_ycc_convert_internal extxrgb_ycc_convert_internal #define rgb_gray_convert_internal extxrgb_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal /* @@ -183,13 +198,13 @@ rgb_ycc_start (j_compress_ptr cinfo) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - INT32 * rgb_ycc_tab; - INT32 i; + JLONG *rgb_ycc_tab; + JLONG i; /* Allocate and fill in the conversion tables. */ - cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *) + cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (TABLE_SIZE * SIZEOF(INT32))); + (TABLE_SIZE * sizeof(JLONG))); for (i = 0; i <= MAXJSAMPLE; i++) { rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i; @@ -217,8 +232,8 @@ METHODDEF(void) rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { case JCS_EXT_RGB: @@ -266,8 +281,8 @@ METHODDEF(void) rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { case JCS_EXT_RGB: @@ -307,6 +322,52 @@ /* + * Extended RGB to plain RGB conversion + */ + +METHODDEF(void) +rgb_rgb_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + } +} + + +/* * Convert some rows of samples to the JPEG colorspace. * This version handles Adobe-style CMYK->YCCK conversion, * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same @@ -316,12 +377,12 @@ METHODDEF(void) cmyk_ycck_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; + register JLONG *ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; @@ -339,7 +400,7 @@ g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); /* K passes through as-is */ - outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ + outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -348,16 +409,16 @@ */ /* Y */ outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); + ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) + >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); + ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) + >> SCALEBITS); } } } @@ -371,8 +432,8 @@ METHODDEF(void) grayscale_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr; @@ -385,7 +446,7 @@ outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ inptr += instride; } } @@ -400,28 +461,58 @@ METHODDEF(void) null_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; - register JSAMPROW outptr; + register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; register int ci; int nc = cinfo->num_components; JDIMENSION num_cols = cinfo->image_width; - while (--num_rows >= 0) { - /* It seems fastest to make a separate pass for each component. */ - for (ci = 0; ci < nc; ci++) { - inptr = *input_buf; - outptr = output_buf[ci][output_row]; + if (nc == 3) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ - inptr += nc; + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; } } - input_buf++; - output_row++; + } else if (nc == 4) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + outptr3 = output_buf[3][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; + outptr3[col] = *inptr++; + } + } + } else { + while (--num_rows >= 0) { + /* It seems fastest to make a separate pass for each component. */ + for (ci = 0; ci < nc; ci++) { + inptr = *input_buf; + outptr = output_buf[ci][output_row]; + for (col = 0; col < num_cols; col++) { + outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + inptr += nc; + } + } + input_buf++; + output_row++; + } } } @@ -448,7 +539,7 @@ cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_color_converter)); + sizeof(my_color_converter)); cinfo->cconvert = (struct jpeg_color_converter *) cconvert; /* set start_pass to null method until we find out differently */ cconvert->pub.start_pass = null_method; @@ -486,7 +577,7 @@ ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); break; - default: /* JCS_UNKNOWN can be anything */ + default: /* JCS_UNKNOWN can be anything */ if (cinfo->input_components < 1) ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); break; @@ -523,21 +614,30 @@ break; case JCS_RGB: - case JCS_EXT_RGB: - case JCS_EXT_RGBX: - case JCS_EXT_BGR: - case JCS_EXT_BGRX: - case JCS_EXT_XBGR: - case JCS_EXT_XRGB: - case JCS_EXT_RGBA: - case JCS_EXT_BGRA: - case JCS_EXT_ABGR: - case JCS_EXT_ARGB: if (cinfo->num_components != 3) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == cinfo->jpeg_color_space && - rgb_pixelsize[cinfo->in_color_space] == 3) - cconvert->pub.color_convert = null_convert; + if (rgb_red[cinfo->in_color_space] == 0 && + rgb_green[cinfo->in_color_space] == 1 && + rgb_blue[cinfo->in_color_space] == 2 && + rgb_pixelsize[cinfo->in_color_space] == 3) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else if (cinfo->in_color_space == JCS_RGB || + cinfo->in_color_space == JCS_EXT_RGB || + cinfo->in_color_space == JCS_EXT_RGBX || + cinfo->in_color_space == JCS_EXT_BGR || + cinfo->in_color_space == JCS_EXT_BGRX || + cinfo->in_color_space == JCS_EXT_XBGR || + cinfo->in_color_space == JCS_EXT_XRGB || + cinfo->in_color_space == JCS_EXT_RGBA || + cinfo->in_color_space == JCS_EXT_BGRA || + cinfo->in_color_space == JCS_EXT_ABGR || + cinfo->in_color_space == JCS_EXT_ARGB) + cconvert->pub.color_convert = rgb_rgb_convert; else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -562,18 +662,28 @@ cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = rgb_ycc_convert; } - } else if (cinfo->in_color_space == JCS_YCbCr) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCbCr) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; case JCS_CMYK: if (cinfo->num_components != 4) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == JCS_CMYK) - cconvert->pub.color_convert = null_convert; - else + if (cinfo->in_color_space == JCS_CMYK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -583,17 +693,27 @@ if (cinfo->in_color_space == JCS_CMYK) { cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = cmyk_ycck_convert; - } else if (cinfo->in_color_space == JCS_YCCK) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCCK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; - default: /* allow null conversion of JCS_UNKNOWN */ + default: /* allow null conversion of JCS_UNKNOWN */ if (cinfo->jpeg_color_space != cinfo->in_color_space || - cinfo->num_components != cinfo->input_components) + cinfo->num_components != cinfo->input_components) ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); - cconvert->pub.color_convert = null_convert; +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; break; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcdctmgr.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcdctmgr.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcdctmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcdctmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,14 @@ /* * jcdctmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2011 D. R. Commander - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2011, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the forward-DCT management logic. * This code selects a particular DCT implementation to be used, @@ -17,33 +19,32 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" /* Private subobject for this module */ -typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); -typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); +typedef void (*forward_DCT_method_ptr) (DCTELEM *data); +typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data); -typedef JMETHOD(void, convsamp_method_ptr, - (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM * workspace)); -typedef JMETHOD(void, float_convsamp_method_ptr, - (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace)); - -typedef JMETHOD(void, quantize_method_ptr, - (JCOEFPTR coef_block, DCTELEM * divisors, - DCTELEM * workspace)); -typedef JMETHOD(void, float_quantize_method_ptr, - (JCOEFPTR coef_block, FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); +typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data, + JDIMENSION start_col, + DCTELEM *workspace); +typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data, + JDIMENSION start_col, + FAST_FLOAT *workspace); + +typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace); +typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block, + FAST_FLOAT *divisors, + FAST_FLOAT *workspace); METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); typedef struct { - struct jpeg_forward_dct pub; /* public fields */ + struct jpeg_forward_dct pub; /* public fields */ /* Pointer to the DCT routine actually in use */ forward_DCT_method_ptr dct; @@ -54,27 +55,30 @@ * entries, because of scaling (especially for an unnormalized DCT). * Each table is given in normal array order. */ - DCTELEM * divisors[NUM_QUANT_TBLS]; + DCTELEM *divisors[NUM_QUANT_TBLS]; /* work area for FDCT subroutine */ - DCTELEM * workspace; + DCTELEM *workspace; #ifdef DCT_FLOAT_SUPPORTED /* Same as above for the floating-point case. */ float_DCT_method_ptr float_dct; float_convsamp_method_ptr float_convsamp; float_quantize_method_ptr float_quantize; - FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; - FAST_FLOAT * float_workspace; + FAST_FLOAT *float_divisors[NUM_QUANT_TBLS]; + FAST_FLOAT *float_workspace; #endif } my_fdct_controller; -typedef my_fdct_controller * my_fdct_ptr; +typedef my_fdct_controller *my_fdct_ptr; +#if BITS_IN_JSAMPLE == 8 + /* * Find the highest bit in an integer through binary search. */ + LOCAL(int) flss (UINT16 val) { @@ -105,6 +109,7 @@ return bit; } + /* * Compute values to do a division using reciprocal. * @@ -146,7 +151,7 @@ * * In order to allow SIMD implementations we also tweak the values to * allow the same calculation to be made at all times: - * + * * dctbl[0] = f rounded to nearest integer * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5) * dctbl[2] = 1 << ((word size) * 2 - r) @@ -163,13 +168,27 @@ * of in a consecutive manner, yet again in order to allow SIMD * routines. */ + LOCAL(int) -compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) +compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) { UDCTELEM2 fq, fr; UDCTELEM c; int b, r; + if (divisor == 1) { + /* divisor == 1 means unquantized, so these reciprocal/correction/shift + * values will cause the C quantization algorithm to act like the + * identity function. Since only the C quantization algorithm is used in + * these cases, the scale value is irrelevant. + */ + dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */ + dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */ + dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */ + dtbl[DCTSIZE2 * 3] = -(DCTELEM) (sizeof(DCTELEM) * 8); /* shift */ + return 0; + } + b = flss(divisor) - 1; r = sizeof(DCTELEM) * 8 + b; @@ -190,13 +209,20 @@ dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ +#ifdef WITH_SIMD dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ +#else + dtbl[DCTSIZE2 * 2] = 1; +#endif dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ if(r <= 16) return 0; else return 1; } +#endif + + /* * Initialize for a processing pass. * Verify that all referenced Q-tables are present, and set up @@ -212,15 +238,15 @@ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; int ci, qtblno, i; jpeg_component_info *compptr; - JQUANT_TBL * qtbl; - DCTELEM * dtbl; + JQUANT_TBL *qtbl; + DCTELEM *dtbl; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { qtblno = compptr->quant_tbl_no; /* Make sure specified quantization table is present */ if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) + cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); qtbl = cinfo->quant_tbl_ptrs[qtblno]; /* Compute divisors for this quant table */ @@ -232,91 +258,102 @@ * coefficients multiplied by 8 (to counteract scaling). */ if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * sizeof(DCTELEM)); } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { - if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) - && fdct->quantize == jsimd_quantize) - fdct->quantize = quantize; +#if BITS_IN_JSAMPLE == 8 + if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) && + fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; +#else + dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; +#endif } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { - /* For AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - */ + /* For AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + */ #define CONST_BITS 14 - static const INT16 aanscales[DCTSIZE2] = { - /* precomputed values scaled up by 14 bits */ - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, - 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, - 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 - }; - SHIFT_TEMPS - - if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); - } - dtbl = fdct->divisors[qtblno]; - for (i = 0; i < DCTSIZE2; i++) { - if(!compute_reciprocal( - DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], - (INT32) aanscales[i]), - CONST_BITS-3), &dtbl[i]) - && fdct->quantize == jsimd_quantize) - fdct->quantize = quantize; - } + static const INT16 aanscales[DCTSIZE2] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + }; + SHIFT_TEMPS + + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * sizeof(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { +#if BITS_IN_JSAMPLE == 8 + if (!compute_reciprocal( + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-3), &dtbl[i]) && + fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; +#else + dtbl[i] = (DCTELEM) + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-3); +#endif + } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { - /* For float AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - * What's actually stored is 1/divisor so that the inner loop can - * use a multiplication rather than a division. - */ - FAST_FLOAT * fdtbl; - int row, col; - static const double aanscalefactor[DCTSIZE] = { - 1.0, 1.387039845, 1.306562965, 1.175875602, - 1.0, 0.785694958, 0.541196100, 0.275899379 - }; - - if (fdct->float_divisors[qtblno] == NULL) { - fdct->float_divisors[qtblno] = (FAST_FLOAT *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - DCTSIZE2 * SIZEOF(FAST_FLOAT)); - } - fdtbl = fdct->float_divisors[qtblno]; - i = 0; - for (row = 0; row < DCTSIZE; row++) { - for (col = 0; col < DCTSIZE; col++) { - fdtbl[i] = (FAST_FLOAT) - (1.0 / (((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col] * 8.0))); - i++; - } - } + /* For float AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + * What's actually stored is 1/divisor so that the inner loop can + * use a multiplication rather than a division. + */ + FAST_FLOAT *fdtbl; + int row, col; + static const double aanscalefactor[DCTSIZE] = { + 1.0, 1.387039845, 1.306562965, 1.175875602, + 1.0, 0.785694958, 0.541196100, 0.275899379 + }; + + if (fdct->float_divisors[qtblno] == NULL) { + fdct->float_divisors[qtblno] = (FAST_FLOAT *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + DCTSIZE2 * sizeof(FAST_FLOAT)); + } + fdtbl = fdct->float_divisors[qtblno]; + i = 0; + for (row = 0; row < DCTSIZE; row++) { + for (col = 0; col < DCTSIZE; col++) { + fdtbl[i] = (FAST_FLOAT) + (1.0 / (((double) qtbl->quantval[i] * + aanscalefactor[row] * aanscalefactor[col] * 8.0))); + i++; + } + } } break; #endif @@ -333,7 +370,7 @@ */ METHODDEF(void) -convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) +convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) { register DCTELEM *workspaceptr; register JSAMPROW elemptr; @@ -343,7 +380,7 @@ for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ +#if DCTSIZE == 8 /* unroll the inner loop */ *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; @@ -368,14 +405,18 @@ */ METHODDEF(void) -quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) +quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { int i; DCTELEM temp; - UDCTELEM recip, corr, shift; - UDCTELEM2 product; JCOEFPTR output_ptr = coef_block; +#if BITS_IN_JSAMPLE == 8 + + UDCTELEM recip, corr; + int shift; + UDCTELEM2 product; + for (i = 0; i < DCTSIZE2; i++) { temp = workspace[i]; recip = divisors[i + DCTSIZE2 * 0]; @@ -386,16 +427,54 @@ temp = -temp; product = (UDCTELEM2)(temp + corr) * recip; product >>= shift + sizeof(DCTELEM)*8; - temp = product; + temp = (DCTELEM)product; temp = -temp; } else { product = (UDCTELEM2)(temp + corr) * recip; product >>= shift + sizeof(DCTELEM)*8; - temp = product; + temp = (DCTELEM)product; } + output_ptr[i] = (JCOEF) temp; + } + +#else + + register DCTELEM qval; + for (i = 0; i < DCTSIZE2; i++) { + qval = divisors[i]; + temp = workspace[i]; + /* Divide the coefficient value by qval, ensuring proper rounding. + * Since C does not specify the direction of rounding for negative + * quotients, we have to force the dividend positive for portability. + * + * In most files, at least half of the output values will be zero + * (at default quantization settings, more like three-quarters...) + * so we should ensure that this case is fast. On many machines, + * a comparison is enough cheaper than a divide to make a special test + * a win. Since both inputs will be nonnegative, we need only test + * for a < b to discover whether a/b is 0. + * If your machine's division is fast enough, define FAST_DIVIDE. + */ +#ifdef FAST_DIVIDE +#define DIVIDE_BY(a,b) a /= b +#else +#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 +#endif + if (temp < 0) { + temp = -temp; + temp += qval>>1; /* for rounding */ + DIVIDE_BY(temp, qval); + temp = -temp; + } else { + temp += qval>>1; /* for rounding */ + DIVIDE_BY(temp, qval); + } output_ptr[i] = (JCOEF) temp; } + +#endif + } @@ -408,16 +487,16 @@ */ METHODDEF(void) -forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks) /* This version is used for integer DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; - DCTELEM * workspace; + DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no]; + DCTELEM *workspace; JDIMENSION bi; /* Make sure the compiler doesn't look up these every pass */ @@ -426,7 +505,7 @@ quantize_method_ptr do_quantize = fdct->quantize; workspace = fdct->workspace; - sample_data += start_row; /* fold in the vertical offset once */ + sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ @@ -445,7 +524,7 @@ METHODDEF(void) -convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace) +convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace) { register FAST_FLOAT *workspaceptr; register JSAMPROW elemptr; @@ -454,7 +533,7 @@ workspaceptr = workspace; for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ +#if DCTSIZE == 8 /* unroll the inner loop */ *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); @@ -476,7 +555,7 @@ METHODDEF(void) -quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace) +quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace) { register FAST_FLOAT temp; register int i; @@ -498,16 +577,16 @@ METHODDEF(void) -forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks) /* This version is used for floating-point DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; - FAST_FLOAT * workspace; + FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no]; + FAST_FLOAT *workspace; JDIMENSION bi; @@ -517,7 +596,7 @@ float_quantize_method_ptr do_quantize = fdct->float_quantize; workspace = fdct->float_workspace; - sample_data += start_row; /* fold in the vertical offset once */ + sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ @@ -546,7 +625,7 @@ fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_fdct_controller)); + sizeof(my_fdct_controller)); cinfo->fdct = (struct jpeg_forward_dct *) fdct; fdct->pub.start_pass = start_pass_fdctmgr; @@ -625,12 +704,12 @@ if (cinfo->dct_method == JDCT_FLOAT) fdct->float_workspace = (FAST_FLOAT *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(FAST_FLOAT) * DCTSIZE2); + sizeof(FAST_FLOAT) * DCTSIZE2); else #endif fdct->workspace = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(DCTELEM) * DCTSIZE2); + sizeof(DCTELEM) * DCTSIZE2); /* Mark divisor tables unallocated */ for (i = 0; i < NUM_QUANT_TBLS; i++) { diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jchuff.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jchuff.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jchuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jchuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jchuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2014-2016 D. R. Commander. + * Copyright (C) 2015 Matthieu Darbois. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy encoding routines. * @@ -18,11 +21,39 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jcphuff.c */ +#include "jsimd.h" +#include "jconfigint.h" #include -static unsigned char jpeg_nbits_table[65536]; -static int jpeg_nbits_table_init = 0; +/* + * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be + * used for bit counting rather than the lookup table. This will reduce the + * memory footprint by 64k, which is important for some mobile applications + * that create many isolated instances of libjpeg-turbo (web browsers, for + * instance.) This may improve performance on some mobile platforms as well. + * This feature is enabled by default only on ARM processors, because some x86 + * chips have a slow implementation of bsr, and the use of clz/bsr cannot be + * shown to have a significant performance impact even on the x86 chips that + * have a fast implementation of it. When building for ARMv6, you can + * explicitly disable the use of clz/bsr by adding -mthumb to the compiler + * flags (this defines __thumb__). + */ + +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if defined __GNUC__ && (defined __arm__ || defined __aarch64__) +#if !defined __thumb__ || defined __thumb2__ +#define USE_CLZ_INTRINSIC +#endif +#endif + +#ifdef USE_CLZ_INTRINSIC +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) +#else +#include "jpeg_nbits_table.h" +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) +#endif #ifndef min #define min(a,b) ((a)<(b)?(a):(b)) @@ -36,8 +67,8 @@ */ typedef struct { - size_t put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; @@ -51,12 +82,12 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).put_buffer = (src).put_buffer, \ + (dest).put_bits = (src).put_bits, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -64,44 +95,45 @@ typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ - savable_state saved; /* Bit buffer & DC state at start of MCU */ + savable_state saved; /* Bit buffer & DC state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to derived tables (these workspaces have image lifespan) */ - c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; -#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ - long * dc_count_ptrs[NUM_HUFF_TBLS]; - long * ac_count_ptrs[NUM_HUFF_TBLS]; +#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ + long *dc_count_ptrs[NUM_HUFF_TBLS]; + long *ac_count_ptrs[NUM_HUFF_TBLS]; #endif + + int simd; } huff_entropy_encoder; -typedef huff_entropy_encoder * huff_entropy_ptr; +typedef huff_entropy_encoder *huff_entropy_ptr; /* Working state while writing an MCU. * This struct contains all the fields that are needed by subroutines. */ typedef struct { - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - savable_state cur; /* Current bit buffer & DC state */ - j_compress_ptr cinfo; /* dump_buffer needs access to this */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + savable_state cur; /* Current bit buffer & DC state */ + j_compress_ptr cinfo; /* dump_buffer needs access to this */ } working_state; /* Forward declarations */ -METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo); #ifdef ENTROPY_OPT_SUPPORTED -METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); #endif @@ -116,7 +148,7 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, dctbl, actbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (gather_statistics) { #ifdef ENTROPY_OPT_SUPPORTED @@ -130,6 +162,8 @@ entropy->pub.finish_pass = finish_pass_huff; } + entropy->simd = jsimd_can_huff_encode_one_block(); + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; dctbl = compptr->dc_tbl_no; @@ -139,29 +173,29 @@ /* Check for invalid table indexes */ /* (make_c_derived_tbl does this in the other path) */ if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); if (actbl < 0 || actbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); /* Allocate and zero the statistics tables */ /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ if (entropy->dc_count_ptrs[dctbl] == NULL) - entropy->dc_count_ptrs[dctbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long)); + entropy->dc_count_ptrs[dctbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); if (entropy->ac_count_ptrs[actbl] == NULL) - entropy->ac_count_ptrs[actbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long)); + entropy->ac_count_ptrs[actbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); #endif } else { /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); + & entropy->dc_derived_tbls[dctbl]); jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); + & entropy->ac_derived_tbls[actbl]); } /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; @@ -186,7 +220,7 @@ GLOBAL(void) jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl) + c_derived_tbl **pdtbl) { JHUFF_TBL *htbl; c_derived_tbl *dtbl; @@ -211,22 +245,22 @@ if (*pdtbl == NULL) *pdtbl = (c_derived_tbl *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(c_derived_tbl)); + sizeof(c_derived_tbl)); dtbl = *pdtbl; - + /* Figure C.1: make table of Huffman code length for each symbol */ p = 0; for (l = 1; l <= 16; l++) { i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ + if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) huffsize[p++] = (char) l; } huffsize[p] = 0; lastp = p; - + /* Figure C.2: generate the codes themselves */ /* We also validate that the counts represent a legal Huffman code tree. */ @@ -241,12 +275,12 @@ /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((INT32) code) >= (((INT32) 1) << si)) + if (((JLONG) code) >= (((JLONG) 1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; } - + /* Figure C.3: generate encoding tables */ /* These are code and size indexed by symbol value */ @@ -254,7 +288,7 @@ * this lets us detect duplicate VAL entries here, and later * allows emit_bits to detect any attempt to emit such symbols. */ - MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); + MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); /* This is also a convenient place to check for out-of-range * and duplicated VAL entries. We allow 0..255 for AC symbols @@ -270,15 +304,6 @@ dtbl->ehufco[i] = huffcode[p]; dtbl->ehufsi[i] = huffsize[p]; } - - if(!jpeg_nbits_table_init) { - for(i = 0; i < 65536; i++) { - int nbits = 0, temp = i; - while (temp) {temp >>= 1; nbits++;} - jpeg_nbits_table[i] = nbits; - } - jpeg_nbits_table_init = 1; - } } @@ -286,19 +311,17 @@ /* Emit a byte, taking 'action' if must suspend. */ #define emit_byte(state,val,action) \ - { *(state)->next_output_byte++ = (JOCTET) (val); \ - if (--(state)->free_in_buffer == 0) \ - if (! dump_buffer(state)) \ - { action; } } + { *(state)->next_output_byte++ = (JOCTET) (val); \ + if (--(state)->free_in_buffer == 0) \ + if (! dump_buffer(state)) \ + { action; } } LOCAL(boolean) -dump_buffer (working_state * state) +dump_buffer (working_state *state) /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ { - struct jpeg_destination_mgr * dest = state->cinfo->dest; - - dest->free_in_buffer = state->free_in_buffer; + struct jpeg_destination_mgr *dest = state->cinfo->dest; if (! (*dest->empty_output_buffer) (state->cinfo)) return FALSE; @@ -360,7 +383,11 @@ } \ } -#if __WORDSIZE==64 || defined(_WIN64) +#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) +#error Cannot determine word size +#endif + +#if SIZEOF_SIZE_T==8 || defined(_WIN64) #define EMIT_BITS(code, size) { \ CHECKBUF47() \ @@ -368,7 +395,7 @@ } #define EMIT_CODE(code, size) { \ - temp2 &= (((INT32) 1)<free_in_buffer < BUFSIZE) { \ @@ -425,7 +461,7 @@ LOCAL(boolean) -flush_bits (working_state * state) +flush_bits (working_state *state) { JOCTET _buffer[BUFSIZE], *buffer; size_t put_buffer; int put_bits; @@ -439,7 +475,7 @@ PUT_BITS(0x7F, 7) while (put_bits >= 8) EMIT_BYTE() - state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ + state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ state->cur.put_bits = 0; STORE_BUFFER() @@ -450,8 +486,25 @@ /* Encode a single block's worth of coefficients */ LOCAL(boolean) -encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl) +encode_one_block_simd (working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) +{ + JOCTET _buffer[BUFSIZE], *buffer; + size_t bytes, bytestocopy; int localbuf = 0; + + LOAD_BUFFER() + + buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val, + dctbl, actbl); + + STORE_BUFFER() + + return TRUE; +} + +LOCAL(boolean) +encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) { int temp, temp2, temp3; int nbits; @@ -466,7 +519,7 @@ LOAD_BUFFER() /* Encode the DC coefficient difference per section F.1.2.1 */ - + temp = temp2 = block[0] - last_dc_val; /* This is a well-known technique for obtaining the absolute value without a @@ -483,25 +536,23 @@ temp2 += temp3; /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = jpeg_nbits_table[temp]; + nbits = JPEG_NBITS(temp); /* Emit the Huffman-coded symbol for the number of bits */ code = dctbl->ehufco[nbits]; size = dctbl->ehufsi[nbits]; - PUT_BITS(code, size) - CHECKBUF15() + EMIT_BITS(code, size) /* Mask off any extra bits in code */ - temp2 &= (((INT32) 1)< 15, must emit special run-length-16 codes (0xF0) */ \ while (r > 15) { \ EMIT_BITS(code_0xf0, size_0xf0) \ @@ -565,7 +616,7 @@ */ LOCAL(boolean) -emit_restart (working_state * state, int restart_num) +emit_restart (working_state *state, int restart_num) { int ci; @@ -595,7 +646,7 @@ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; working_state state; int blkn, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Load up working state */ state.next_output_byte = cinfo->dest->next_output_byte; @@ -607,20 +658,34 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! emit_restart(&state, entropy->next_restart_num)) - return FALSE; + return FALSE; } /* Encode the MCU data blocks */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - if (! encode_one_block(&state, - MCU_data[blkn][0], state.cur.last_dc_val[ci], - entropy->dc_derived_tbls[compptr->dc_tbl_no], - entropy->ac_derived_tbls[compptr->ac_tbl_no])) - return FALSE; - /* Update last_dc_val */ - state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + if (entropy->simd) { + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + if (! encode_one_block_simd(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) + return FALSE; + /* Update last_dc_val */ + state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + } + } else { + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + if (! encode_one_block(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) + return FALSE; + /* Update last_dc_val */ + state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + } } /* Completed MCU, so update state */ @@ -687,18 +752,18 @@ LOCAL(void) htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, - long dc_counts[], long ac_counts[]) + long dc_counts[], long ac_counts[]) { register int temp; register int nbits; register int k, r; - + /* Encode the DC coefficient difference per section F.1.2.1 */ - + temp = block[0] - last_dc_val; if (temp < 0) temp = -temp; - + /* Find the number of bits needed for the magnitude of the coefficient */ nbits = 0; while (temp) { @@ -713,36 +778,36 @@ /* Count the Huffman symbol for the number of bits */ dc_counts[nbits]++; - + /* Encode the AC coefficients per section F.1.2.2 */ - - r = 0; /* r = run length of zeros */ - + + r = 0; /* r = run length of zeros */ + for (k = 1; k < DCTSIZE2; k++) { if ((temp = block[jpeg_natural_order[k]]) == 0) { r++; } else { /* if run length > 15, must emit special run-length-16 codes (0xF0) */ while (r > 15) { - ac_counts[0xF0]++; - r -= 16; + ac_counts[0xF0]++; + r -= 16; } - + /* Find the number of bits needed for the magnitude of the coefficient */ if (temp < 0) - temp = -temp; - + temp = -temp; + /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ + nbits = 1; /* there must be at least one 1 bit */ while ((temp >>= 1)) - nbits++; + nbits++; /* Check for out-of-range coefficient values */ if (nbits > MAX_COEF_BITS) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + /* Count Huffman symbol for run length / number of bits */ ac_counts[(r << 4) + nbits]++; - + r = 0; } } @@ -763,14 +828,14 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int blkn, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Take care of restart intervals if needed */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) { /* Re-initialize DC predictions to 0 */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) - entropy->saved.last_dc_val[ci] = 0; + entropy->saved.last_dc_val[ci] = 0; /* Update restart state */ entropy->restarts_to_go = cinfo->restart_interval; } @@ -781,8 +846,8 @@ ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], - entropy->dc_count_ptrs[compptr->dc_tbl_no], - entropy->ac_count_ptrs[compptr->ac_tbl_no]); + entropy->dc_count_ptrs[compptr->dc_tbl_no], + entropy->ac_count_ptrs[compptr->ac_tbl_no]); entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; } @@ -819,24 +884,24 @@ */ GLOBAL(void) -jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) +jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) { -#define MAX_CLEN 32 /* assumed maximum initial code length */ - UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ - int codesize[257]; /* codesize[k] = code length of symbol k */ - int others[257]; /* next symbol in current branch of tree */ +#define MAX_CLEN 32 /* assumed maximum initial code length */ + UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ + int codesize[257]; /* codesize[k] = code length of symbol k */ + int others[257]; /* next symbol in current branch of tree */ int c1, c2; int p, i, j; long v; /* This algorithm is explained in section K.2 of the JPEG standard */ - MEMZERO(bits, SIZEOF(bits)); - MEMZERO(codesize, SIZEOF(codesize)); + MEMZERO(bits, sizeof(bits)); + MEMZERO(codesize, sizeof(codesize)); for (i = 0; i < 257; i++) - others[i] = -1; /* init links to empty */ - - freq[256] = 1; /* make sure 256 has a nonzero count */ + others[i] = -1; /* init links to empty */ + + freq[256] = 1; /* make sure 256 has a nonzero count */ /* Including the pseudo-symbol 256 in the Huffman procedure guarantees * that no real symbol is given code-value of all ones, because 256 * will be placed last in the largest codeword category. @@ -851,8 +916,8 @@ v = 1000000000L; for (i = 0; i <= 256; i++) { if (freq[i] && freq[i] <= v) { - v = freq[i]; - c1 = i; + v = freq[i]; + c1 = i; } } @@ -862,15 +927,15 @@ v = 1000000000L; for (i = 0; i <= 256; i++) { if (freq[i] && freq[i] <= v && i != c1) { - v = freq[i]; - c2 = i; + v = freq[i]; + c2 = i; } } /* Done if we've merged everything into one frequency */ if (c2 < 0) break; - + /* Else merge the two counts/trees */ freq[c1] += freq[c2]; freq[c2] = 0; @@ -881,9 +946,9 @@ c1 = others[c1]; codesize[c1]++; } - - others[c1] = c2; /* chain c2 onto c1's tree branch */ - + + others[c1] = c2; /* chain c2 onto c1's tree branch */ + /* Increment the codesize of everything in c2's tree branch */ codesize[c2]++; while (others[c2] >= 0) { @@ -898,7 +963,7 @@ /* The JPEG standard seems to think that this can't happen, */ /* but I'm paranoid... */ if (codesize[i] > MAX_CLEN) - ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); + ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); bits[codesize[i]]++; } @@ -914,28 +979,28 @@ * shortest nonzero BITS entry is converted into a prefix for two code words * one bit longer. */ - + for (i = MAX_CLEN; i > 16; i--) { while (bits[i] > 0) { - j = i - 2; /* find length of new prefix to be used */ + j = i - 2; /* find length of new prefix to be used */ while (bits[j] == 0) - j--; - - bits[i] -= 2; /* remove two symbols */ - bits[i-1]++; /* one goes in this length */ - bits[j+1] += 2; /* two new symbols in this length */ - bits[j]--; /* symbol of this length is now a prefix */ + j--; + + bits[i] -= 2; /* remove two symbols */ + bits[i-1]++; /* one goes in this length */ + bits[j+1] += 2; /* two new symbols in this length */ + bits[j]--; /* symbol of this length is now a prefix */ } } /* Remove the count for the pseudo-symbol 256 from the largest codelength */ - while (bits[i] == 0) /* find largest codelength still in use */ + while (bits[i] == 0) /* find largest codelength still in use */ i--; bits[i]--; - + /* Return final symbol counts (only for lengths 0..16) */ - MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); - + MEMCOPY(htbl->bits, bits, sizeof(htbl->bits)); + /* Return a list of the symbols sorted by code length */ /* It's not real clear to me why we don't need to consider the codelength * changes made above, but the JPEG spec seems to think this works. @@ -944,8 +1009,8 @@ for (i = 1; i <= MAX_CLEN; i++) { for (j = 0; j <= 255; j++) { if (codesize[j] == i) { - htbl->huffval[p] = (UINT8) j; - p++; + htbl->huffval[p] = (UINT8) j; + p++; } } } @@ -964,7 +1029,7 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, dctbl, actbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JHUFF_TBL **htblptr; boolean did_dc[NUM_HUFF_TBLS]; boolean did_ac[NUM_HUFF_TBLS]; @@ -972,8 +1037,8 @@ /* It's important not to apply jpeg_gen_optimal_table more than once * per table, because it clobbers the input frequency counts! */ - MEMZERO(did_dc, SIZEOF(did_dc)); - MEMZERO(did_ac, SIZEOF(did_ac)); + MEMZERO(did_dc, sizeof(did_dc)); + MEMZERO(did_ac, sizeof(did_ac)); for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; @@ -982,14 +1047,14 @@ if (! did_dc[dctbl]) { htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); did_dc[dctbl] = TRUE; } if (! did_ac[actbl]) { htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); did_ac[actbl] = TRUE; } @@ -1012,7 +1077,7 @@ entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_encoder)); + sizeof(huff_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass_huff; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jchuff.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jchuff.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jchuff.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jchuff.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jchuff.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains declarations for Huffman entropy encoding routines * that are shared between the sequential encoder (jchuff.c) and the @@ -25,23 +28,16 @@ /* Derived data constructed for each Huffman table */ typedef struct { - unsigned int ehufco[256]; /* code for each symbol */ - char ehufsi[256]; /* length of code for each symbol */ + unsigned int ehufco[256]; /* code for each symbol */ + char ehufsi[256]; /* length of code for each symbol */ /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */ } c_derived_tbl; -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_c_derived_tbl jMkCDerived -#define jpeg_gen_optimal_table jGenOptTbl -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - /* Expand a Huffman table definition into the derived format */ EXTERN(void) jpeg_make_c_derived_tbl - JPP((j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl)); + (j_compress_ptr cinfo, boolean isDC, int tblno, + c_derived_tbl ** pdtbl); /* Generate an optimal table definition given the specified counts */ EXTERN(void) jpeg_gen_optimal_table - JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])); + (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcinit.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcinit.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcinit.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcinit.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1991-1997, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains initialization logic for the JPEG compressor. * This routine is in charge of selecting the modules to be executed and @@ -60,7 +61,7 @@ /* Need a full-image coefficient buffer in any multi-pass mode. */ jinit_c_coef_controller(cinfo, - (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding)); + (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding)); jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */); jinit_marker_writer(cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmainct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmainct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmainct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmainct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcmainct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the main buffer controller for compression. * The main buffer lies between the pre-processor and the JPEG @@ -15,50 +18,30 @@ #include "jpeglib.h" -/* Note: currently, there is no operating mode in which a full-image buffer - * is needed at this step. If there were, that mode could not be used with - * "raw data" input, since this module is bypassed in that case. However, - * we've left the code here for possible use in special applications. - */ -#undef FULL_MAIN_BUFFER_SUPPORTED - - /* Private buffer controller object */ typedef struct { struct jpeg_c_main_controller pub; /* public fields */ - JDIMENSION cur_iMCU_row; /* number of current iMCU row */ - JDIMENSION rowgroup_ctr; /* counts row groups received in iMCU row */ - boolean suspended; /* remember if we suspended output */ - J_BUF_MODE pass_mode; /* current operating mode */ + JDIMENSION cur_iMCU_row; /* number of current iMCU row */ + JDIMENSION rowgroup_ctr; /* counts row groups received in iMCU row */ + boolean suspended; /* remember if we suspended output */ + J_BUF_MODE pass_mode; /* current operating mode */ /* If using just a strip buffer, this points to the entire set of buffers * (we allocate one for each component). In the full-image case, this * points to the currently accessible strips of the virtual arrays. */ JSAMPARRAY buffer[MAX_COMPONENTS]; - -#ifdef FULL_MAIN_BUFFER_SUPPORTED - /* If using full-image storage, this array holds pointers to virtual-array - * control blocks for each component. Unused if not full-image storage. - */ - jvirt_sarray_ptr whole_image[MAX_COMPONENTS]; -#endif } my_main_controller; -typedef my_main_controller * my_main_ptr; +typedef my_main_controller *my_main_ptr; /* Forward declarations */ METHODDEF(void) process_data_simple_main - JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)); -#ifdef FULL_MAIN_BUFFER_SUPPORTED -METHODDEF(void) process_data_buffer_main - JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)); -#endif + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail); /* @@ -74,32 +57,14 @@ if (cinfo->raw_data_in) return; - main_ptr->cur_iMCU_row = 0; /* initialize counters */ + if (pass_mode != JBUF_PASS_THRU) + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); + + main_ptr->cur_iMCU_row = 0; /* initialize counters */ main_ptr->rowgroup_ctr = 0; main_ptr->suspended = FALSE; - main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */ - - switch (pass_mode) { - case JBUF_PASS_THRU: -#ifdef FULL_MAIN_BUFFER_SUPPORTED - if (main_ptr->whole_image[0] != NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -#endif - main_ptr->pub.process_data = process_data_simple_main; - break; -#ifdef FULL_MAIN_BUFFER_SUPPORTED - case JBUF_SAVE_SOURCE: - case JBUF_CRANK_DEST: - case JBUF_SAVE_AND_PASS: - if (main_ptr->whole_image[0] == NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - main_ptr->pub.process_data = process_data_buffer_main; - break; -#endif - default: - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - break; - } + main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */ + main_ptr->pub.process_data = process_data_simple_main; } @@ -111,8 +76,8 @@ METHODDEF(void) process_data_simple_main (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; @@ -120,9 +85,9 @@ /* Read input data if we haven't filled the main buffer yet */ if (main_ptr->rowgroup_ctr < DCTSIZE) (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, - main_ptr->buffer, &main_ptr->rowgroup_ctr, - (JDIMENSION) DCTSIZE); + input_buf, in_row_ctr, in_rows_avail, + main_ptr->buffer, &main_ptr->rowgroup_ctr, + (JDIMENSION) DCTSIZE); /* If we don't have a full iMCU row buffered, return to application for * more data. Note that preprocessor will always pad to fill the iMCU row @@ -140,8 +105,8 @@ * think we were done. */ if (! main_ptr->suspended) { - (*in_row_ctr)--; - main_ptr->suspended = TRUE; + (*in_row_ctr)--; + main_ptr->suspended = TRUE; } return; } @@ -158,85 +123,6 @@ } -#ifdef FULL_MAIN_BUFFER_SUPPORTED - -/* - * Process some data. - * This routine handles all of the modes that use a full-size buffer. - */ - -METHODDEF(void) -process_data_buffer_main (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) -{ - my_main_ptr main = (my_main_ptr) cinfo->main; - int ci; - jpeg_component_info *compptr; - boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST); - - while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) { - /* Realign the virtual buffers if at the start of an iMCU row. */ - if (main_ptr->rowgroup_ctr == 0) { - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, main_ptr->whole_image[ci], - main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing); - } - /* In a read pass, pretend we just read some source data. */ - if (! writing) { - *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE; - main_ptr->rowgroup_ctr = DCTSIZE; - } - } - - /* If a write pass, read input data until the current iMCU row is full. */ - /* Note: preprocessor will pad if necessary to fill the last iMCU row. */ - if (writing) { - (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, - main_ptr->buffer, &main_ptr->rowgroup_ctr, - (JDIMENSION) DCTSIZE); - /* Return to application if we need more data to fill the iMCU row. */ - if (main_ptr->rowgroup_ctr < DCTSIZE) - return; - } - - /* Emit data, unless this is a sink-only pass. */ - if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) { - if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { - /* If compressor did not consume the whole row, then we must need to - * suspend processing and return to the application. In this situation - * we pretend we didn't yet consume the last input row; otherwise, if - * it happened to be the last row of the image, the application would - * think we were done. - */ - if (! main_ptr->suspended) { - (*in_row_ctr)--; - main_ptr->suspended = TRUE; - } - return; - } - /* We did finish the row. Undo our little suspension hack if a previous - * call suspended; then mark the main buffer empty. - */ - if (main_ptr->suspended) { - (*in_row_ctr)++; - main_ptr->suspended = FALSE; - } - } - - /* If get here, we are done with this iMCU row. Mark buffer empty. */ - main_ptr->rowgroup_ctr = 0; - main_ptr->cur_iMCU_row++; - } -} - -#endif /* FULL_MAIN_BUFFER_SUPPORTED */ - - /* * Initialize main buffer controller. */ @@ -250,7 +136,7 @@ main_ptr = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); + sizeof(my_main_controller)); cinfo->main = (struct jpeg_c_main_controller *) main_ptr; main_ptr->pub.start_pass = start_pass_main; @@ -262,32 +148,15 @@ * may be of a different size. */ if (need_full_buffer) { -#ifdef FULL_MAIN_BUFFER_SUPPORTED - /* Allocate a full-image virtual array for each component */ - /* Note we pad the bottom to a multiple of the iMCU height */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor) * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); - } -#else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -#endif } else { -#ifdef FULL_MAIN_BUFFER_SUPPORTED - main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */ -#endif /* Allocate a strip buffer for each component */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + compptr->width_in_blocks * DCTSIZE, + (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmarker.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmarker.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmarker.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmarker.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jcmarker.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains routines to write JPEG datastream markers. */ @@ -15,29 +18,29 @@ #include "jpegcomp.h" -typedef enum { /* JPEG marker codes */ +typedef enum { /* JPEG marker codes */ M_SOF0 = 0xc0, M_SOF1 = 0xc1, M_SOF2 = 0xc2, M_SOF3 = 0xc3, - + M_SOF5 = 0xc5, M_SOF6 = 0xc6, M_SOF7 = 0xc7, - + M_JPG = 0xc8, M_SOF9 = 0xc9, M_SOF10 = 0xca, M_SOF11 = 0xcb, - + M_SOF13 = 0xcd, M_SOF14 = 0xce, M_SOF15 = 0xcf, - + M_DHT = 0xc4, - + M_DAC = 0xcc, - + M_RST0 = 0xd0, M_RST1 = 0xd1, M_RST2 = 0xd2, @@ -46,7 +49,7 @@ M_RST5 = 0xd5, M_RST6 = 0xd6, M_RST7 = 0xd7, - + M_SOI = 0xd8, M_EOI = 0xd9, M_SOS = 0xda, @@ -55,7 +58,7 @@ M_DRI = 0xdd, M_DHP = 0xde, M_EXP = 0xdf, - + M_APP0 = 0xe0, M_APP1 = 0xe1, M_APP2 = 0xe2, @@ -72,13 +75,13 @@ M_APP13 = 0xed, M_APP14 = 0xee, M_APP15 = 0xef, - + M_JPG0 = 0xf0, M_JPG13 = 0xfd, M_COM = 0xfe, - + M_TEM = 0x01, - + M_ERROR = 0x100 } JPEG_MARKER; @@ -91,7 +94,7 @@ unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */ } my_marker_writer; -typedef my_marker_writer * my_marker_ptr; +typedef my_marker_writer *my_marker_ptr; /* @@ -110,7 +113,7 @@ emit_byte (j_compress_ptr cinfo, int val) /* Emit a byte */ { - struct jpeg_destination_mgr * dest = cinfo->dest; + struct jpeg_destination_mgr *dest = cinfo->dest; *(dest->next_output_byte)++ = (JOCTET) val; if (--dest->free_in_buffer == 0) { @@ -147,7 +150,7 @@ /* Emit a DQT marker */ /* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */ { - JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index]; + JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[index]; int prec; int i; @@ -171,7 +174,7 @@ /* The table entries must be emitted in zigzag order. */ unsigned int qval = qtbl->quantval[jpeg_natural_order[i]]; if (prec) - emit_byte(cinfo, (int) (qval >> 8)); + emit_byte(cinfo, (int) (qval >> 8)); emit_byte(cinfo, (int) (qval & 0xFF)); } @@ -186,35 +189,35 @@ emit_dht (j_compress_ptr cinfo, int index, boolean is_ac) /* Emit a DHT marker */ { - JHUFF_TBL * htbl; + JHUFF_TBL *htbl; int length, i; - + if (is_ac) { htbl = cinfo->ac_huff_tbl_ptrs[index]; - index += 0x10; /* output index has AC bit set */ + index += 0x10; /* output index has AC bit set */ } else { htbl = cinfo->dc_huff_tbl_ptrs[index]; } if (htbl == NULL) ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index); - + if (! htbl->sent_table) { emit_marker(cinfo, M_DHT); - + length = 0; for (i = 1; i <= 16; i++) length += htbl->bits[i]; - + emit_2bytes(cinfo, length + 2 + 1 + 16); emit_byte(cinfo, index); - + for (i = 1; i <= 16; i++) emit_byte(cinfo, htbl->bits[i]); - + for (i = 0; i < length; i++) emit_byte(cinfo, htbl->huffval[i]); - + htbl->sent_table = TRUE; } } @@ -231,32 +234,38 @@ char ac_in_use[NUM_ARITH_TBLS]; int length, i; jpeg_component_info *compptr; - + for (i = 0; i < NUM_ARITH_TBLS; i++) dc_in_use[i] = ac_in_use[i] = 0; - + for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - dc_in_use[compptr->dc_tbl_no] = 1; - ac_in_use[compptr->ac_tbl_no] = 1; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) + dc_in_use[compptr->dc_tbl_no] = 1; + /* AC needs no table when not present */ + if (cinfo->Se) + ac_in_use[compptr->ac_tbl_no] = 1; } - + length = 0; for (i = 0; i < NUM_ARITH_TBLS; i++) length += dc_in_use[i] + ac_in_use[i]; - - emit_marker(cinfo, M_DAC); - - emit_2bytes(cinfo, length*2 + 2); - - for (i = 0; i < NUM_ARITH_TBLS; i++) { - if (dc_in_use[i]) { - emit_byte(cinfo, i); - emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); - } - if (ac_in_use[i]) { - emit_byte(cinfo, i + 0x10); - emit_byte(cinfo, cinfo->arith_ac_K[i]); + + if (length) { + emit_marker(cinfo, M_DAC); + + emit_2bytes(cinfo, length*2 + 2); + + for (i = 0; i < NUM_ARITH_TBLS; i++) { + if (dc_in_use[i]) { + emit_byte(cinfo, i); + emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); + } + if (ac_in_use[i]) { + emit_byte(cinfo, i + 0x10); + emit_byte(cinfo, cinfo->arith_ac_K[i]); + } } } #endif /* C_ARITH_CODING_SUPPORTED */ @@ -268,8 +277,8 @@ /* Emit a DRI marker */ { emit_marker(cinfo, M_DRI); - - emit_2bytes(cinfo, 4); /* fixed length */ + + emit_2bytes(cinfo, 4); /* fixed length */ emit_2bytes(cinfo, (int) cinfo->restart_interval); } @@ -281,9 +290,9 @@ { int ci; jpeg_component_info *compptr; - + emit_marker(cinfo, code); - + emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */ /* Make sure image isn't bigger than SOF field can handle */ @@ -312,32 +321,26 @@ { int i, td, ta; jpeg_component_info *compptr; - + emit_marker(cinfo, M_SOS); - + emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */ - + emit_byte(cinfo, cinfo->comps_in_scan); - + for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; emit_byte(cinfo, compptr->component_id); - td = compptr->dc_tbl_no; - ta = compptr->ac_tbl_no; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan; - * furthermore, Huffman coding of DC refinement uses no table at all. - * We emit 0 for unused field(s); this is recommended by the P&M text - * but does not seem to be specified in the standard. - */ - if (cinfo->Ss == 0) { - ta = 0; /* DC scan */ - if (cinfo->Ah != 0 && !cinfo->arith_code) - td = 0; /* no DC table either */ - } else { - td = 0; /* AC scan */ - } - } + + /* We emit 0 for unused field(s); this is recommended by the P&M text + * but does not seem to be specified in the standard. + */ + + /* DC needs no table for refinement scan */ + td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0; + /* AC needs no table when not present */ + ta = cinfo->Se ? compptr->ac_tbl_no : 0; + emit_byte(cinfo, (td << 4) + ta); } @@ -352,22 +355,22 @@ /* Emit a JFIF-compliant APP0 marker */ { /* - * Length of APP0 block (2 bytes) - * Block ID (4 bytes - ASCII "JFIF") - * Zero byte (1 byte to terminate the ID string) - * Version Major, Minor (2 bytes - major first) - * Units (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm) - * Xdpu (2 bytes - dots per unit horizontal) - * Ydpu (2 bytes - dots per unit vertical) - * Thumbnail X size (1 byte) - * Thumbnail Y size (1 byte) + * Length of APP0 block (2 bytes) + * Block ID (4 bytes - ASCII "JFIF") + * Zero byte (1 byte to terminate the ID string) + * Version Major, Minor (2 bytes - major first) + * Units (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm) + * Xdpu (2 bytes - dots per unit horizontal) + * Ydpu (2 bytes - dots per unit vertical) + * Thumbnail X size (1 byte) + * Thumbnail Y size (1 byte) */ - + emit_marker(cinfo, M_APP0); - + emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */ - emit_byte(cinfo, 0x4A); /* Identifier: ASCII "JFIF" */ + emit_byte(cinfo, 0x4A); /* Identifier: ASCII "JFIF" */ emit_byte(cinfo, 0x46); emit_byte(cinfo, 0x49); emit_byte(cinfo, 0x46); @@ -377,7 +380,7 @@ emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */ emit_2bytes(cinfo, (int) cinfo->X_density); emit_2bytes(cinfo, (int) cinfo->Y_density); - emit_byte(cinfo, 0); /* No thumbnail image */ + emit_byte(cinfo, 0); /* No thumbnail image */ emit_byte(cinfo, 0); } @@ -387,12 +390,12 @@ /* Emit an Adobe APP14 marker */ { /* - * Length of APP14 block (2 bytes) - * Block ID (5 bytes - ASCII "Adobe") - * Version Number (2 bytes - currently 100) - * Flags0 (2 bytes - currently 0) - * Flags1 (2 bytes - currently 0) - * Color transform (1 byte) + * Length of APP14 block (2 bytes) + * Block ID (5 bytes - ASCII "Adobe") + * Version Number (2 bytes - currently 100) + * Flags0 (2 bytes - currently 0) + * Flags1 (2 bytes - currently 0) + * Color transform (1 byte) * * Although Adobe TN 5116 mentions Version = 101, all the Adobe files * now in circulation seem to use Version = 100, so that's what we write. @@ -401,28 +404,28 @@ * YCbCr, 2 if it's YCCK, 0 otherwise. Adobe's definition has to do with * whether the encoder performed a transformation, which is pretty useless. */ - + emit_marker(cinfo, M_APP14); - + emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */ - emit_byte(cinfo, 0x41); /* Identifier: ASCII "Adobe" */ + emit_byte(cinfo, 0x41); /* Identifier: ASCII "Adobe" */ emit_byte(cinfo, 0x64); emit_byte(cinfo, 0x6F); emit_byte(cinfo, 0x62); emit_byte(cinfo, 0x65); - emit_2bytes(cinfo, 100); /* Version */ - emit_2bytes(cinfo, 0); /* Flags0 */ - emit_2bytes(cinfo, 0); /* Flags1 */ + emit_2bytes(cinfo, 100); /* Version */ + emit_2bytes(cinfo, 0); /* Flags0 */ + emit_2bytes(cinfo, 0); /* Flags1 */ switch (cinfo->jpeg_color_space) { case JCS_YCbCr: - emit_byte(cinfo, 1); /* Color transform = 1 */ + emit_byte(cinfo, 1); /* Color transform = 1 */ break; case JCS_YCCK: - emit_byte(cinfo, 2); /* Color transform = 2 */ + emit_byte(cinfo, 2); /* Color transform = 2 */ break; default: - emit_byte(cinfo, 0); /* Color transform = 0 */ + emit_byte(cinfo, 0); /* Color transform = 0 */ break; } } @@ -440,12 +443,12 @@ write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen) /* Emit an arbitrary marker header */ { - if (datalen > (unsigned int) 65533) /* safety check */ + if (datalen > (unsigned int) 65533) /* safety check */ ERREXIT(cinfo, JERR_BAD_LENGTH); emit_marker(cinfo, (JPEG_MARKER) marker); - emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */ + emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */ } METHODDEF(void) @@ -472,12 +475,12 @@ { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; - emit_marker(cinfo, M_SOI); /* first the SOI */ + emit_marker(cinfo, M_SOI); /* first the SOI */ /* SOI is defined to reset restart interval to 0 */ marker->last_restart_interval = 0; - if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */ + if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */ emit_jfif_app0(cinfo); if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */ emit_adobe_app14(cinfo); @@ -498,7 +501,7 @@ int ci, prec; boolean is_baseline; jpeg_component_info *compptr; - + /* Emit DQT for each quantization table. * Note that emit_dqt() suppresses any duplicate tables. */ @@ -518,9 +521,9 @@ } else { is_baseline = TRUE; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1) - is_baseline = FALSE; + is_baseline = FALSE; } if (prec && is_baseline) { is_baseline = FALSE; @@ -531,14 +534,17 @@ /* Emit the proper SOF marker */ if (cinfo->arith_code) { - emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */ + if (cinfo->progressive_mode) + emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */ + else + emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */ } else { if (cinfo->progressive_mode) - emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ + emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ else if (is_baseline) - emit_sof(cinfo, M_SOF0); /* SOF code for baseline implementation */ + emit_sof(cinfo, M_SOF0); /* SOF code for baseline implementation */ else - emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */ + emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */ } } @@ -568,19 +574,12 @@ */ for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan */ - if (cinfo->Ss == 0) { - if (cinfo->Ah == 0) /* DC needs no table for refinement scan */ - emit_dht(cinfo, compptr->dc_tbl_no, FALSE); - } else { - emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } - } else { - /* Sequential mode: need both DC and AC tables */ - emit_dht(cinfo, compptr->dc_tbl_no, FALSE); - emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) + emit_dht(cinfo, compptr->dc_tbl_no, FALSE); + /* AC needs no table when not present */ + if (cinfo->Se) + emit_dht(cinfo, compptr->ac_tbl_no, TRUE); } } @@ -629,9 +628,9 @@ if (! cinfo->arith_code) { for (i = 0; i < NUM_HUFF_TBLS; i++) { if (cinfo->dc_huff_tbl_ptrs[i] != NULL) - emit_dht(cinfo, i, FALSE); + emit_dht(cinfo, i, FALSE); if (cinfo->ac_huff_tbl_ptrs[i] != NULL) - emit_dht(cinfo, i, TRUE); + emit_dht(cinfo, i, TRUE); } } @@ -651,7 +650,7 @@ /* Create the subobject */ marker = (my_marker_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_marker_writer)); + sizeof(my_marker_writer)); cinfo->marker = (struct jpeg_marker_writer *) marker; /* Initialize method pointers */ marker->pub.write_file_header = write_file_header; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmaster.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmaster.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcmaster.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcmaster.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,17 @@ /* * jcmaster.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains master control logic for the JPEG compressor. * These routines are concerned with parameter validation, initial setup, - * and inter-pass control (determining the number of passes and the work + * and inter-pass control (determining the number of passes and the work * to be done in each pass). */ @@ -22,23 +24,23 @@ /* Private state */ typedef enum { - main_pass, /* input data, also do first output step */ - huff_opt_pass, /* Huffman code optimization pass */ - output_pass /* data output pass */ + main_pass, /* input data, also do first output step */ + huff_opt_pass, /* Huffman code optimization pass */ + output_pass /* data output pass */ } c_pass_type; typedef struct { - struct jpeg_comp_master pub; /* public fields */ + struct jpeg_comp_master pub; /* public fields */ - c_pass_type pass_type; /* the type of the current pass */ + c_pass_type pass_type; /* the type of the current pass */ - int pass_number; /* # of passes completed */ - int total_passes; /* total # of passes needed */ + int pass_number; /* # of passes completed */ + int total_passes; /* total # of passes needed */ - int scan_number; /* current index in scan_info[] */ + int scan_number; /* current index in scan_info[] */ } my_comp_master; -typedef my_comp_master * my_master_ptr; +typedef my_comp_master *my_master_ptr; /* @@ -104,7 +106,7 @@ /* Check that number of components won't exceed internal array sizes */ if (cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); /* Compute maximum sampling factors; check factor validity */ cinfo->max_h_samp_factor = 1; @@ -112,12 +114,12 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, - compptr->h_samp_factor); + compptr->h_samp_factor); cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, - compptr->v_samp_factor); + compptr->v_samp_factor); } /* Compute dimensions of components */ @@ -134,17 +136,17 @@ /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (cinfo->max_v_samp_factor * DCTSIZE)); /* Size in samples */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + (long) cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + (long) cinfo->max_v_samp_factor); /* Mark component needed (this flag isn't actually used for compression) */ compptr->component_needed = TRUE; } @@ -154,7 +156,7 @@ */ cinfo->total_iMCU_rows = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + (long) (cinfo->max_v_samp_factor*DCTSIZE)); } @@ -166,12 +168,12 @@ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode. */ { - const jpeg_scan_info * scanptr; + const jpeg_scan_info *scanptr; int scanno, ncomps, ci, coefi, thisi; int Ss, Se, Ah, Al; boolean component_sent[MAX_COMPONENTS]; #ifdef C_PROGRESSIVE_SUPPORTED - int * last_bitpos_ptr; + int *last_bitpos_ptr; int last_bitpos[MAX_COMPONENTS][DCTSIZE2]; /* -1 until that coefficient has been seen; then last Al for it */ #endif @@ -187,15 +189,15 @@ #ifdef C_PROGRESSIVE_SUPPORTED cinfo->progressive_mode = TRUE; last_bitpos_ptr = & last_bitpos[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (coefi = 0; coefi < DCTSIZE2; coefi++) - *last_bitpos_ptr++ = -1; + *last_bitpos_ptr++ = -1; #else ERREXIT(cinfo, JERR_NOT_COMPILED); #endif } else { cinfo->progressive_mode = FALSE; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) component_sent[ci] = FALSE; } @@ -207,10 +209,10 @@ for (ci = 0; ci < ncomps; ci++) { thisi = scanptr->component_index[ci]; if (thisi < 0 || thisi >= cinfo->num_components) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); /* Components must appear in SOF order within each scan */ if (ci > 0 && thisi <= scanptr->component_index[ci-1]) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); } /* Validate progression parameters */ Ss = scanptr->Ss; @@ -232,43 +234,43 @@ #define MAX_AH_AL 13 #endif if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 || - Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); if (Ss == 0) { - if (Se != 0) /* DC and AC together not OK */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + if (Se != 0) /* DC and AC together not OK */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } else { - if (ncomps != 1) /* AC scans must be for only one component */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + if (ncomps != 1) /* AC scans must be for only one component */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } for (ci = 0; ci < ncomps; ci++) { - last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0]; - if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - for (coefi = Ss; coefi <= Se; coefi++) { - if (last_bitpos_ptr[coefi] < 0) { - /* first scan of this coefficient */ - if (Ah != 0) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - } else { - /* not first scan */ - if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - } - last_bitpos_ptr[coefi] = Al; - } + last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0]; + if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + for (coefi = Ss; coefi <= Se; coefi++) { + if (last_bitpos_ptr[coefi] < 0) { + /* first scan of this coefficient */ + if (Ah != 0) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + } else { + /* not first scan */ + if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + } + last_bitpos_ptr[coefi] = Al; + } } #endif } else { /* For sequential JPEG, all progression parameters must be these: */ if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); /* Make sure components are not sent twice */ for (ci = 0; ci < ncomps; ci++) { - thisi = scanptr->component_index[ci]; - if (component_sent[thisi]) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); - component_sent[thisi] = TRUE; + thisi = scanptr->component_index[ci]; + if (component_sent[thisi]) + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + component_sent[thisi] = TRUE; } } } @@ -283,13 +285,13 @@ */ for (ci = 0; ci < cinfo->num_components; ci++) { if (last_bitpos[ci][0] < 0) - ERREXIT(cinfo, JERR_MISSING_DATA); + ERREXIT(cinfo, JERR_MISSING_DATA); } #endif } else { for (ci = 0; ci < cinfo->num_components; ci++) { if (! component_sent[ci]) - ERREXIT(cinfo, JERR_MISSING_DATA); + ERREXIT(cinfo, JERR_MISSING_DATA); } } } @@ -307,12 +309,12 @@ if (cinfo->scan_info != NULL) { /* Prepare for current scan --- the script is already validated */ my_master_ptr master = (my_master_ptr) cinfo->master; - const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number; + const jpeg_scan_info *scanptr = cinfo->scan_info + master->scan_number; cinfo->comps_in_scan = scanptr->comps_in_scan; for (ci = 0; ci < scanptr->comps_in_scan; ci++) { cinfo->cur_comp_info[ci] = - &cinfo->comp_info[scanptr->component_index[ci]]; + &cinfo->comp_info[scanptr->component_index[ci]]; } cinfo->Ss = scanptr->Ss; cinfo->Se = scanptr->Se; @@ -325,7 +327,7 @@ /* Prepare for single sequential-JPEG scan containing all components */ if (cinfo->num_components > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPS_IN_SCAN); + MAX_COMPS_IN_SCAN); cinfo->comps_in_scan = cinfo->num_components; for (ci = 0; ci < cinfo->num_components; ci++) { cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci]; @@ -345,16 +347,16 @@ { int ci, mcublks, tmp; jpeg_component_info *compptr; - + if (cinfo->comps_in_scan == 1) { - + /* Noninterleaved (single-component) scan */ compptr = cinfo->cur_comp_info[0]; - + /* Overall image size in MCUs */ cinfo->MCUs_per_row = compptr->width_in_blocks; cinfo->MCU_rows_in_scan = compptr->height_in_blocks; - + /* For noninterleaved scan, always one block per MCU */ compptr->MCU_width = 1; compptr->MCU_height = 1; @@ -367,28 +369,28 @@ tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; - + /* Prepare array describing MCU composition */ cinfo->blocks_in_MCU = 1; cinfo->MCU_membership[0] = 0; - + } else { - + /* Interleaved (multi-component) scan */ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, - MAX_COMPS_IN_SCAN); - + MAX_COMPS_IN_SCAN); + /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + (long) (cinfo->max_h_samp_factor*DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - + (long) (cinfo->max_v_samp_factor*DCTSIZE)); + cinfo->blocks_in_MCU = 0; - + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; /* Sampling factors give # of blocks of component in each MCU */ @@ -406,12 +408,12 @@ /* Prepare array describing MCU composition */ mcublks = compptr->MCU_blocks; if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU) - ERREXIT(cinfo, JERR_BAD_MCU_SIZE); + ERREXIT(cinfo, JERR_BAD_MCU_SIZE); while (mcublks-- > 0) { - cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; } } - + } /* Convert restart specified in rows to actual MCU count. */ @@ -451,8 +453,8 @@ (*cinfo->fdct->start_pass) (cinfo); (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding); (*cinfo->coef->start_pass) (cinfo, - (master->total_passes > 1 ? - JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); + (master->total_passes > 1 ? + JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU); if (cinfo->optimize_coding) { /* No immediate data output; postpone writing frame/scan headers */ @@ -580,7 +582,7 @@ master = (my_master_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_comp_master)); + sizeof(my_comp_master)); cinfo->master = (struct jpeg_comp_master *) master; master->pub.prepare_for_pass = prepare_for_pass; master->pub.pass_startup = pass_startup; @@ -601,7 +603,7 @@ cinfo->num_scans = 1; } - if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */ + if (cinfo->progressive_mode && !cinfo->arith_code) /* TEMPORARY HACK ??? */ cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */ /* Initialize my private state */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcomapi.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcomapi.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcomapi.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcomapi.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcomapi.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface routines that are used for both * compression and decompression. @@ -72,8 +75,8 @@ /* NB: mem pointer is NULL if memory mgr failed to initialize. */ if (cinfo->mem != NULL) (*cinfo->mem->self_destruct) (cinfo); - cinfo->mem = NULL; /* be safe if jpeg_destroy is called twice */ - cinfo->global_state = 0; /* mark it destroyed */ + cinfo->mem = NULL; /* be safe if jpeg_destroy is called twice */ + cinfo->global_state = 0; /* mark it destroyed */ } @@ -88,8 +91,8 @@ JQUANT_TBL *tbl; tbl = (JQUANT_TBL *) - (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL)); - tbl->sent_table = FALSE; /* make sure this is false in any new table */ + (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JQUANT_TBL)); + tbl->sent_table = FALSE; /* make sure this is false in any new table */ return tbl; } @@ -100,7 +103,7 @@ JHUFF_TBL *tbl; tbl = (JHUFF_TBL *) - (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL)); - tbl->sent_table = FALSE; /* make sure this is false in any new table */ + (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JHUFF_TBL)); + tbl->sent_table = FALSE; /* make sure this is false in any new table */ return tbl; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.h 2021-04-25 01:47:22.000000000 +0800 @@ -5,16 +5,27 @@ #define JPEG_LIB_VERSION 62 /* libjpeg-turbo version */ -#define LIBJPEG_TURBO_VERSION 1.2.0 +#define LIBJPEG_TURBO_VERSION 1.4.2 /* Support arithmetic encoding */ -#define C_ARITH_CODING_SUPPORTED 1 +/* #define C_ARITH_CODING_SUPPORTED 1 */ /* Support arithmetic decoding */ -#define D_ARITH_CODING_SUPPORTED 1 +/* #define D_ARITH_CODING_SUPPORTED 1 */ -/* Compiler supports function prototypes. */ -#define HAVE_PROTOTYPES 1 +/* + * Define BITS_IN_JSAMPLE as either + * 8 for 8-bit sample values (the usual setting) + * 12 for 12-bit sample values + * Only 8 and 12 are legal data precisions for lossy JPEG according to the + * JPEG standard, and the IJG code does not support anything else! + * We do not support run-time selection of data precision, sorry. + */ + +#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDDEF_H 1 @@ -22,25 +33,27 @@ /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 -/* Compiler supports 'unsigned char'. */ +/* Define to 1 if the system has the type `unsigned char'. */ #define HAVE_UNSIGNED_CHAR 1 -/* Compiler supports 'unsigned short'. */ +/* Define to 1 if the system has the type `unsigned short'. */ #define HAVE_UNSIGNED_SHORT 1 -/* Compiler does not support pointers to unspecified structures. */ +/* Compiler does not support pointers to undefined structures. */ /* #undef INCOMPLETE_TYPES_BROKEN */ -/* Compiler has rather than standard . */ -/* #undef NEED_BSD_STRINGS */ +/* Support in-memory source/destination managers */ +/* #undef MEM_SRCDST_SUPPORTED */ -/* Linker requires that global names be unique in first 15 characters. */ -/* #undef NEED_SHORT_EXTERNAL_NAMES */ +/* Define if you have BSD-like bzero and bcopy in rather than + memset/memcpy in . */ +/* #undef NEED_BSD_STRINGS */ -/* Need to include in order to obtain size_t. */ -#define NEED_SYS_TYPES_H 1 +/* Define if you need to include to get size_t. */ +/* #undef NEED_SYS_TYPES_H 1 */ -/* Broken compiler shifts signed values as an unsigned shift. */ +/* Define if your (broken) compiler shifts signed values as if they were + unsigned. */ /* #undef RIGHT_SHIFT_IS_UNSIGNED */ /* Use accelerated SIMD routines. */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.h.in glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.h.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.h.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.h.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,70 @@ +/* Version ID for the JPEG library. + * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60". + */ +#define JPEG_LIB_VERSION 62 /* Version 6b */ + +/* libjpeg-turbo version */ +#define LIBJPEG_TURBO_VERSION 0 + +/* Support arithmetic encoding */ +#undef C_ARITH_CODING_SUPPORTED + +/* Support arithmetic decoding */ +#undef D_ARITH_CODING_SUPPORTED + +/* + * Define BITS_IN_JSAMPLE as either + * 8 for 8-bit sample values (the usual setting) + * 12 for 12-bit sample values + * Only 8 and 12 are legal data precisions for lossy JPEG according to the + * JPEG standard, and the IJG code does not support anything else! + * We do not support run-time selection of data precision, sorry. + */ + +#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ + +/* Define to 1 if you have the header file. */ +#undef HAVE_LOCALE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDDEF_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if the system has the type `unsigned char'. */ +#undef HAVE_UNSIGNED_CHAR + +/* Define to 1 if the system has the type `unsigned short'. */ +#undef HAVE_UNSIGNED_SHORT + +/* Compiler does not support pointers to undefined structures. */ +#undef INCOMPLETE_TYPES_BROKEN + +/* Support in-memory source/destination managers */ +#undef MEM_SRCDST_SUPPORTED + +/* Define if you have BSD-like bzero and bcopy in rather than + memset/memcpy in . */ +#undef NEED_BSD_STRINGS + +/* Define if you need to include to get size_t. */ +#undef NEED_SYS_TYPES_H + +/* Define if your (broken) compiler shifts signed values as if they were + unsigned. */ +#undef RIGHT_SHIFT_IS_UNSIGNED + +/* Use accelerated SIMD routines. */ +#undef WITH_SIMD + +/* Define to 1 if type `char' is unsigned and you are not using gcc. */ +#ifndef __CHAR_UNSIGNED__ +# undef __CHAR_UNSIGNED__ +#endif + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `unsigned int' if does not define. */ +#undef size_t diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfigint.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfigint.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfigint.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfigint.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,27 @@ +/* jconfigint.h. Generated from jconfigint.h.in by configure. */ +/* libjpeg-turbo build number */ +#define BUILD "" + +/* How to obtain function inlining. */ +#ifndef INLINE +#if defined(__GNUC__) +#define INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __forceinline +#else +#define INLINE +#endif +#endif + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "libjpeg-turbo" + +/* Version number of package */ +#define VERSION "1.4.90" + +/* The size of `size_t', as computed by sizeof. */ +#if __WORDSIZE==64 || defined(_WIN64) +#define SIZEOF_SIZE_T 8 +#else +#define SIZEOF_SIZE_T 4 +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfigint.h.in glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfigint.h.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfigint.h.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfigint.h.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,14 @@ +/* libjpeg-turbo build number */ +#undef BUILD + +/* How to obtain function inlining. */ +#undef INLINE + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Version number of package */ +#undef VERSION + +/* The size of `size_t', as computed by sizeof. */ +#undef SIZEOF_SIZE_T diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jconfig.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jconfig.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,143 @@ +/* + * jconfig.txt + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1994, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file documents the configuration options that are required to + * customize the JPEG software for a particular system. + * + * The actual configuration options for a particular installation are stored + * in jconfig.h. On many machines, jconfig.h can be generated automatically + * or copied from one of the "canned" jconfig files that we supply. But if + * you need to generate a jconfig.h file by hand, this file tells you how. + * + * DO NOT EDIT THIS FILE --- IT WON'T ACCOMPLISH ANYTHING. + * EDIT A COPY NAMED JCONFIG.H. + */ + + +/* + * These symbols indicate the properties of your machine or compiler. + * #define the symbol if yes, #undef it if no. + */ + +/* Does your compiler support the declaration "unsigned char" ? + * How about "unsigned short" ? + */ +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT + +/* Define "void" as "char" if your compiler doesn't know about type void. + * NOTE: be sure to define void such that "void *" represents the most general + * pointer type, e.g., that returned by malloc(). + */ +/* #define void char */ + +/* Define "const" as empty if your compiler doesn't know the "const" keyword. + */ +/* #define const */ + +/* Define this if an ordinary "char" type is unsigned. + * If you're not sure, leaving it undefined will work at some cost in speed. + * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal. + */ +#undef __CHAR_UNSIGNED__ + +/* Define this if your system has an ANSI-conforming file. + */ +#define HAVE_STDDEF_H + +/* Define this if your system has an ANSI-conforming file. + */ +#define HAVE_STDLIB_H + +/* Define this if your system does not have an ANSI/SysV , + * but does have a BSD-style . + */ +#undef NEED_BSD_STRINGS + +/* Define this if your system does not provide typedef size_t in any of the + * ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in + * instead. + */ +#undef NEED_SYS_TYPES_H + +/* Although a real ANSI C compiler can deal perfectly well with pointers to + * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI + * and pseudo-ANSI compilers get confused. To keep one of these bozos happy, + * define INCOMPLETE_TYPES_BROKEN. This is not recommended unless you + * actually get "missing structure definition" warnings or errors while + * compiling the JPEG code. + */ +#undef INCOMPLETE_TYPES_BROKEN + +/* Define "boolean" as unsigned char, not int, on Windows systems. + */ +#ifdef _WIN32 +#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ +typedef unsigned char boolean; +#endif +#define HAVE_BOOLEAN /* prevent jmorecfg.h from redefining it */ +#endif + + +/* + * The following options affect code selection within the JPEG library, + * but they don't need to be visible to applications using the library. + * To minimize application namespace pollution, the symbols won't be + * defined unless JPEG_INTERNALS has been defined. + */ + +#ifdef JPEG_INTERNALS + +/* Define this if your compiler implements ">>" on signed values as a logical + * (unsigned) shift; leave it undefined if ">>" is a signed (arithmetic) shift, + * which is the normal and rational definition. + */ +#undef RIGHT_SHIFT_IS_UNSIGNED + + +#endif /* JPEG_INTERNALS */ + + +/* + * The remaining options do not affect the JPEG library proper, + * but only the sample applications cjpeg/djpeg (see cjpeg.c, djpeg.c). + * Other applications can ignore these. + */ + +#ifdef JPEG_CJPEG_DJPEG + +/* These defines indicate which image (non-JPEG) file formats are allowed. */ + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +/* Define this if you want to name both input and output files on the command + * line, rather than using stdout and optionally stdin. You MUST do this if + * your system can't cope with binary I/O to stdin/stdout. See comments at + * head of cjpeg.c or djpeg.c. + */ +#undef TWO_FILE_COMMANDLINE + +/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb"). + * This is necessary on systems that distinguish text files from binary files, + * and is harmless on most systems that don't. If you have one of the rare + * systems that complains about the "b" spec, define this symbol. + */ +#undef DONT_USE_B_MODE + +/* Define this if you want percent-done progress reports from cjpeg/djpeg. + */ +#undef PROGRESS_REPORT + + +#endif /* JPEG_CJPEG_DJPEG */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcparam.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcparam.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcparam.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcparam.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,13 @@ /* * jcparam.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2003-2008 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains optional default-setting code for the JPEG compressor. * Applications do not have to use this file, but those that don't use it @@ -15,6 +17,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jstdhuff.c" /* @@ -23,15 +26,15 @@ GLOBAL(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, boolean force_baseline) + const unsigned int *basic_table, + int scale_factor, boolean force_baseline) /* Define a quantization table equal to the basic_table times * a scale factor (given as a percentage). * If force_baseline is TRUE, the computed quantization table entries * are limited to 1..255 for JPEG baseline compatibility. */ { - JQUANT_TBL ** qtblptr; + JQUANT_TBL **qtblptr; int i; long temp; @@ -53,7 +56,7 @@ if (temp <= 0L) temp = 1L; if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */ if (force_baseline && temp > 255L) - temp = 255L; /* limit to baseline range if requested */ + temp = 255L; /* limit to baseline range if requested */ (*qtblptr)->quantval[i] = (UINT16) temp; } @@ -98,16 +101,16 @@ { /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - cinfo->q_scale_factor[0], force_baseline); + cinfo->q_scale_factor[0], force_baseline); jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - cinfo->q_scale_factor[1], force_baseline); + cinfo->q_scale_factor[1], force_baseline); } #endif GLOBAL(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, - boolean force_baseline) + boolean force_baseline) /* Set or change the 'quality' (quantization) setting, using default tables * and a straight percentage-scaling quality scale. In most cases it's better * to use jpeg_set_quality (below); this entry point is provided for @@ -116,9 +119,9 @@ { /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - scale_factor, force_baseline); + scale_factor, force_baseline); jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - scale_factor, force_baseline); + scale_factor, force_baseline); } @@ -165,116 +168,6 @@ /* - * Huffman table setup routines - */ - -LOCAL(void) -add_huff_table (j_compress_ptr cinfo, - JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val) -/* Define a Huffman table */ -{ - int nsymbols, len; - - if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - - /* Copy the number-of-symbols-of-each-code-length counts */ - MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits)); - - /* Validate the counts. We do this here mainly so we can copy the right - * number of symbols from the val[] array, without risking marching off - * the end of memory. jchuff.c will do a more thorough test later. - */ - nsymbols = 0; - for (len = 1; len <= 16; len++) - nsymbols += bits[len]; - if (nsymbols < 1 || nsymbols > 256) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - - MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8)); - - /* Initialize sent_table FALSE so table will be written to JPEG file. */ - (*htblptr)->sent_table = FALSE; -} - - -LOCAL(void) -std_huff_tables (j_compress_ptr cinfo) -/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ -/* IMPORTANT: these are only valid for 8-bit data precision! */ -{ - static const UINT8 bits_dc_luminance[17] = - { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_luminance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - - static const UINT8 bits_dc_chrominance[17] = - { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_chrominance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - - static const UINT8 bits_ac_luminance[17] = - { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; - static const UINT8 val_ac_luminance[] = - { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, - 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, - 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, - 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, - 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, - 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, - 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, - 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, - 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, - 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, - 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; - - static const UINT8 bits_ac_chrominance[17] = - { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; - static const UINT8 val_ac_chrominance[] = - { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, - 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, - 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, - 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, - 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, - 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, - 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, - 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, - 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, - 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, - 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, - 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, - 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; - - add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0], - bits_dc_luminance, val_dc_luminance); - add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0], - bits_ac_luminance, val_ac_luminance); - add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1], - bits_dc_chrominance, val_dc_chrominance); - add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1], - bits_ac_chrominance, val_ac_chrominance); -} - - -/* * Default parameter setup for compression. * * Applications that don't choose to use this routine must do their @@ -300,19 +193,19 @@ if (cinfo->comp_info == NULL) cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - MAX_COMPONENTS * SIZEOF(jpeg_component_info)); + MAX_COMPONENTS * sizeof(jpeg_component_info)); /* Initialize everything not dependent on the color space */ #if JPEG_LIB_VERSION >= 70 - cinfo->scale_num = 1; /* 1:1 scaling */ + cinfo->scale_num = 1; /* 1:1 scaling */ cinfo->scale_denom = 1; #endif cinfo->data_precision = BITS_IN_JSAMPLE; /* Set up two quantization tables using default quality of 75 */ jpeg_set_quality(cinfo, 75, TRUE); /* Set up two Huffman tables */ - std_huff_tables(cinfo); + std_huff_tables((j_common_ptr) cinfo); /* Initialize default arithmetic coding conditioning */ for (i = 0; i < NUM_ARITH_TBLS; i++) { @@ -370,8 +263,8 @@ */ cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */ cinfo->JFIF_minor_version = 1; - cinfo->density_unit = 0; /* Pixel size is unknown by default */ - cinfo->X_density = 1; /* Pixel aspect ratio is square by default */ + cinfo->density_unit = 0; /* Pixel size is unknown by default */ + cinfo->X_density = 1; /* Pixel aspect ratio is square by default */ cinfo->Y_density = 1; /* Choose JPEG colorspace based on input space, set defaults accordingly */ @@ -429,7 +322,7 @@ GLOBAL(void) jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { - jpeg_component_info * compptr; + jpeg_component_info *compptr; int ci; #define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl) \ @@ -497,7 +390,7 @@ cinfo->num_components = cinfo->input_components; if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); for (ci = 0; ci < cinfo->num_components; ci++) { SET_COMP(ci, ci, 1,1, 0, 0,0); } @@ -511,8 +404,8 @@ #ifdef C_PROGRESSIVE_SUPPORTED LOCAL(jpeg_scan_info *) -fill_a_scan (jpeg_scan_info * scanptr, int ci, - int Ss, int Se, int Ah, int Al) +fill_a_scan (jpeg_scan_info *scanptr, int ci, + int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for specified component */ { scanptr->comps_in_scan = 1; @@ -526,8 +419,8 @@ } LOCAL(jpeg_scan_info *) -fill_scans (jpeg_scan_info * scanptr, int ncomps, - int Ss, int Se, int Ah, int Al) +fill_scans (jpeg_scan_info *scanptr, int ncomps, + int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for each component */ { int ci; @@ -545,7 +438,7 @@ } LOCAL(jpeg_scan_info *) -fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al) +fill_dc_scans (jpeg_scan_info *scanptr, int ncomps, int Ah, int Al) /* Support routine: generate interleaved DC scan if possible, else N scans */ { int ci; @@ -577,7 +470,7 @@ { int ncomps = cinfo->num_components; int nscans; - jpeg_scan_info * scanptr; + jpeg_scan_info *scanptr; /* Safety check to ensure start_compress not called yet. */ if (cinfo->global_state != CSTATE_START) @@ -590,9 +483,9 @@ } else { /* All-purpose script for other color spaces. */ if (ncomps > MAX_COMPS_IN_SCAN) - nscans = 6 * ncomps; /* 2 DC + 4 AC scans per component */ + nscans = 6 * ncomps; /* 2 DC + 4 AC scans per component */ else - nscans = 2 + 4 * ncomps; /* 2 DC scans; 4 AC scans per component */ + nscans = 2 + 4 * ncomps; /* 2 DC scans; 4 AC scans per component */ } /* Allocate space for script. @@ -606,7 +499,7 @@ cinfo->script_space_size = MAX(nscans, 10); cinfo->script_space = (jpeg_scan_info *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - cinfo->script_space_size * SIZEOF(jpeg_scan_info)); + cinfo->script_space_size * sizeof(jpeg_scan_info)); } scanptr = cinfo->script_space; cinfo->scan_info = scanptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcphuff.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcphuff.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcphuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcphuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcphuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy encoding routines for progressive JPEG. * @@ -15,7 +18,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jchuff.c */ +#include "jchuff.h" /* Declarations shared with jchuff.c */ #ifdef C_PROGRESSIVE_SUPPORTED @@ -30,36 +33,36 @@ /* Bit-level coding status. * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. */ - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - INT32 put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ + j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ /* Coding status for DC components */ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ /* Coding status for AC components */ - int ac_tbl_no; /* the table number of the single component */ - unsigned int EOBRUN; /* run length of EOBs */ - unsigned int BE; /* # of buffered correction bits before MCU */ - char * bit_buffer; /* buffer for correction bits (1 per char) */ + int ac_tbl_no; /* the table number of the single component */ + unsigned int EOBRUN; /* run length of EOBs */ + unsigned int BE; /* # of buffered correction bits before MCU */ + char *bit_buffer; /* buffer for correction bits (1 per char) */ /* packing correction bits tightly would save some space but cost time... */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to derived tables (these workspaces have image lifespan). * Since any one scan codes only DC or only AC, we only need one set * of tables, not one for DC and one for AC. */ - c_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; /* Statistics tables for optimization; again, one set is enough */ - long * count_ptrs[NUM_HUFF_TBLS]; + long *count_ptrs[NUM_HUFF_TBLS]; } phuff_entropy_encoder; -typedef phuff_entropy_encoder * phuff_entropy_ptr; +typedef phuff_entropy_encoder *phuff_entropy_ptr; /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit * buffer can hold. Larger sizes may slightly improve compression, but @@ -67,35 +70,35 @@ * The minimum safe size is 64 bits. */ -#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ +#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ -/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. - * We assume that int right shift is unsigned if INT32 right shift is, +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. + * We assume that int right shift is unsigned if JLONG right shift is, * which should be safe. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS int ishift_temp; +#define ISHIFT_TEMPS int ishift_temp; #define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif /* Forward declarations */ -METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo)); -METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_DC_first (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_AC_first (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_DC_refine (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_AC_refine (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo); +METHODDEF(void) finish_pass_gather_phuff (j_compress_ptr cinfo); /* @@ -104,11 +107,11 @@ METHODDEF(void) start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; entropy->cinfo = cinfo; entropy->gather_statistics = gather_statistics; @@ -130,9 +133,9 @@ entropy->pub.encode_mcu = encode_mcu_AC_refine; /* AC refinement needs a correction bit buffer */ if (entropy->bit_buffer == NULL) - entropy->bit_buffer = (char *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - MAX_CORR_BITS * SIZEOF(char)); + entropy->bit_buffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + MAX_CORR_BITS * sizeof(char)); } } if (gather_statistics) @@ -149,8 +152,8 @@ entropy->last_dc_val[ci] = 0; /* Get table index */ if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; tbl = compptr->dc_tbl_no; } else { entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; @@ -163,15 +166,15 @@ /* Allocate and zero the statistics tables */ /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ if (entropy->count_ptrs[tbl] == NULL) - entropy->count_ptrs[tbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long)); + entropy->count_ptrs[tbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); } else { /* Compute derived values for Huffman table */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, - & entropy->derived_tbls[tbl]); + & entropy->derived_tbls[tbl]); } } @@ -196,16 +199,16 @@ /* Emit a byte */ #define emit_byte(entropy,val) \ - { *(entropy)->next_output_byte++ = (JOCTET) (val); \ - if (--(entropy)->free_in_buffer == 0) \ - dump_buffer(entropy); } + { *(entropy)->next_output_byte++ = (JOCTET) (val); \ + if (--(entropy)->free_in_buffer == 0) \ + dump_buffer(entropy); } LOCAL(void) dump_buffer (phuff_entropy_ptr entropy) /* Empty the output buffer; we do not support suspension in this module. */ { - struct jpeg_destination_mgr * dest = entropy->cinfo->dest; + struct jpeg_destination_mgr *dest = entropy->cinfo->dest; if (! (*dest->empty_output_buffer) (entropy->cinfo)) ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); @@ -228,7 +231,7 @@ /* Emit some bits, unless we are in gather mode */ { /* This routine is heavily used, so it's worth coding tightly. */ - register INT32 put_buffer = (INT32) code; + register size_t put_buffer = (size_t) code; register int put_bits = entropy->put_bits; /* if size is 0, caller used an invalid Huffman table entry */ @@ -236,21 +239,21 @@ ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); if (entropy->gather_statistics) - return; /* do nothing if we're only getting stats */ + return; /* do nothing if we're only getting stats */ + + put_buffer &= (((size_t) 1)<put_buffer; /* and merge with old buffer contents */ while (put_bits >= 8) { int c = (int) ((put_buffer >> 16) & 0xFF); - + emit_byte(entropy, c); - if (c == 0xFF) { /* need to stuff a zero byte? */ + if (c == 0xFF) { /* need to stuff a zero byte? */ emit_byte(entropy, 0); } put_buffer <<= 8; @@ -281,7 +284,7 @@ if (entropy->gather_statistics) entropy->count_ptrs[tbl_no][symbol]++; else { - c_derived_tbl * tbl = entropy->derived_tbls[tbl_no]; + c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); } } @@ -292,11 +295,11 @@ */ LOCAL(void) -emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart, - unsigned int nbits) +emit_buffered_bits (phuff_entropy_ptr entropy, char *bufstart, + unsigned int nbits) { if (entropy->gather_statistics) - return; /* no real work */ + return; /* no real work */ while (nbits > 0) { emit_bits(entropy, (unsigned int) (*bufstart), 1); @@ -315,7 +318,7 @@ { register int temp, nbits; - if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ + if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ temp = entropy->EOBRUN; nbits = 0; while ((temp >>= 1)) @@ -380,7 +383,7 @@ int blkn, ci; int Al = cinfo->Al; JBLOCKROW block; - jpeg_component_info * compptr; + jpeg_component_info *compptr; ISHIFT_TEMPS entropy->next_output_byte = cinfo->dest->next_output_byte; @@ -409,12 +412,12 @@ /* Encode the DC coefficient difference per section G.1.2.1 */ temp2 = temp; if (temp < 0) { - temp = -temp; /* temp is abs value of input */ + temp = -temp; /* temp is abs value of input */ /* For a negative input, want temp2 = bitwise complement of abs(input) */ /* This code assumes we are on a two's complement machine */ temp2--; } - + /* Find the number of bits needed for the magnitude of the coefficient */ nbits = 0; while (temp) { @@ -426,13 +429,13 @@ */ if (nbits > MAX_COEF_BITS+1) ERREXIT(cinfo, JERR_BAD_DCT_COEF); - + /* Count/emit the Huffman-coded symbol for the number of bits */ emit_symbol(entropy, compptr->dc_tbl_no, nbits); - + /* Emit that number of bits of the value, if positive, */ /* or the complement of its magnitude, if negative. */ - if (nbits) /* emit_bits rejects calls with size 0 */ + if (nbits) /* emit_bits rejects calls with size 0 */ emit_bits(entropy, (unsigned int) temp2, nbits); } @@ -481,9 +484,9 @@ block = MCU_data[0]; /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ - - r = 0; /* r = run length of zeros */ - + + r = 0; /* r = run length of zeros */ + for (k = cinfo->Ss; k <= Se; k++) { if ((temp = (*block)[jpeg_natural_order[k]]) == 0) { r++; @@ -495,12 +498,12 @@ * interwoven with finding the abs value (temp) and output bits (temp2). */ if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ temp2 = ~temp; } else { - temp >>= Al; /* apply the point transform */ + temp >>= Al; /* apply the point transform */ temp2 = temp; } /* Watch out for case that nonzero coef is zero after point transform */ @@ -519,7 +522,7 @@ } /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ + nbits = 1; /* there must be at least one 1 bit */ while ((temp >>= 1)) nbits++; /* Check for out-of-range coefficient values */ @@ -533,13 +536,13 @@ /* or the complement of its magnitude, if negative. */ emit_bits(entropy, (unsigned int) temp2, nbits); - r = 0; /* reset zero run length */ + r = 0; /* reset zero run length */ } - if (r > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ + if (r > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ if (entropy->EOBRUN == 0x7FFF) - emit_eobrun(entropy); /* force it out to avoid overflow */ + emit_eobrun(entropy); /* force it out to avoid overflow */ } cinfo->dest->next_output_byte = entropy->next_output_byte; @@ -648,17 +651,17 @@ * in C, we shift after obtaining the absolute value. */ if (temp < 0) - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - absvalues[k] = temp; /* save abs value for main pass */ + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ + absvalues[k] = temp; /* save abs value for main pass */ if (temp == 1) - EOB = k; /* EOB = index of last newly-nonzero coef */ + EOB = k; /* EOB = index of last newly-nonzero coef */ } /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ - - r = 0; /* r = run length of zeros */ - BR = 0; /* BR = count of buffered bits added now */ + + r = 0; /* r = run length of zeros */ + BR = 0; /* BR = count of buffered bits added now */ BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ for (k = cinfo->Ss; k <= Se; k++) { @@ -705,12 +708,12 @@ emit_buffered_bits(entropy, BR_buffer, BR); BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ BR = 0; - r = 0; /* reset zero run length */ + r = 0; /* reset zero run length */ } - if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ - entropy->BE += BR; /* concat my correction bits to older ones */ + if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + entropy->BE += BR; /* concat my correction bits to older ones */ /* We force out the EOB if we risk either: * 1. overflow of the EOB counter; * 2. overflow of the correction bit buffer during the next MCU. @@ -742,7 +745,7 @@ METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; entropy->next_output_byte = cinfo->dest->next_output_byte; @@ -767,7 +770,7 @@ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JHUFF_TBL **htblptr; boolean did[NUM_HUFF_TBLS]; @@ -779,13 +782,13 @@ /* It's important not to apply jpeg_gen_optimal_table more than once * per table, because it clobbers the input frequency counts! */ - MEMZERO(did, SIZEOF(did)); + MEMZERO(did, sizeof(did)); for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; tbl = compptr->dc_tbl_no; } else { tbl = compptr->ac_tbl_no; @@ -816,7 +819,7 @@ entropy = (phuff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_encoder)); + sizeof(phuff_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass_phuff; @@ -825,7 +828,7 @@ entropy->derived_tbls[i] = NULL; entropy->count_ptrs[i] = NULL; } - entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ + entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ } #endif /* C_PROGRESSIVE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcprepct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcprepct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcprepct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcprepct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcprepct.c * + * This file is part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the compression preprocessing controller. * This controller manages the color conversion, downsampling, @@ -58,16 +61,16 @@ */ JSAMPARRAY color_buf[MAX_COMPONENTS]; - JDIMENSION rows_to_go; /* counts rows remaining in source image */ - int next_buf_row; /* index of next row to store in color_buf */ + JDIMENSION rows_to_go; /* counts rows remaining in source image */ + int next_buf_row; /* index of next row to store in color_buf */ -#ifdef CONTEXT_ROWS_SUPPORTED /* only needed for context case */ - int this_row_group; /* starting row index of group to process */ - int next_buf_stop; /* downsample when we reach this index */ +#ifdef CONTEXT_ROWS_SUPPORTED /* only needed for context case */ + int this_row_group; /* starting row index of group to process */ + int next_buf_stop; /* downsample when we reach this index */ #endif } my_prep_controller; -typedef my_prep_controller * my_prep_ptr; +typedef my_prep_controller *my_prep_ptr; /* @@ -104,13 +107,13 @@ LOCAL(void) expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols, - int input_rows, int output_rows) + int input_rows, int output_rows) { register int row; for (row = input_rows; row < output_rows; row++) { jcopy_sample_rows(image_data, input_rows-1, image_data, row, - 1, num_cols); + 1, num_cols); } } @@ -126,43 +129,43 @@ METHODDEF(void) pre_process_data (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int numrows, ci; JDIMENSION inrows; - jpeg_component_info * compptr; + jpeg_component_info *compptr; while (*in_row_ctr < in_rows_avail && - *out_row_group_ctr < out_row_groups_avail) { + *out_row_group_ctr < out_row_groups_avail) { /* Do color conversion to fill the conversion buffer. */ inrows = in_rows_avail - *in_row_ctr; numrows = cinfo->max_v_samp_factor - prep->next_buf_row; numrows = (int) MIN((JDIMENSION) numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, - prep->color_buf, - (JDIMENSION) prep->next_buf_row, - numrows); + prep->color_buf, + (JDIMENSION) prep->next_buf_row, + numrows); *in_row_ctr += numrows; prep->next_buf_row += numrows; prep->rows_to_go -= numrows; /* If at bottom of image, pad to fill the conversion buffer. */ if (prep->rows_to_go == 0 && - prep->next_buf_row < cinfo->max_v_samp_factor) { + prep->next_buf_row < cinfo->max_v_samp_factor) { for (ci = 0; ci < cinfo->num_components; ci++) { - expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, - prep->next_buf_row, cinfo->max_v_samp_factor); + expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, + prep->next_buf_row, cinfo->max_v_samp_factor); } prep->next_buf_row = cinfo->max_v_samp_factor; } /* If we've filled the conversion buffer, empty it. */ if (prep->next_buf_row == cinfo->max_v_samp_factor) { (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, (JDIMENSION) 0, - output_buf, *out_row_group_ctr); + prep->color_buf, (JDIMENSION) 0, + output_buf, *out_row_group_ctr); prep->next_buf_row = 0; (*out_row_group_ctr)++; } @@ -170,16 +173,16 @@ * Note we assume the caller is providing a one-iMCU-height output buffer! */ if (prep->rows_to_go == 0 && - *out_row_group_ctr < out_row_groups_avail) { + *out_row_group_ctr < out_row_groups_avail) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - expand_bottom_edge(output_buf[ci], - compptr->width_in_blocks * DCTSIZE, - (int) (*out_row_group_ctr * compptr->v_samp_factor), - (int) (out_row_groups_avail * compptr->v_samp_factor)); + ci++, compptr++) { + expand_bottom_edge(output_buf[ci], + compptr->width_in_blocks * DCTSIZE, + (int) (*out_row_group_ctr * compptr->v_samp_factor), + (int) (out_row_groups_avail * compptr->v_samp_factor)); } *out_row_group_ctr = out_row_groups_avail; - break; /* can exit outer loop without test */ + break; /* can exit outer loop without test */ } } } @@ -193,10 +196,10 @@ METHODDEF(void) pre_process_context (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int numrows, ci; @@ -210,19 +213,19 @@ numrows = prep->next_buf_stop - prep->next_buf_row; numrows = (int) MIN((JDIMENSION) numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, - prep->color_buf, - (JDIMENSION) prep->next_buf_row, - numrows); + prep->color_buf, + (JDIMENSION) prep->next_buf_row, + numrows); /* Pad at top of image, if first time through */ if (prep->rows_to_go == cinfo->image_height) { - for (ci = 0; ci < cinfo->num_components; ci++) { - int row; - for (row = 1; row <= cinfo->max_v_samp_factor; row++) { - jcopy_sample_rows(prep->color_buf[ci], 0, - prep->color_buf[ci], -row, - 1, cinfo->image_width); - } - } + for (ci = 0; ci < cinfo->num_components; ci++) { + int row; + for (row = 1; row <= cinfo->max_v_samp_factor; row++) { + jcopy_sample_rows(prep->color_buf[ci], 0, + prep->color_buf[ci], -row, + 1, cinfo->image_width); + } + } } *in_row_ctr += numrows; prep->next_buf_row += numrows; @@ -230,29 +233,29 @@ } else { /* Return for more data, unless we are at the bottom of the image. */ if (prep->rows_to_go != 0) - break; + break; /* When at bottom of image, pad to fill the conversion buffer. */ if (prep->next_buf_row < prep->next_buf_stop) { - for (ci = 0; ci < cinfo->num_components; ci++) { - expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, - prep->next_buf_row, prep->next_buf_stop); - } - prep->next_buf_row = prep->next_buf_stop; + for (ci = 0; ci < cinfo->num_components; ci++) { + expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, + prep->next_buf_row, prep->next_buf_stop); + } + prep->next_buf_row = prep->next_buf_stop; } } /* If we've gotten enough data, downsample a row group. */ if (prep->next_buf_row == prep->next_buf_stop) { (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, - (JDIMENSION) prep->this_row_group, - output_buf, *out_row_group_ctr); + prep->color_buf, + (JDIMENSION) prep->this_row_group, + output_buf, *out_row_group_ctr); (*out_row_group_ctr)++; /* Advance pointers with wraparound as necessary. */ prep->this_row_group += cinfo->max_v_samp_factor; if (prep->this_row_group >= buf_height) - prep->this_row_group = 0; + prep->this_row_group = 0; if (prep->next_buf_row >= buf_height) - prep->next_buf_row = 0; + prep->next_buf_row = 0; prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor; } } @@ -269,7 +272,7 @@ my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int rgroup_height = cinfo->max_v_samp_factor; int ci, i; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JSAMPARRAY true_buffer, fake_buffer; /* Grab enough space for fake row pointers for all the components; @@ -277,8 +280,8 @@ */ fake_buffer = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (cinfo->num_components * 5 * rgroup_height) * - SIZEOF(JSAMPROW)); + (cinfo->num_components * 5 * rgroup_height) * + sizeof(JSAMPROW)); for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -289,11 +292,11 @@ true_buffer = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), + cinfo->max_h_samp_factor) / compptr->h_samp_factor), (JDIMENSION) (3 * rgroup_height)); /* Copy true buffer row pointers into the middle of the fake row array */ MEMCOPY(fake_buffer + rgroup_height, true_buffer, - 3 * rgroup_height * SIZEOF(JSAMPROW)); + 3 * rgroup_height * sizeof(JSAMPROW)); /* Fill in the above and below wraparound pointers */ for (i = 0; i < rgroup_height; i++) { fake_buffer[i] = true_buffer[2 * rgroup_height + i]; @@ -316,14 +319,14 @@ { my_prep_ptr prep; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; - if (need_full_buffer) /* safety check */ + if (need_full_buffer) /* safety check */ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); prep = (my_prep_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_prep_controller)); + sizeof(my_prep_controller)); cinfo->prep = (struct jpeg_c_prep_controller *) prep; prep->pub.start_pass = start_pass_prep; @@ -343,12 +346,12 @@ /* No context, just make it tall enough for one row group */ prep->pub.pre_process_data = pre_process_data; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { prep->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * + cinfo->max_h_samp_factor) / compptr->h_samp_factor), + (JDIMENSION) cinfo->max_v_samp_factor); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcsample.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcsample.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcsample.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcsample.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jcsample.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains downsampling routines. * @@ -53,20 +57,21 @@ /* Pointer to routine to downsample a single component */ -typedef JMETHOD(void, downsample1_ptr, - (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); +typedef void (*downsample1_ptr) (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY output_data); /* Private subobject */ typedef struct { - struct jpeg_downsampler pub; /* public fields */ + struct jpeg_downsampler pub; /* public fields */ /* Downsampling method pointers, one per component */ downsample1_ptr methods[MAX_COMPONENTS]; } my_downsampler; -typedef my_downsampler * my_downsample_ptr; +typedef my_downsampler *my_downsample_ptr; /* @@ -87,7 +92,7 @@ LOCAL(void) expand_right_edge (JSAMPARRAY image_data, int num_rows, - JDIMENSION input_cols, JDIMENSION output_cols) + JDIMENSION input_cols, JDIMENSION output_cols) { register JSAMPROW ptr; register JSAMPLE pixval; @@ -98,9 +103,9 @@ if (numcols > 0) { for (row = 0; row < num_rows; row++) { ptr = image_data[row] + input_cols; - pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ for (count = numcols; count > 0; count--) - *ptr++ = pixval; + *ptr++ = pixval; } } } @@ -114,12 +119,12 @@ METHODDEF(void) sep_downsample (j_compress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_index, - JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) + JSAMPIMAGE input_buf, JDIMENSION in_row_index, + JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) { my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JSAMPARRAY in_ptr, out_ptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -139,14 +144,14 @@ */ METHODDEF(void) -int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v; - JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ + JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; JSAMPROW inptr, outptr; - INT32 outvalue; + JLONG outvalue; h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor; v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor; @@ -158,19 +163,19 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * h_expand); + cinfo->image_width, output_cols * h_expand); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; for (outcol = 0, outcol_h = 0; outcol < output_cols; - outcol++, outcol_h += h_expand) { + outcol++, outcol_h += h_expand) { outvalue = 0; for (v = 0; v < v_expand; v++) { - inptr = input_data[inrow+v] + outcol_h; - for (h = 0; h < h_expand; h++) { - outvalue += (INT32) GETJSAMPLE(*inptr++); - } + inptr = input_data[inrow+v] + outcol_h; + for (h = 0; h < h_expand; h++) { + outvalue += (JLONG) GETJSAMPLE(*inptr++); + } } *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix); } @@ -186,15 +191,15 @@ */ METHODDEF(void) -fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { /* Copy the data */ jcopy_sample_rows(input_data, 0, output_data, 0, - cinfo->max_v_samp_factor, cinfo->image_width); + cinfo->max_v_samp_factor, cinfo->image_width); /* Edge-expand */ expand_right_edge(output_data, cinfo->max_v_samp_factor, - cinfo->image_width, compptr->width_in_blocks * DCTSIZE); + cinfo->image_width, compptr->width_in_blocks * DCTSIZE); } @@ -211,8 +216,8 @@ */ METHODDEF(void) -h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION outcol; @@ -225,16 +230,16 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr = input_data[outrow]; - bias = 0; /* bias = 0,1,0,1,... for successive samples */ + bias = 0; /* bias = 0,1,0,1,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) - + bias) >> 1); - bias ^= 1; /* 0=>1, 1=>0 */ + + bias) >> 1); + bias ^= 1; /* 0=>1, 1=>0 */ inptr += 2; } } @@ -248,8 +253,8 @@ */ METHODDEF(void) -h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION outcol; @@ -262,19 +267,19 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow+1]; - bias = 1; /* bias = 1,2,1,2,... for successive samples */ + bias = 1; /* bias = 1,2,1,2,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) - + bias) >> 2); - bias ^= 3; /* 1=>2, 2=>1 */ + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + + bias) >> 2); + bias ^= 3; /* 1=>2, 2=>1 */ inptr0 += 2; inptr1 += 2; } inrow += 2; @@ -291,21 +296,21 @@ */ METHODDEF(void) -h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION colctr; JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr; - INT32 membersum, neighsum, memberscale, neighscale; + JLONG membersum, neighsum, memberscale, neighscale; /* Expand input data enough to let all the output samples be generated * by the standard loop. Special-casing padded output would be more * efficient. */ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); /* We don't bother to form the individual "smoothed" input pixel values; * we can directly compute the output which is the average of the four @@ -333,14 +338,14 @@ /* Special case for first column: pretend column -1 is same as column 0 */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); neighsum += neighsum; neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; @@ -348,17 +353,17 @@ for (colctr = output_cols - 2; colctr > 0; colctr--) { /* sum of pixels directly mapped to this output element */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); /* sum of edge-neighbor pixels */ neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + + GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); /* The edge-neighbors count twice as much as corner-neighbors */ neighsum += neighsum; /* Add in the corner-neighbors */ neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); + GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); /* form final output scaled up by 2^16 */ membersum = membersum * memberscale + neighsum * neighscale; /* round, descale and output it */ @@ -368,14 +373,14 @@ /* Special case for last column */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + + GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); neighsum += neighsum; neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); + GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE) ((membersum + 32768) >> 16); @@ -392,13 +397,13 @@ METHODDEF(void) fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION colctr; JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; register JSAMPROW inptr, above_ptr, below_ptr, outptr; - INT32 membersum, neighsum, memberscale, neighscale; + JLONG membersum, neighsum, memberscale, neighscale; int colsum, lastcolsum, nextcolsum; /* Expand input data enough to let all the output samples be generated @@ -406,7 +411,7 @@ * efficient. */ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, - cinfo->image_width, output_cols); + cinfo->image_width, output_cols); /* Each of the eight neighbor pixels contributes a fraction SF to the * smoothed pixel, while the main pixel contributes (1-8*SF). In order @@ -425,10 +430,10 @@ /* Special case for first column */ colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); membersum = GETJSAMPLE(*inptr++); nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); neighsum = colsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); @@ -438,7 +443,7 @@ membersum = GETJSAMPLE(*inptr++); above_ptr++; below_ptr++; nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); neighsum = lastcolsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); @@ -467,12 +472,12 @@ { my_downsample_ptr downsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; boolean smoothok = TRUE; downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_downsampler)); + sizeof(my_downsampler)); cinfo->downsample = (struct jpeg_downsampler *) downsample; downsample->pub.start_pass = start_pass_downsample; downsample->pub.downsample = sep_downsample; @@ -485,35 +490,42 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + compptr->v_samp_factor == cinfo->max_v_samp_factor) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { - downsample->methods[ci] = fullsize_smooth_downsample; - downsample->pub.need_context_rows = TRUE; + downsample->methods[ci] = fullsize_smooth_downsample; + downsample->pub.need_context_rows = TRUE; } else #endif - downsample->methods[ci] = fullsize_downsample; + downsample->methods[ci] = fullsize_downsample; } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + compptr->v_samp_factor == cinfo->max_v_samp_factor) { smoothok = FALSE; if (jsimd_can_h2v1_downsample()) downsample->methods[ci] = jsimd_h2v1_downsample; else downsample->methods[ci] = h2v1_downsample; } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { + compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { - downsample->methods[ci] = h2v2_smooth_downsample; - downsample->pub.need_context_rows = TRUE; +#if defined(__mips__) + if (jsimd_can_h2v2_smooth_downsample()) + downsample->methods[ci] = jsimd_h2v2_smooth_downsample; + else +#endif + downsample->methods[ci] = h2v2_smooth_downsample; + downsample->pub.need_context_rows = TRUE; } else #endif - if (jsimd_can_h2v2_downsample()) - downsample->methods[ci] = jsimd_h2v2_downsample; - else - downsample->methods[ci] = h2v2_downsample; + { + if (jsimd_can_h2v2_downsample()) + downsample->methods[ci] = jsimd_h2v2_downsample; + else + downsample->methods[ci] = h2v2_downsample; + } } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 && - (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { + (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { smoothok = FALSE; downsample->methods[ci] = int_downsample; } else diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcstest.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jcstest.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jcstest.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jcstest.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* This program demonstrates how to check for the colorspace extension + capabilities of libjpeg-turbo at both compile time and run time. */ + +#include +#include +#include +#include + +#ifndef JCS_EXTENSIONS +#define JCS_EXT_RGB 6 +#endif +#if !defined(JCS_EXTENSIONS) || !defined(JCS_ALPHA_EXTENSIONS) +#define JCS_EXT_RGBA 12 +#endif + +static char lasterror[JMSG_LENGTH_MAX] = "No error"; + +typedef struct _error_mgr { + struct jpeg_error_mgr pub; + jmp_buf jb; +} error_mgr; + +static void my_error_exit(j_common_ptr cinfo) +{ + error_mgr *myerr = (error_mgr *)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->jb, 1); +} + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, lasterror); +} + +int main(void) +{ + int jcs_valid = -1, jcs_alpha_valid = -1; + struct jpeg_compress_struct cinfo; + error_mgr jerr; + + printf("libjpeg-turbo colorspace extensions:\n"); + #if JCS_EXTENSIONS + printf(" Present at compile time\n"); + #else + printf(" Not present at compile time\n"); + #endif + + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + jerr.pub.output_message = my_output_message; + + if(setjmp(jerr.jb)) { + /* this will execute if libjpeg has an error */ + jcs_valid = 0; + goto done; + } + + jpeg_create_compress(&cinfo); + cinfo.input_components = 3; + jpeg_set_defaults(&cinfo); + cinfo.in_color_space = JCS_EXT_RGB; + jpeg_default_colorspace(&cinfo); + jcs_valid = 1; + + done: + if (jcs_valid) + printf(" Working properly\n"); + else + printf(" Not working properly. Error returned was:\n %s\n", + lasterror); + + printf("libjpeg-turbo alpha colorspace extensions:\n"); + #if JCS_ALPHA_EXTENSIONS + printf(" Present at compile time\n"); + #else + printf(" Not present at compile time\n"); + #endif + + if(setjmp(jerr.jb)) { + /* this will execute if libjpeg has an error */ + jcs_alpha_valid = 0; + goto done2; + } + + cinfo.in_color_space = JCS_EXT_RGBA; + jpeg_default_colorspace(&cinfo); + jcs_alpha_valid = 1; + + done2: + if (jcs_alpha_valid) + printf(" Working properly\n"); + else + printf(" Not working properly. Error returned was:\n %s\n", + lasterror); + + jpeg_destroy_compress(&cinfo); + return 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jctrans.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jctrans.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jctrans.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jctrans.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jctrans.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1998, Thomas G. Lane. * Modified 2000-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains library routines for transcoding compression, * that is, writing raw DCT coefficient arrays to an output JPEG file. @@ -18,9 +21,9 @@ /* Forward declarations */ LOCAL(void) transencode_master_selection - JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)); + (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); LOCAL(void) transencode_coef_controller - JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)); + (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); /* @@ -36,7 +39,7 @@ */ GLOBAL(void) -jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays) +jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays) { if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); @@ -48,7 +51,7 @@ /* Perform master selection of active modules */ transencode_master_selection(cinfo, coef_arrays); /* Wait for jpeg_finish_compress() call */ - cinfo->next_scanline = 0; /* so jpeg_write_marker works */ + cinfo->next_scanline = 0; /* so jpeg_write_marker works */ cinfo->global_state = CSTATE_WRCOEFS; } @@ -62,9 +65,9 @@ GLOBAL(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo) + j_compress_ptr dstinfo) { - JQUANT_TBL ** qtblptr; + JQUANT_TBL **qtblptr; jpeg_component_info *incomp, *outcomp; JQUANT_TBL *c_quant, *slot_quant; int tblno, ci, coefi; @@ -96,10 +99,10 @@ if (srcinfo->quant_tbl_ptrs[tblno] != NULL) { qtblptr = & dstinfo->quant_tbl_ptrs[tblno]; if (*qtblptr == NULL) - *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo); + *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo); MEMCOPY((*qtblptr)->quantval, - srcinfo->quant_tbl_ptrs[tblno]->quantval, - SIZEOF((*qtblptr)->quantval)); + srcinfo->quant_tbl_ptrs[tblno]->quantval, + sizeof((*qtblptr)->quantval)); (*qtblptr)->sent_table = FALSE; } } @@ -109,7 +112,7 @@ dstinfo->num_components = srcinfo->num_components; if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS) ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info; ci < dstinfo->num_components; ci++, incomp++, outcomp++) { outcomp->component_id = incomp->component_id; @@ -122,14 +125,14 @@ */ tblno = outcomp->quant_tbl_no; if (tblno < 0 || tblno >= NUM_QUANT_TBLS || - srcinfo->quant_tbl_ptrs[tblno] == NULL) + srcinfo->quant_tbl_ptrs[tblno] == NULL) ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno); slot_quant = srcinfo->quant_tbl_ptrs[tblno]; c_quant = incomp->quant_table; if (c_quant != NULL) { for (coefi = 0; coefi < DCTSIZE2; coefi++) { - if (c_quant->quantval[coefi] != slot_quant->quantval[coefi]) - ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno); + if (c_quant->quantval[coefi] != slot_quant->quantval[coefi]) + ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno); } } /* Note: we do not copy the source's Huffman table assignments; @@ -163,7 +166,7 @@ LOCAL(void) transencode_master_selection (j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays) + jvirt_barray_ptr *coef_arrays) { /* Although we don't actually use input_components for transcoding, * jcmaster.c's initial_setup will complain if input_components is 0. @@ -219,19 +222,19 @@ typedef struct { struct jpeg_c_coef_controller pub; /* public fields */ - JDIMENSION iMCU_row_num; /* iMCU row # within image */ - JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ + JDIMENSION iMCU_row_num; /* iMCU row # within image */ + JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ /* Virtual block array for each component. */ - jvirt_barray_ptr * whole_image; + jvirt_barray_ptr *whole_image; /* Workspace for constructing dummy blocks at right/bottom edges. */ JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU]; } my_coef_controller; -typedef my_coef_controller * my_coef_ptr; +typedef my_coef_controller *my_coef_ptr; LOCAL(void) @@ -289,7 +292,7 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, ci, xindex, yindex, yoffset, blockcnt; @@ -312,44 +315,44 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (coef->iMCU_row_num < last_iMCU_row || - yindex+yoffset < compptr->last_row_height) { - /* Fill in pointers to real blocks in this row */ - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < blockcnt; xindex++) - MCU_buffer[blkn++] = buffer_ptr++; - } else { - /* At bottom of image, need a whole row of dummy blocks */ - xindex = 0; - } - /* Fill in any dummy blocks needed in this row. - * Dummy blocks are filled in the same way as in jccoefct.c: - * all zeroes in the AC entries, DC entries equal to previous - * block's DC value. The init routine has already zeroed the - * AC entries, so we need only set the DC entries correctly. - */ - for (; xindex < compptr->MCU_width; xindex++) { - MCU_buffer[blkn] = coef->dummy_buffer[blkn]; - MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0]; - blkn++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (coef->iMCU_row_num < last_iMCU_row || + yindex+yoffset < compptr->last_row_height) { + /* Fill in pointers to real blocks in this row */ + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < blockcnt; xindex++) + MCU_buffer[blkn++] = buffer_ptr++; + } else { + /* At bottom of image, need a whole row of dummy blocks */ + xindex = 0; + } + /* Fill in any dummy blocks needed in this row. + * Dummy blocks are filled in the same way as in jccoefct.c: + * all zeroes in the AC entries, DC entries equal to previous + * block's DC value. The init routine has already zeroed the + * AC entries, so we need only set the DC entries correctly. + */ + for (; xindex < compptr->MCU_width; xindex++) { + MCU_buffer[blkn] = coef->dummy_buffer[blkn]; + MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0]; + blkn++; + } + } } /* Try to write the MCU. */ if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -372,7 +375,7 @@ LOCAL(void) transencode_coef_controller (j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays) + jvirt_barray_ptr *coef_arrays) { my_coef_ptr coef; JBLOCKROW buffer; @@ -380,7 +383,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_c_coef_controller *) coef; coef->pub.start_pass = start_pass_coef; coef->pub.compress_data = compress_output; @@ -391,8 +394,8 @@ /* Allocate and pre-zero space for dummy DCT blocks. */ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); - jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); + jzero_far((void *) buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->dummy_buffer[i] = buffer + i; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdapimin.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdapimin.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdapimin.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdapimin.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdapimin.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the decompression half * of the JPEG library. These are the "minimum" API routines that may be @@ -19,6 +22,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jdmaster.h" /* @@ -32,12 +36,12 @@ int i; /* Guard against version mismatches between library and caller. */ - cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ + cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ if (version != JPEG_LIB_VERSION) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); - if (structsize != SIZEOF(struct jpeg_decompress_struct)) - ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize); + if (structsize != sizeof(struct jpeg_decompress_struct)) + ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, + (int) sizeof(struct jpeg_decompress_struct), (int) structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -48,7 +52,7 @@ { struct jpeg_error_mgr * err = cinfo->err; void * client_data = cinfo->client_data; /* ignore Purify complaint here */ - MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct)); + MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct)); cinfo->err = err; cinfo->client_data = client_data; } @@ -80,6 +84,14 @@ /* OK, I'm ready */ cinfo->global_state = DSTATE_START; + + /* The master struct is used to store extension parameters, so we allocate it + * here. + */ + cinfo->master = (struct jpeg_decomp_master *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_decomp_master)); + MEMZERO(cinfo->master, sizeof(my_decomp_master)); } @@ -121,22 +133,22 @@ cinfo->jpeg_color_space = JCS_GRAYSCALE; cinfo->out_color_space = JCS_GRAYSCALE; break; - + case 3: if (cinfo->saw_JFIF_marker) { cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */ } else if (cinfo->saw_Adobe_marker) { switch (cinfo->Adobe_transform) { case 0: - cinfo->jpeg_color_space = JCS_RGB; - break; + cinfo->jpeg_color_space = JCS_RGB; + break; case 1: - cinfo->jpeg_color_space = JCS_YCbCr; - break; + cinfo->jpeg_color_space = JCS_YCbCr; + break; default: - WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); - cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ - break; + WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); + cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ + break; } } else { /* Saw no special markers, try to guess from the component IDs */ @@ -145,31 +157,31 @@ int cid2 = cinfo->comp_info[2].component_id; if (cid0 == 1 && cid1 == 2 && cid2 == 3) - cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */ + cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */ else if (cid0 == 82 && cid1 == 71 && cid2 == 66) - cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */ + cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */ else { - TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2); - cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ + TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2); + cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ } } /* Always guess RGB is proper output colorspace. */ cinfo->out_color_space = JCS_RGB; break; - + case 4: if (cinfo->saw_Adobe_marker) { switch (cinfo->Adobe_transform) { case 0: - cinfo->jpeg_color_space = JCS_CMYK; - break; + cinfo->jpeg_color_space = JCS_CMYK; + break; case 2: - cinfo->jpeg_color_space = JCS_YCCK; - break; + cinfo->jpeg_color_space = JCS_YCCK; + break; default: - WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); - cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */ - break; + WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); + cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */ + break; } } else { /* No special markers, assume straight CMYK. */ @@ -177,7 +189,7 @@ } cinfo->out_color_space = JCS_CMYK; break; - + default: cinfo->jpeg_color_space = JCS_UNKNOWN; cinfo->out_color_space = JCS_UNKNOWN; @@ -185,7 +197,7 @@ } /* Set defaults for other decompression parameters. */ - cinfo->scale_num = 1; /* 1:1 scaling */ + cinfo->scale_num = 1; /* 1:1 scaling */ cinfo->scale_denom = 1; cinfo->output_gamma = 1.0; cinfo->buffered_image = FALSE; @@ -253,7 +265,7 @@ retcode = JPEG_HEADER_OK; break; case JPEG_REACHED_EOI: - if (require_image) /* Complain if application wanted an image */ + if (require_image) /* Complain if application wanted an image */ ERREXIT(cinfo, JERR_NO_IMAGE); /* Reset to start state; it would be safer to require the application to * call jpeg_abort, but we can't change it now for compatibility reasons. @@ -385,7 +397,7 @@ /* Read until EOI */ while (! cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) - return FALSE; /* Suspend, come back later */ + return FALSE; /* Suspend, come back later */ } /* Do final cleanup */ (*cinfo->src->term_source) (cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdapistd.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdapistd.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdapistd.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdapistd.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jdapistd.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the decompression half * of the JPEG library. These are the "standard" API routines that are @@ -15,14 +18,14 @@ * whole decompression library into a transcoder. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" -#include "jpegcomp.h" - +#include "jdmainct.h" +#include "jdcoefct.h" +#include "jdsample.h" +#include "jmemsys.h" /* Forward declarations */ -LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo)); +LOCAL(boolean) output_pass_setup (j_decompress_ptr cinfo); /* @@ -54,24 +57,24 @@ if (cinfo->inputctl->has_multiple_scans) { #ifdef D_MULTISCAN_FILES_SUPPORTED for (;;) { - int retcode; - /* Call progress monitor hook if present */ - if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); - /* Absorb some more input */ - retcode = (*cinfo->inputctl->consume_input) (cinfo); - if (retcode == JPEG_SUSPENDED) - return FALSE; - if (retcode == JPEG_REACHED_EOI) - break; - /* Advance progress counter if appropriate */ - if (cinfo->progress != NULL && - (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { - if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { - /* jdmaster underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; - } - } + int retcode; + /* Call progress monitor hook if present */ + if (cinfo->progress != NULL) + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + /* Absorb some more input */ + retcode = (*cinfo->inputctl->consume_input) (cinfo); + if (retcode == JPEG_SUSPENDED) + return FALSE; + if (retcode == JPEG_REACHED_EOI) + break; + /* Advance progress counter if appropriate */ + if (cinfo->progress != NULL && + (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { + if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { + /* jdmaster underestimated number of scans; ratchet up one scan */ + cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + } + } } #else ERREXIT(cinfo, JERR_NOT_COMPILED); @@ -110,16 +113,16 @@ JDIMENSION last_scanline; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->output_scanline; - cinfo->progress->pass_limit = (long) cinfo->output_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long) cinfo->output_scanline; + cinfo->progress->pass_limit = (long) cinfo->output_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); } /* Process some data */ last_scanline = cinfo->output_scanline; (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL, - &cinfo->output_scanline, (JDIMENSION) 0); + &cinfo->output_scanline, (JDIMENSION) 0); if (cinfo->output_scanline == last_scanline) - return FALSE; /* No progress made, must suspend */ + return FALSE; /* No progress made, must suspend */ } /* Finish up dummy pass, and set up for another one */ (*cinfo->master->finish_output_pass) (cinfo); @@ -138,6 +141,110 @@ /* + * Enable partial scanline decompression + * + * Must be called after jpeg_start_decompress() and before any calls to + * jpeg_read_scanlines() or jpeg_skip_scanlines(). + * + * Refer to libjpeg.txt for more information. + */ + +GLOBAL(void) +jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) +{ + int ci, align, orig_downsampled_width; + JDIMENSION input_xoffset; + boolean reinit_upsampler = FALSE; + jpeg_component_info *compptr; + + if (cinfo->global_state != DSTATE_SCANNING || cinfo->output_scanline != 0) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + if (!xoffset || !width) + ERREXIT(cinfo, JERR_BAD_CROP_SPEC); + + /* xoffset and width must fall within the output image dimensions. */ + if (*width == 0 || *xoffset + *width > cinfo->output_width) + ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); + + /* No need to do anything if the caller wants the entire width. */ + if (*width == cinfo->output_width) + return; + + /* Ensuring the proper alignment of xoffset is tricky. At minimum, it + * must align with an MCU boundary, because: + * + * (1) The IDCT is performed in blocks, and it is not feasible to modify + * the algorithm so that it can transform partial blocks. + * (2) Because of the SIMD extensions, any input buffer passed to the + * upsampling and color conversion routines must be aligned to the + * SIMD word size (for instance, 128-bit in the case of SSE2.) The + * easiest way to accomplish this without copying data is to ensure + * that upsampling and color conversion begin at the start of the + * first MCU column that will be inverse transformed. + * + * In practice, we actually impose a stricter alignment requirement. We + * require that xoffset be a multiple of the maximum MCU column width of all + * of the components (the "iMCU column width.") This is to simplify the + * single-pass decompression case, allowing us to use the same MCU column + * width for all of the components. + */ + align = cinfo->_min_DCT_scaled_size * cinfo->max_h_samp_factor; + + /* Adjust xoffset to the nearest iMCU boundary <= the requested value */ + input_xoffset = *xoffset; + *xoffset = (input_xoffset / align) * align; + + /* Adjust the width so that the right edge of the output image is as + * requested (only the left edge is altered.) It is important that calling + * programs check this value after this function returns, so that they can + * allocate an output buffer with the appropriate size. + */ + *width = *width + input_xoffset - *xoffset; + cinfo->output_width = *width; + + /* Set the first and last iMCU columns that we must decompress. These values + * will be used in single-scan decompressions. + */ + cinfo->master->first_iMCU_col = + (JDIMENSION) (long) (*xoffset) / (long) align; + cinfo->master->last_iMCU_col = + (JDIMENSION) jdiv_round_up((long) (*xoffset + cinfo->output_width), + (long) align) - 1; + + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + /* Set downsampled_width to the new output width. */ + orig_downsampled_width = compptr->downsampled_width; + compptr->downsampled_width = + (JDIMENSION) jdiv_round_up((long) (cinfo->output_width * + compptr->h_samp_factor), + (long) cinfo->max_h_samp_factor); + if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2) + reinit_upsampler = TRUE; + + /* Set the first and last iMCU columns that we must decompress. These + * values will be used in multi-scan decompressions. + */ + cinfo->master->first_MCU_col[ci] = + (JDIMENSION) (long) (*xoffset * compptr->h_samp_factor) / + (long) align; + cinfo->master->last_MCU_col[ci] = + (JDIMENSION) jdiv_round_up((long) ((*xoffset + cinfo->output_width) * + compptr->h_samp_factor), + (long) align) - 1; + } + + if (reinit_upsampler) { + cinfo->master->jinit_upsampler_no_alloc = TRUE; + jinit_upsampler(cinfo); + cinfo->master->jinit_upsampler_no_alloc = FALSE; + } +} + + +/* * Read some scanlines of data from the JPEG decompressor. * * The return value will be the number of lines actually read. @@ -152,7 +259,7 @@ GLOBAL(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION max_lines) + JDIMENSION max_lines) { JDIMENSION row_ctr; @@ -178,6 +285,236 @@ } +/* Dummy color convert function used by jpeg_skip_scanlines() */ +LOCAL(void) +noop_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) +{ +} + + +/* + * In some cases, it is best to call jpeg_read_scanlines() and discard the + * output, rather than skipping the scanlines, because this allows us to + * maintain the internal state of the context-based upsampler. In these cases, + * we set up and tear down a dummy color converter in order to avoid valgrind + * errors and to achieve the best possible performance. + */ + +LOCAL(void) +read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +{ + JDIMENSION n; + void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows); + + color_convert = cinfo->cconvert->color_convert; + cinfo->cconvert->color_convert = noop_convert; + + for (n = 0; n < num_lines; n++) + jpeg_read_scanlines(cinfo, NULL, 1); + + cinfo->cconvert->color_convert = color_convert; +} + + +/* + * Called by jpeg_skip_scanlines(). This partially skips a decompress block by + * incrementing the rowgroup counter. + */ + +LOCAL(void) +increment_simple_rowgroup_ctr (j_decompress_ptr cinfo, JDIMENSION rows) +{ + JDIMENSION rows_left; + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + + /* Increment the counter to the next row group after the skipped rows. */ + main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor; + + /* Partially skipping a row group would involve modifying the internal state + * of the upsampler, so read the remaining rows into a dummy buffer instead. + */ + rows_left = rows % cinfo->max_v_samp_factor; + cinfo->output_scanline += rows - rows_left; + + read_and_discard_scanlines(cinfo, rows_left); +} + +/* + * Skips some scanlines of data from the JPEG decompressor. + * + * The return value will be the number of lines actually skipped. If skipping + * num_lines would move beyond the end of the image, then the actual number of + * lines remaining in the image is returned. Otherwise, the return value will + * be equal to num_lines. + * + * Refer to libjpeg.txt for more information. + */ + +GLOBAL(JDIMENSION) +jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +{ + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + JDIMENSION i, x; + int y; + JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row; + JDIMENSION lines_to_skip, lines_to_read; + + if (cinfo->global_state != DSTATE_SCANNING) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + /* Do not skip past the bottom of the image. */ + if (cinfo->output_scanline + num_lines >= cinfo->output_height) { + cinfo->output_scanline = cinfo->output_height; + return cinfo->output_height - cinfo->output_scanline; + } + + if (num_lines == 0) + return 0; + + lines_per_iMCU_row = cinfo->_min_DCT_scaled_size * cinfo->max_v_samp_factor; + lines_left_in_iMCU_row = + (lines_per_iMCU_row - (cinfo->output_scanline % lines_per_iMCU_row)) % + lines_per_iMCU_row; + lines_after_iMCU_row = num_lines - lines_left_in_iMCU_row; + + /* Skip the lines remaining in the current iMCU row. When upsampling + * requires context rows, we need the previous and next rows in order to read + * the current row. This adds some complexity. + */ + if (cinfo->upsample->need_context_rows) { + /* If the skipped lines would not move us past the current iMCU row, we + * read the lines and ignore them. There might be a faster way of doing + * this, but we are facing increasing complexity for diminishing returns. + * The increasing complexity would be a by-product of meddling with the + * state machine used to skip context rows. Near the end of an iMCU row, + * the next iMCU row may have already been entropy-decoded. In this unique + * case, we will read the next iMCU row if we cannot skip past it as well. + */ + if ((num_lines < lines_left_in_iMCU_row + 1) || + (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full && + lines_after_iMCU_row < lines_per_iMCU_row + 1)) { + read_and_discard_scanlines(cinfo, num_lines); + return num_lines; + } + + /* If the next iMCU row has already been entropy-decoded, make sure that + * we do not skip too far. + */ + if (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full) { + cinfo->output_scanline += lines_left_in_iMCU_row + lines_per_iMCU_row; + lines_after_iMCU_row -= lines_per_iMCU_row; + } else { + cinfo->output_scanline += lines_left_in_iMCU_row; + } + + /* If we have just completed the first block, adjust the buffer pointers */ + if (main_ptr->iMCU_row_ctr == 0 || + (main_ptr->iMCU_row_ctr == 1 && lines_left_in_iMCU_row > 2)) + set_wraparound_pointers(cinfo); + main_ptr->buffer_full = FALSE; + main_ptr->rowgroup_ctr = 0; + main_ptr->context_state = CTX_PREPARE_FOR_IMCU; + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } + + /* Skipping is much simpler when context rows are not required. */ + else { + if (num_lines < lines_left_in_iMCU_row) { + increment_simple_rowgroup_ctr(cinfo, num_lines); + return num_lines; + } else { + cinfo->output_scanline += lines_left_in_iMCU_row; + main_ptr->buffer_full = FALSE; + main_ptr->rowgroup_ctr = 0; + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } + } + + /* Calculate how many full iMCU rows we can skip. */ + if (cinfo->upsample->need_context_rows) + lines_to_skip = ((lines_after_iMCU_row - 1) / lines_per_iMCU_row) * + lines_per_iMCU_row; + else + lines_to_skip = (lines_after_iMCU_row / lines_per_iMCU_row) * + lines_per_iMCU_row; + /* Calculate the number of lines that remain to be skipped after skipping all + * of the full iMCU rows that we can. We will not read these lines unless we + * have to. + */ + lines_to_read = lines_after_iMCU_row - lines_to_skip; + + /* For images requiring multiple scans (progressive, non-interleaved, etc.), + * all of the entropy decoding occurs in jpeg_start_decompress(), assuming + * that the input data source is non-suspending. This makes skipping easy. + */ + if (cinfo->inputctl->has_multiple_scans) { + if (cinfo->upsample->need_context_rows) { + cinfo->output_scanline += lines_to_skip; + cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; + main_ptr->iMCU_row_ctr += lines_after_iMCU_row / lines_per_iMCU_row; + /* It is complex to properly move to the middle of a context block, so + * read the remaining lines instead of skipping them. + */ + read_and_discard_scanlines(cinfo, lines_to_read); + } else { + cinfo->output_scanline += lines_to_skip; + cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; + increment_simple_rowgroup_ctr(cinfo, lines_to_read); + } + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + return num_lines; + } + + /* Skip the iMCU rows that we can safely skip. */ + for (i = 0; i < lines_to_skip; i += lines_per_iMCU_row) { + for (y = 0; y < coef->MCU_rows_per_iMCU_row; y++) { + for (x = 0; x < cinfo->MCUs_per_row; x++) { + /* Calling decode_mcu() with a NULL pointer causes it to discard the + * decoded coefficients. This is ~5% faster for large subsets, but + * it's tough to tell a difference for smaller images. + */ + (*cinfo->entropy->decode_mcu) (cinfo, NULL); + } + } + cinfo->input_iMCU_row++; + cinfo->output_iMCU_row++; + if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) + start_iMCU_row(cinfo); + else + (*cinfo->inputctl->finish_input_pass) (cinfo); + } + cinfo->output_scanline += lines_to_skip; + + if (cinfo->upsample->need_context_rows) { + /* Context-based upsampling keeps track of iMCU rows. */ + main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row; + + /* It is complex to properly move to the middle of a context block, so + * read the remaining lines instead of skipping them. + */ + read_and_discard_scanlines(cinfo, lines_to_read); + } else { + increment_simple_rowgroup_ctr(cinfo, lines_to_read); + } + + /* Since skipping lines involves skipping the upsampling step, the value of + * "rows_to_go" will become invalid unless we set it here. NOTE: This is a + * bit odd, since "rows_to_go" seems to be redundantly keeping track of + * output_scanline. + */ + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + + /* Always skip the requested number of lines. */ + return num_lines; +} + /* * Alternate entry point to read raw data. * Processes exactly one iMCU row per call, unless suspended. @@ -185,7 +522,7 @@ GLOBAL(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION max_lines) + JDIMENSION max_lines) { JDIMENSION lines_per_iMCU_row; @@ -210,7 +547,7 @@ /* Decompress directly into user's buffer. */ if (! (*cinfo->coef->decompress_data) (cinfo, data)) - return 0; /* suspension forced, can do nothing more */ + return 0; /* suspension forced, can do nothing more */ /* OK, we processed one iMCU row. */ cinfo->output_scanline += lines_per_iMCU_row; @@ -266,9 +603,9 @@ } /* Read markers looking for SOS or EOI */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + ! cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) - return FALSE; /* Suspend, come back later */ + return FALSE; /* Suspend, come back later */ } cinfo->global_state = DSTATE_BUFIMAGE; return TRUE; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdarith.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdarith.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdarith.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdarith.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdarith.c * - * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * This file was part of the Independent JPEG Group's software: + * Developed 1997-2015 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains portable arithmetic entropy decoding routines for JPEG * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). @@ -23,8 +26,8 @@ typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ - INT32 c; /* C register, base of coding interval + input bit buffer */ - INT32 a; /* A register, normalized size of coding interval */ + JLONG c; /* C register, base of coding interval + input bit buffer */ + JLONG a; /* A register, normalized size of coding interval */ int ct; /* bit shift counter, # of bits left in bit buffer part of C */ /* init: ct = -16 */ /* run: ct = 0..7 */ @@ -32,17 +35,17 @@ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to statistics areas (these workspaces have image lifespan) */ - unsigned char * dc_stats[NUM_ARITH_TBLS]; - unsigned char * ac_stats[NUM_ARITH_TBLS]; + unsigned char *dc_stats[NUM_ARITH_TBLS]; + unsigned char *ac_stats[NUM_ARITH_TBLS]; /* Statistics bin for coding with fixed probability 0.5 */ unsigned char fixed_bin[4]; } arith_entropy_decoder; -typedef arith_entropy_decoder * arith_entropy_ptr; +typedef arith_entropy_decoder *arith_entropy_ptr; /* The following two definitions specify the allocation chunk size * for the statistics area. @@ -65,7 +68,7 @@ get_byte (j_decompress_ptr cinfo) /* Read next input byte; we do not support suspension in this module. */ { - struct jpeg_source_mgr * src = cinfo->src; + struct jpeg_source_mgr *src = cinfo->src; if (src->bytes_in_buffer == 0) if (! (*src->fill_input_buffer) (cinfo)) @@ -94,7 +97,7 @@ * (instead of fixed) with the bit shift counter CT. * Thus, we also need only one (variable instead of * fixed size) shift for the LPS/MPS decision, and - * we can get away with any renormalization update + * we can do away with any renormalization update * of C (except for new data insertion, of course). * * I've also introduced a new scheme for accessing @@ -107,7 +110,7 @@ { register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; register unsigned char nl, nm; - register INT32 qe, temp; + register JLONG qe, temp; register int sv, data; /* Renormalization & data input per section D.2.6 */ @@ -115,32 +118,32 @@ if (--e->ct < 0) { /* Need to fetch next data byte */ if (cinfo->unread_marker) - data = 0; /* stuff zero data */ + data = 0; /* stuff zero data */ else { - data = get_byte(cinfo); /* read next input byte */ - if (data == 0xFF) { /* zero stuff or marker code */ - do data = get_byte(cinfo); - while (data == 0xFF); /* swallow extra 0xFF bytes */ - if (data == 0) - data = 0xFF; /* discard stuffed zero byte */ - else { - /* Note: Different from the Huffman decoder, hitting - * a marker while processing the compressed data - * segment is legal in arithmetic coding. - * The convention is to supply zero data - * then until decoding is complete. - */ - cinfo->unread_marker = data; - data = 0; - } - } + data = get_byte(cinfo); /* read next input byte */ + if (data == 0xFF) { /* zero stuff or marker code */ + do data = get_byte(cinfo); + while (data == 0xFF); /* swallow extra 0xFF bytes */ + if (data == 0) + data = 0xFF; /* discard stuffed zero byte */ + else { + /* Note: Different from the Huffman decoder, hitting + * a marker while processing the compressed data + * segment is legal in arithmetic coding. + * The convention is to supply zero data + * then until decoding is complete. + */ + cinfo->unread_marker = data; + data = 0; + } + } } e->c = (e->c << 8) | data; /* insert data into C register */ - if ((e->ct += 8) < 0) /* update bit shift counter */ - /* Need more initial bytes */ - if (++e->ct == 0) - /* Got 2 initial bytes -> re-init A and exit loop */ - e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */ + if ((e->ct += 8) < 0) /* update bit shift counter */ + /* Need more initial bytes */ + if (++e->ct == 0) + /* Got 2 initial bytes -> re-init A and exit loop */ + e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */ } e->a <<= 1; } @@ -149,9 +152,9 @@ * Qe values and probability estimation state machine */ sv = *st; - qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ temp = e->a - qe; @@ -162,19 +165,19 @@ /* Conditional LPS (less probable symbol) exchange */ if (e->a < qe) { e->a = qe; - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } else { e->a = qe; - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ - sv ^= 0x80; /* Exchange LPS/MPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ } } else if (e->a < 0x8000L) { /* Conditional MPS (more probable symbol) exchange */ if (e->a < qe) { - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ - sv ^= 0x80; /* Exchange LPS/MPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ } else { - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } } @@ -191,7 +194,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Advance past the RSTn marker */ if (! (*cinfo->marker->read_restart_marker) (cinfo)) @@ -200,13 +203,13 @@ /* Re-initialize statistics areas */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; - if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS); /* Reset DC predictions to 0 */ entropy->last_dc_val[ci] = 0; entropy->dc_context[ci] = 0; } - if (! cinfo->progressive_mode || cinfo->Ss) { + if (!cinfo->progressive_mode || cinfo->Ss) { MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS); } } @@ -214,7 +217,7 @@ /* Reset arithmetic decoding variables */ entropy->c = 0; entropy->a = 0; - entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ /* Reset restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -253,7 +256,7 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* Outer loop handles each block in the MCU */ @@ -277,34 +280,34 @@ st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else - entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; entropy->last_dc_val[ci] += v; } /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al); + (*block)[0] = (JCOEF) LEFT_SHIFT(entropy->last_dc_val[ci], cinfo->Al); } return TRUE; @@ -332,7 +335,7 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* There is always only one block per MCU */ block = MCU_data[0]; @@ -343,13 +346,13 @@ /* Figure F.20: Decode_AC_coefficients */ for (k = cinfo->Ss; k <= cinfo->Se; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { st += 3; k++; if (k > cinfo->Se) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; } } /* Figure F.21: Decoding nonzero value v */ @@ -359,17 +362,17 @@ /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { if (arith_decode(cinfo, st)) { - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } } v = m; @@ -404,8 +407,8 @@ entropy->restarts_to_go--; } - st = entropy->fixed_bin; /* use fixed probability estimation */ - p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + st = entropy->fixed_bin; /* use fixed probability estimation */ + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ /* Outer loop handles each block in the MCU */ @@ -440,14 +443,14 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* There is always only one block per MCU */ block = MCU_data[0]; tbl = cinfo->cur_comp_info[0]->ac_tbl_no; - p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ /* Establish EOBx (previous stage end-of-block) index */ for (kex = cinfo->Se; kex > 0; kex--) @@ -456,30 +459,30 @@ for (k = cinfo->Ss; k <= cinfo->Se; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); if (k > kex) - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ for (;;) { thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef) { /* previously nonzero coef */ - if (arith_decode(cinfo, st + 2)) { - if (*thiscoef < 0) - *thiscoef += m1; - else - *thiscoef += p1; - } - break; - } - if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */ - if (arith_decode(cinfo, entropy->fixed_bin)) - *thiscoef = m1; - else - *thiscoef = p1; - break; + if (*thiscoef) { /* previously nonzero coef */ + if (arith_decode(cinfo, st + 2)) { + if (*thiscoef < 0) + *thiscoef += m1; + else + *thiscoef += p1; + } + break; + } + if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */ + if (arith_decode(cinfo, entropy->fixed_bin)) + *thiscoef = m1; + else + *thiscoef = p1; + break; } st += 3; k++; if (k > cinfo->Se) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; } } } @@ -496,7 +499,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl, sign, k; @@ -509,12 +512,12 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* Outer loop handles each block in the MCU */ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; + block = MCU_data ? MCU_data[blkn] : NULL; ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; @@ -535,33 +538,34 @@ st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else - entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; entropy->last_dc_val[ci] += v; } - (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; + if (block) + (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */ @@ -570,14 +574,14 @@ /* Figure F.20: Decode_AC_coefficients */ for (k = 1; k <= DCTSIZE2 - 1; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { - st += 3; k++; - if (k > DCTSIZE2 - 1) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; - } + st += 3; k++; + if (k > DCTSIZE2 - 1) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; + } } /* Figure F.21: Decoding nonzero value v */ /* Figure F.22: Decoding the sign of v */ @@ -585,27 +589,28 @@ st += 2; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - if (arith_decode(cinfo, st)) { - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } - } + if (arith_decode(cinfo, st)) { + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } } v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; - (*block)[jpeg_natural_order[k]] = (JCOEF) v; + if (block) + (*block)[jpeg_natural_order[k]] = (JCOEF) v; } } @@ -622,30 +627,30 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (cinfo->progressive_mode) { /* Validate progressive scan parameters */ if (cinfo->Ss == 0) { if (cinfo->Se != 0) - goto bad; + goto bad; } else { /* need not check Ss/Se < 0 since they came from unsigned bytes */ if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1) - goto bad; + goto bad; /* AC scans may have only one component */ if (cinfo->comps_in_scan != 1) - goto bad; + goto bad; } if (cinfo->Ah != 0) { /* Successive approximation refinement scan: must have Al = Ah-1. */ if (cinfo->Ah-1 != cinfo->Al) - goto bad; + goto bad; } - if (cinfo->Al > 13) { /* need not check for < 0 */ + if (cinfo->Al > 13) { /* need not check for < 0 */ bad: ERREXIT4(cinfo, JERR_BAD_PROGRESSION, - cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); } /* Update progression status, and verify that scan order is legal. * Note that inter-scan inconsistencies are treated as warnings @@ -655,32 +660,32 @@ int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; int *coef_bit_ptr = & cinfo->coef_bits[cindex][0]; if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { - int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; - if (cinfo->Ah != expected) - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); - coef_bit_ptr[coefi] = cinfo->Al; + int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; + if (cinfo->Ah != expected) + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + coef_bit_ptr[coefi] = cinfo->Al; } } /* Select MCU decoding routine */ if (cinfo->Ah == 0) { if (cinfo->Ss == 0) - entropy->pub.decode_mcu = decode_mcu_DC_first; + entropy->pub.decode_mcu = decode_mcu_DC_first; else - entropy->pub.decode_mcu = decode_mcu_AC_first; + entropy->pub.decode_mcu = decode_mcu_AC_first; } else { if (cinfo->Ss == 0) - entropy->pub.decode_mcu = decode_mcu_DC_refine; + entropy->pub.decode_mcu = decode_mcu_DC_refine; else - entropy->pub.decode_mcu = decode_mcu_AC_refine; + entropy->pub.decode_mcu = decode_mcu_AC_refine; } } else { /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. * This ought to be an error condition, but we make it a warning. */ if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 || - (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1)) + (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1)) WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); /* Select MCU decoding routine */ entropy->pub.decode_mcu = decode_mcu; @@ -689,25 +694,25 @@ /* Allocate & initialize requested statistics areas */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; - if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { tbl = compptr->dc_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; entropy->dc_context[ci] = 0; } - if (! cinfo->progressive_mode || cinfo->Ss) { + if (!cinfo->progressive_mode || cinfo->Ss) { tbl = compptr->ac_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); } } @@ -715,7 +720,7 @@ /* Initialize arithmetic decoding variables */ entropy->c = 0; entropy->a = 0; - entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ /* Initialize restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -734,7 +739,7 @@ entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(arith_entropy_decoder)); + sizeof(arith_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass; @@ -752,10 +757,10 @@ int *coef_bit_ptr, ci; cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*SIZEOF(int)); + cinfo->num_components*DCTSIZE2*sizeof(int)); coef_bit_ptr = & cinfo->coef_bits[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) - *coef_bit_ptr++ = -1; + *coef_bit_ptr++ = -1; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatadst.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatadst.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatadst.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatadst.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,283 @@ +/* + * jdatadst.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2012 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains compression data destination routines for the case of + * emitting JPEG data to memory or to a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different destination manager. + * IMPORTANT: we assume that fwrite() will correctly transcribe an array of + * JOCTETs into 8-bit-wide elements on external storage. If char is wider + * than 8 bits on your machine, you may need to do some tweaking. + */ + +/* this is not a core library module, so it doesn't define JPEG_INTERNALS */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); +#endif + + +/* Expanded data destination object for stdio output */ + +typedef struct { + struct jpeg_destination_mgr pub; /* public fields */ + + FILE *outfile; /* target stream */ + JOCTET *buffer; /* start of buffer */ +} my_destination_mgr; + +typedef my_destination_mgr *my_dest_ptr; + +#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* Expanded data destination object for memory output */ + +typedef struct { + struct jpeg_destination_mgr pub; /* public fields */ + + unsigned char **outbuffer; /* target buffer */ + unsigned long *outsize; + unsigned char *newbuffer; /* newly allocated buffer */ + JOCTET *buffer; /* start of buffer */ + size_t bufsize; +} my_mem_destination_mgr; + +typedef my_mem_destination_mgr *my_mem_dest_ptr; +#endif + + +/* + * Initialize destination --- called by jpeg_start_compress + * before any data is actually written. + */ + +METHODDEF(void) +init_destination (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + + /* Allocate the output buffer --- it will be released when done with image */ + dest->buffer = (JOCTET *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + OUTPUT_BUF_SIZE * sizeof(JOCTET)); + + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +init_mem_destination (j_compress_ptr cinfo) +{ + /* no work necessary here */ +} +#endif + + +/* + * Empty the output buffer --- called whenever buffer fills up. + * + * In typical applications, this should write the entire output buffer + * (ignoring the current state of next_output_byte & free_in_buffer), + * reset the pointer & count to the start of the buffer, and return TRUE + * indicating that the buffer has been dumped. + * + * In applications that need to be able to suspend compression due to output + * overrun, a FALSE return indicates that the buffer cannot be emptied now. + * In this situation, the compressor will return to its caller (possibly with + * an indication that it has not accepted all the supplied scanlines). The + * application should resume compression after it has made more room in the + * output buffer. Note that there are substantial restrictions on the use of + * suspension --- see the documentation. + * + * When suspending, the compressor will back up to a convenient restart point + * (typically the start of the current MCU). next_output_byte & free_in_buffer + * indicate where the restart point will be if the current call returns FALSE. + * Data beyond this point will be regenerated after resumption, so do not + * write it out when emptying the buffer externally. + */ + +METHODDEF(boolean) +empty_output_buffer (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + + if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) != + (size_t) OUTPUT_BUF_SIZE) + ERREXIT(cinfo, JERR_FILE_WRITE); + + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; + + return TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(boolean) +empty_mem_output_buffer (j_compress_ptr cinfo) +{ + size_t nextsize; + JOCTET *nextbuffer; + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + /* Try to allocate new buffer with double size */ + nextsize = dest->bufsize * 2; + nextbuffer = (JOCTET *) malloc(nextsize); + + if (nextbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + + MEMCOPY(nextbuffer, dest->buffer, dest->bufsize); + + if (dest->newbuffer != NULL) + free(dest->newbuffer); + + dest->newbuffer = nextbuffer; + + dest->pub.next_output_byte = nextbuffer + dest->bufsize; + dest->pub.free_in_buffer = dest->bufsize; + + dest->buffer = nextbuffer; + dest->bufsize = nextsize; + + return TRUE; +} +#endif + + +/* + * Terminate destination --- called by jpeg_finish_compress + * after all data has been written. Usually needs to flush buffer. + * + * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding + * application must deal with any cleanup that should happen even + * for error exit. + */ + +METHODDEF(void) +term_destination (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer; + + /* Write any data remaining in the buffer */ + if (datacount > 0) { + if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount) + ERREXIT(cinfo, JERR_FILE_WRITE); + } + fflush(dest->outfile); + /* Make sure we wrote the output file OK */ + if (ferror(dest->outfile)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +term_mem_destination (j_compress_ptr cinfo) +{ + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + *dest->outbuffer = dest->buffer; + *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer); +} +#endif + + +/* + * Prepare for output to a stdio stream. + * The caller must have already opened the stream, and is responsible + * for closing it after finishing compression. + */ + +GLOBAL(void) +jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) +{ + my_dest_ptr dest; + + /* The destination object is made permanent so that multiple JPEG images + * can be written to the same file without re-executing jpeg_stdio_dest. + * This makes it dangerous to use this manager and a different destination + * manager serially with the same JPEG object, because their private object + * sizes may be different. Caveat programmer. + */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + cinfo->dest = (struct jpeg_destination_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_destination_mgr)); + } + + dest = (my_dest_ptr) cinfo->dest; + dest->pub.init_destination = init_destination; + dest->pub.empty_output_buffer = empty_output_buffer; + dest->pub.term_destination = term_destination; + dest->outfile = outfile; +} + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* + * Prepare for output to a memory buffer. + * The caller may supply an own initial buffer with appropriate size. + * Otherwise, or when the actual data output exceeds the given size, + * the library adapts the buffer size as necessary. + * The standard library functions malloc/free are used for allocating + * larger memory, so the buffer is available to the application after + * finishing compression, and then the application is responsible for + * freeing the requested memory. + * Note: An initial buffer supplied by the caller is expected to be + * managed by the application. The library does not free such buffer + * when allocating a larger buffer. + */ + +GLOBAL(void) +jpeg_mem_dest (j_compress_ptr cinfo, + unsigned char **outbuffer, unsigned long *outsize) +{ + my_mem_dest_ptr dest; + + if (outbuffer == NULL || outsize == NULL) /* sanity check */ + ERREXIT(cinfo, JERR_BUFFER_SIZE); + + /* The destination object is made permanent so that multiple JPEG images + * can be written to the same buffer without re-executing jpeg_mem_dest. + */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + cinfo->dest = (struct jpeg_destination_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_mem_destination_mgr)); + } + + dest = (my_mem_dest_ptr) cinfo->dest; + dest->pub.init_destination = init_mem_destination; + dest->pub.empty_output_buffer = empty_mem_output_buffer; + dest->pub.term_destination = term_mem_destination; + dest->outbuffer = outbuffer; + dest->outsize = outsize; + dest->newbuffer = NULL; + + if (*outbuffer == NULL || *outsize == 0) { + /* Allocate initial buffer */ + dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); + if (dest->newbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + *outsize = OUTPUT_BUF_SIZE; + } + + dest->pub.next_output_byte = dest->buffer = *outbuffer; + dest->pub.free_in_buffer = dest->bufsize = *outsize; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatadst-tj.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatadst-tj.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatadst-tj.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatadst-tj.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* - * jdatadst.c + * jdatadst-tj.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Modified 2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2009-2012 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2014 D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains compression data destination routines for the case of * emitting JPEG data to memory or to a file (or any stdio stream). @@ -20,13 +23,13 @@ #include "jpeglib.h" #include "jerror.h" -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); #endif -#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ +#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ /* Expanded data destination object for memory output */ @@ -34,15 +37,15 @@ typedef struct { struct jpeg_destination_mgr pub; /* public fields */ - unsigned char ** outbuffer; /* target buffer */ - unsigned long * outsize; - unsigned char * newbuffer; /* newly allocated buffer */ - JOCTET * buffer; /* start of buffer */ + unsigned char **outbuffer; /* target buffer */ + unsigned long *outsize; + unsigned char *newbuffer; /* newly allocated buffer */ + JOCTET *buffer; /* start of buffer */ size_t bufsize; boolean alloc; } my_mem_destination_mgr; -typedef my_mem_destination_mgr * my_mem_dest_ptr; +typedef my_mem_destination_mgr *my_mem_dest_ptr; /* @@ -84,14 +87,14 @@ empty_mem_output_buffer (j_compress_ptr cinfo) { size_t nextsize; - JOCTET * nextbuffer; + JOCTET *nextbuffer; my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; if (!dest->alloc) ERREXIT(cinfo, JERR_BUFFER_SIZE); /* Try to allocate new buffer with double size */ nextsize = dest->bufsize * 2; - nextbuffer = malloc(nextsize); + nextbuffer = (JOCTET *) malloc(nextsize); if (nextbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); @@ -145,29 +148,33 @@ GLOBAL(void) jpeg_mem_dest_tj (j_compress_ptr cinfo, - unsigned char ** outbuffer, unsigned long * outsize, - boolean alloc) + unsigned char **outbuffer, unsigned long *outsize, + boolean alloc) { + boolean reused = FALSE; my_mem_dest_ptr dest; - if (outbuffer == NULL || outsize == NULL) /* sanity check */ + if (outbuffer == NULL || outsize == NULL) /* sanity check */ ERREXIT(cinfo, JERR_BUFFER_SIZE); /* The destination object is made permanent so that multiple JPEG images * can be written to the same buffer without re-executing jpeg_mem_dest. */ - if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_mem_destination_mgr)); + sizeof(my_mem_destination_mgr)); dest = (my_mem_dest_ptr) cinfo->dest; dest->newbuffer = NULL; + dest->buffer = NULL; } dest = (my_mem_dest_ptr) cinfo->dest; dest->pub.init_destination = init_mem_destination; dest->pub.empty_output_buffer = empty_mem_output_buffer; dest->pub.term_destination = term_mem_destination; + if (dest->buffer == *outbuffer && *outbuffer != NULL && alloc) + reused = TRUE; dest->outbuffer = outbuffer; dest->outsize = outsize; dest->alloc = alloc; @@ -175,7 +182,7 @@ if (*outbuffer == NULL || *outsize == 0) { if (alloc) { /* Allocate initial buffer */ - dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE); + dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); if (dest->newbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); *outsize = OUTPUT_BUF_SIZE; @@ -184,5 +191,7 @@ } dest->pub.next_output_byte = dest->buffer = *outbuffer; - dest->pub.free_in_buffer = dest->bufsize = *outsize; + if (!reused) + dest->bufsize = *outsize; + dest->pub.free_in_buffer = dest->bufsize; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatasrc.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatasrc.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatasrc.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatasrc.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,284 @@ +/* + * jdatasrc.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains decompression data source routines for the case of + * reading JPEG data from memory or from a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different source manager. + * IMPORTANT: we assume that fread() will correctly transcribe an array of + * JOCTETs from 8-bit-wide elements on external storage. If char is wider + * than 8 bits on your machine, you may need to do some tweaking. + */ + +/* this is not a core library module, so it doesn't define JPEG_INTERNALS */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + + +/* Expanded data source object for stdio input */ + +typedef struct { + struct jpeg_source_mgr pub; /* public fields */ + + FILE *infile; /* source stream */ + JOCTET *buffer; /* start of buffer */ + boolean start_of_file; /* have we gotten any data yet? */ +} my_source_mgr; + +typedef my_source_mgr *my_src_ptr; + +#define INPUT_BUF_SIZE 4096 /* choose an efficiently fread'able size */ + + +/* + * Initialize source --- called by jpeg_read_header + * before any data is actually read. + */ + +METHODDEF(void) +init_source (j_decompress_ptr cinfo) +{ + my_src_ptr src = (my_src_ptr) cinfo->src; + + /* We reset the empty-input-file flag for each image, + * but we don't clear the input buffer. + * This is correct behavior for reading a series of images from one source. + */ + src->start_of_file = TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +init_mem_source (j_decompress_ptr cinfo) +{ + /* no work necessary here */ +} +#endif + + +/* + * Fill the input buffer --- called whenever buffer is emptied. + * + * In typical applications, this should read fresh data into the buffer + * (ignoring the current state of next_input_byte & bytes_in_buffer), + * reset the pointer & count to the start of the buffer, and return TRUE + * indicating that the buffer has been reloaded. It is not necessary to + * fill the buffer entirely, only to obtain at least one more byte. + * + * There is no such thing as an EOF return. If the end of the file has been + * reached, the routine has a choice of ERREXIT() or inserting fake data into + * the buffer. In most cases, generating a warning message and inserting a + * fake EOI marker is the best course of action --- this will allow the + * decompressor to output however much of the image is there. However, + * the resulting error message is misleading if the real problem is an empty + * input file, so we handle that case specially. + * + * In applications that need to be able to suspend compression due to input + * not being available yet, a FALSE return indicates that no more data can be + * obtained right now, but more may be forthcoming later. In this situation, + * the decompressor will return to its caller (with an indication of the + * number of scanlines it has read, if any). The application should resume + * decompression after it has loaded more data into the input buffer. Note + * that there are substantial restrictions on the use of suspension --- see + * the documentation. + * + * When suspending, the decompressor will back up to a convenient restart point + * (typically the start of the current MCU). next_input_byte & bytes_in_buffer + * indicate where the restart point will be if the current call returns FALSE. + * Data beyond this point must be rescanned after resumption, so move it to + * the front of the buffer rather than discarding it. + */ + +METHODDEF(boolean) +fill_input_buffer (j_decompress_ptr cinfo) +{ + my_src_ptr src = (my_src_ptr) cinfo->src; + size_t nbytes; + + nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE); + + if (nbytes <= 0) { + if (src->start_of_file) /* Treat empty input file as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); + WARNMS(cinfo, JWRN_JPEG_EOF); + /* Insert a fake EOI marker */ + src->buffer[0] = (JOCTET) 0xFF; + src->buffer[1] = (JOCTET) JPEG_EOI; + nbytes = 2; + } + + src->pub.next_input_byte = src->buffer; + src->pub.bytes_in_buffer = nbytes; + src->start_of_file = FALSE; + + return TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(boolean) +fill_mem_input_buffer (j_decompress_ptr cinfo) +{ + static const JOCTET mybuffer[4] = { + (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + }; + + /* The whole JPEG data is expected to reside in the supplied memory + * buffer, so any request for more data beyond the given buffer size + * is treated as an error. + */ + WARNMS(cinfo, JWRN_JPEG_EOF); + + /* Insert a fake EOI marker */ + + cinfo->src->next_input_byte = mybuffer; + cinfo->src->bytes_in_buffer = 2; + + return TRUE; +} +#endif + + +/* + * Skip data --- used to skip over a potentially large amount of + * uninteresting data (such as an APPn marker). + * + * Writers of suspendable-input applications must note that skip_input_data + * is not granted the right to give a suspension return. If the skip extends + * beyond the data currently in the buffer, the buffer can be marked empty so + * that the next read will cause a fill_input_buffer call that can suspend. + * Arranging for additional bytes to be discarded before reloading the input + * buffer is the application writer's problem. + */ + +METHODDEF(void) +skip_input_data (j_decompress_ptr cinfo, long num_bytes) +{ + struct jpeg_source_mgr *src = cinfo->src; + + /* Just a dumb implementation for now. Could use fseek() except + * it doesn't work on pipes. Not clear that being smart is worth + * any trouble anyway --- large skips are infrequent. + */ + if (num_bytes > 0) { + while (num_bytes > (long) src->bytes_in_buffer) { + num_bytes -= (long) src->bytes_in_buffer; + (void) (*src->fill_input_buffer) (cinfo); + /* note we assume that fill_input_buffer will never return FALSE, + * so suspension need not be handled. + */ + } + src->next_input_byte += (size_t) num_bytes; + src->bytes_in_buffer -= (size_t) num_bytes; + } +} + + +/* + * An additional method that can be provided by data source modules is the + * resync_to_restart method for error recovery in the presence of RST markers. + * For the moment, this source module just uses the default resync method + * provided by the JPEG library. That method assumes that no backtracking + * is possible. + */ + + +/* + * Terminate source --- called by jpeg_finish_decompress + * after all data has been read. Often a no-op. + * + * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding + * application must deal with any cleanup that should happen even + * for error exit. + */ + +METHODDEF(void) +term_source (j_decompress_ptr cinfo) +{ + /* no work necessary here */ +} + + +/* + * Prepare for input from a stdio stream. + * The caller must have already opened the stream, and is responsible + * for closing it after finishing decompression. + */ + +GLOBAL(void) +jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) +{ + my_src_ptr src; + + /* The source object and input buffer are made permanent so that a series + * of JPEG images can be read from the same file by calling jpeg_stdio_src + * only before the first one. (If we discarded the buffer at the end of + * one image, we'd likely lose the start of the next one.) + * This makes it unsafe to use this manager and a different source + * manager serially with the same JPEG object. Caveat programmer. + */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ + cinfo->src = (struct jpeg_source_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_source_mgr)); + src = (my_src_ptr) cinfo->src; + src->buffer = (JOCTET *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + INPUT_BUF_SIZE * sizeof(JOCTET)); + } + + src = (my_src_ptr) cinfo->src; + src->pub.init_source = init_source; + src->pub.fill_input_buffer = fill_input_buffer; + src->pub.skip_input_data = skip_input_data; + src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */ + src->pub.term_source = term_source; + src->infile = infile; + src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */ + src->pub.next_input_byte = NULL; /* until buffer loaded */ +} + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* + * Prepare for input from a supplied memory buffer. + * The buffer must contain the whole JPEG data. + */ + +GLOBAL(void) +jpeg_mem_src (j_decompress_ptr cinfo, + const unsigned char *inbuffer, unsigned long insize) +{ + struct jpeg_source_mgr *src; + + if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); + + /* The source object is made permanent so that a series of JPEG images + * can be read from the same buffer by calling jpeg_mem_src only before + * the first one. + */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ + cinfo->src = (struct jpeg_source_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(struct jpeg_source_mgr)); + } + + src = cinfo->src; + src->init_source = init_mem_source; + src->fill_input_buffer = fill_mem_input_buffer; + src->skip_input_data = skip_input_data; + src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ + src->term_source = term_source; + src->bytes_in_buffer = (size_t) insize; + src->next_input_byte = (const JOCTET *) inbuffer; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatasrc-tj.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatasrc-tj.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdatasrc-tj.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdatasrc-tj.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* - * jdatasrc.c + * jdatasrc-tj.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Modified 2009-2010 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2009-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains decompression data source routines for the case of * reading JPEG data from memory or from a file (or any stdio stream). @@ -69,16 +72,17 @@ METHODDEF(boolean) fill_mem_input_buffer (j_decompress_ptr cinfo) { - static JOCTET mybuffer[4]; + static const JOCTET mybuffer[4] = { + (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + }; /* The whole JPEG data is expected to reside in the supplied memory * buffer, so any request for more data beyond the given buffer size * is treated as an error. */ WARNMS(cinfo, JWRN_JPEG_EOF); + /* Insert a fake EOI marker */ - mybuffer[0] = (JOCTET) 0xFF; - mybuffer[1] = (JOCTET) JPEG_EOI; cinfo->src->next_input_byte = mybuffer; cinfo->src->bytes_in_buffer = 2; @@ -102,7 +106,7 @@ METHODDEF(void) skip_input_data (j_decompress_ptr cinfo, long num_bytes) { - struct jpeg_source_mgr * src = cinfo->src; + struct jpeg_source_mgr *src = cinfo->src; /* Just a dumb implementation for now. Could use fseek() except * it doesn't work on pipes. Not clear that being smart is worth @@ -154,21 +158,21 @@ GLOBAL(void) jpeg_mem_src_tj (j_decompress_ptr cinfo, - unsigned char * inbuffer, unsigned long insize) + const unsigned char *inbuffer, unsigned long insize) { - struct jpeg_source_mgr * src; + struct jpeg_source_mgr *src; - if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ + if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ ERREXIT(cinfo, JERR_INPUT_EMPTY); /* The source object is made permanent so that a series of JPEG images * can be read from the same buffer by calling jpeg_mem_src only before * the first one. */ - if (cinfo->src == NULL) { /* first time for this JPEG object? */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(struct jpeg_source_mgr)); + sizeof(struct jpeg_source_mgr)); } src = cinfo->src; @@ -178,5 +182,5 @@ src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ src->term_source = term_source; src->bytes_in_buffer = (size_t) insize; - src->next_input_byte = (JOCTET *) inbuffer; + src->next_input_byte = (const JOCTET *) inbuffer; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcoefct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcoefct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcoefct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcoefct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jdcoefct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the coefficient buffer controller for decompression. * This controller is the top level of the JPEG decompressor proper. @@ -15,95 +19,25 @@ * Also, the input side (only) is used when reading a file for transcoding. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" +#include "jdcoefct.h" #include "jpegcomp.h" -/* Block smoothing is only applicable for progressive JPEG, so: */ -#ifndef D_PROGRESSIVE_SUPPORTED -#undef BLOCK_SMOOTHING_SUPPORTED -#endif - -/* Private buffer controller object */ - -typedef struct { - struct jpeg_d_coef_controller pub; /* public fields */ - - /* These variables keep track of the current location of the input side. */ - /* cinfo->input_iMCU_row is also used for this. */ - JDIMENSION MCU_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ - - /* The output side's location is represented by cinfo->output_iMCU_row. */ - - /* In single-pass modes, it's sufficient to buffer just one MCU. - * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks, - * and let the entropy decoder write into that workspace each time. - * (On 80x86, the workspace is FAR even though it's not really very big; - * this is to keep the module interfaces unchanged when a large coefficient - * buffer is necessary.) - * In multi-pass modes, this array points to the current MCU's blocks - * within the virtual arrays; it is used only by the input side. - */ - JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU]; - - /* Temporary workspace for one MCU */ - JCOEF * workspace; - -#ifdef D_MULTISCAN_FILES_SUPPORTED - /* In multi-pass modes, we need a virtual block array for each component. */ - jvirt_barray_ptr whole_image[MAX_COMPONENTS]; -#endif - -#ifdef BLOCK_SMOOTHING_SUPPORTED - /* When doing block smoothing, we latch coefficient Al values here */ - int * coef_bits_latch; -#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ -#endif -} my_coef_controller; - -typedef my_coef_controller * my_coef_ptr; /* Forward declarations */ METHODDEF(int) decompress_onepass - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #ifdef D_MULTISCAN_FILES_SUPPORTED METHODDEF(int) decompress_data - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #endif #ifdef BLOCK_SMOOTHING_SUPPORTED -LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo)); +LOCAL(boolean) smoothing_ok (j_decompress_ptr cinfo); METHODDEF(int) decompress_smooth_data - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #endif -LOCAL(void) -start_iMCU_row (j_decompress_ptr cinfo) -/* Reset within-iMCU-row counters for a new row (input side) */ -{ - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - - /* In an interleaved scan, an MCU row is the same as an iMCU row. - * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. - * But at the bottom of the image, process only what's left. - */ - if (cinfo->comps_in_scan > 1) { - coef->MCU_rows_per_iMCU_row = 1; - } else { - if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) - coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; - else - coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; - } - - coef->MCU_ctr = 0; - coef->MCU_vert_offset = 0; -} - - /* * Initialize for an input processing pass. */ @@ -152,7 +86,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, ci, xindex, yindex, yoffset, useful_width; @@ -165,49 +99,57 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col; - MCU_col_num++) { + MCU_col_num++) { /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ - jzero_far((void FAR *) coef->MCU_buffer[0], - (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK))); + jzero_far((void *) coef->MCU_buffer[0], + (size_t) (cinfo->blocks_in_MCU * sizeof(JBLOCK))); if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->MCU_ctr = MCU_col_num; - return JPEG_SUSPENDED; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->MCU_ctr = MCU_col_num; + return JPEG_SUSPENDED; } - /* Determine where data should go in output_buf and do the IDCT thing. - * We skip dummy blocks at the right and bottom edges (but blkn gets - * incremented past them!). Note the inner loop relies on having - * allocated the MCU_buffer[] blocks sequentially. + + /* Only perform the IDCT on blocks that are contained within the desired + * cropping region. */ - blkn = 0; /* index of current DCT block within MCU */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Don't bother to IDCT an uninteresting component. */ - if (! compptr->component_needed) { - blkn += compptr->MCU_blocks; - continue; - } - inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; - useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - output_ptr = output_buf[compptr->component_index] + - yoffset * compptr->_DCT_scaled_size; - start_col = MCU_col_num * compptr->MCU_sample_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (cinfo->input_iMCU_row < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { - output_col = start_col; - for (xindex = 0; xindex < useful_width; xindex++) { - (*inverse_DCT) (cinfo, compptr, - (JCOEFPTR) coef->MCU_buffer[blkn+xindex], - output_ptr, output_col); - output_col += compptr->_DCT_scaled_size; - } - } - blkn += compptr->MCU_width; - output_ptr += compptr->_DCT_scaled_size; - } + if (MCU_col_num >= cinfo->master->first_iMCU_col && + MCU_col_num <= cinfo->master->last_iMCU_col) { + /* Determine where data should go in output_buf and do the IDCT thing. + * We skip dummy blocks at the right and bottom edges (but blkn gets + * incremented past them!). Note the inner loop relies on having + * allocated the MCU_buffer[] blocks sequentially. + */ + blkn = 0; /* index of current DCT block within MCU */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Don't bother to IDCT an uninteresting component. */ + if (! compptr->component_needed) { + blkn += compptr->MCU_blocks; + continue; + } + inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; + useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + output_ptr = output_buf[compptr->component_index] + + yoffset * compptr->_DCT_scaled_size; + start_col = (MCU_col_num - cinfo->master->first_iMCU_col) * + compptr->MCU_sample_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (cinfo->input_iMCU_row < last_iMCU_row || + yoffset+yindex < compptr->last_row_height) { + output_col = start_col; + for (xindex = 0; xindex < useful_width; xindex++) { + (*inverse_DCT) (cinfo, compptr, + (JCOEFPTR) coef->MCU_buffer[blkn+xindex], + output_ptr, output_col); + output_col += compptr->_DCT_scaled_size; + } + } + blkn += compptr->MCU_width; + output_ptr += compptr->_DCT_scaled_size; + } + } } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -232,7 +174,7 @@ METHODDEF(int) dummy_consume_data (j_decompress_ptr cinfo) { - return JPEG_SUSPENDED; /* Always indicate nothing was done */ + return JPEG_SUSPENDED; /* Always indicate nothing was done */ } @@ -249,7 +191,7 @@ consume_data (j_decompress_ptr cinfo) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN]; @@ -273,25 +215,25 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < compptr->MCU_width; xindex++) { - coef->MCU_buffer[blkn++] = buffer_ptr++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + coef->MCU_buffer[blkn++] = buffer_ptr++; + } + } } /* Try to fetch the MCU. */ if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->MCU_ctr = MCU_col_num; - return JPEG_SUSPENDED; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->MCU_ctr = MCU_col_num; + return JPEG_SUSPENDED; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -332,8 +274,8 @@ /* Force some input to be done if we are getting ahead of the input. */ while (cinfo->input_scan_number < cinfo->output_scan_number || - (cinfo->input_scan_number == cinfo->output_scan_number && - cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) { + (cinfo->input_scan_number == cinfo->output_scan_number && + cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) { if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; } @@ -361,13 +303,14 @@ output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { - buffer_ptr = buffer[block_row]; + buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; output_col = 0; - for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) { - (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, - output_ptr, output_col); - buffer_ptr++; - output_col += compptr->_DCT_scaled_size; + for (block_num = cinfo->master->first_MCU_col[ci]; + block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, + output_ptr, output_col); + buffer_ptr++; + output_col += compptr->_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; } @@ -413,9 +356,9 @@ boolean smoothing_useful = FALSE; int ci, coefi; jpeg_component_info *compptr; - JQUANT_TBL * qtable; - int * coef_bits; - int * coef_bits_latch; + JQUANT_TBL *qtable; + int *coef_bits; + int *coef_bits_latch; if (! cinfo->progressive_mode || cinfo->coef_bits == NULL) return FALSE; @@ -424,8 +367,8 @@ if (coef->coef_bits_latch == NULL) coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * - (SAVED_COEFS * SIZEOF(int))); + cinfo->num_components * + (SAVED_COEFS * sizeof(int))); coef_bits_latch = coef->coef_bits_latch; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -435,11 +378,11 @@ return FALSE; /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */ if (qtable->quantval[0] == 0 || - qtable->quantval[Q01_POS] == 0 || - qtable->quantval[Q10_POS] == 0 || - qtable->quantval[Q20_POS] == 0 || - qtable->quantval[Q11_POS] == 0 || - qtable->quantval[Q02_POS] == 0) + qtable->quantval[Q01_POS] == 0 || + qtable->quantval[Q10_POS] == 0 || + qtable->quantval[Q20_POS] == 0 || + qtable->quantval[Q11_POS] == 0 || + qtable->quantval[Q02_POS] == 0) return FALSE; /* DC values must be at least partly known for all components. */ coef_bits = cinfo->coef_bits[ci]; @@ -449,7 +392,7 @@ for (coefi = 1; coefi <= 5; coefi++) { coef_bits_latch[coefi] = coef_bits[coefi]; if (coef_bits[coefi] != 0) - smoothing_useful = TRUE; + smoothing_useful = TRUE; } coef_bits_latch += SAVED_COEFS; } @@ -476,10 +419,10 @@ jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; boolean first_row, last_row; - JCOEF * workspace; + JCOEF *workspace; int *coef_bits; JQUANT_TBL *quanttbl; - INT32 Q00,Q01,Q02,Q10,Q11,Q20, num; + JLONG Q00,Q01,Q02,Q10,Q11,Q20, num; int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9; int Al, pred; @@ -488,7 +431,7 @@ /* Force some input to be done if we are getting ahead of the input. */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + ! cinfo->inputctl->eoi_reached) { if (cinfo->input_scan_number == cinfo->output_scan_number) { /* If input is working on current scan, we ordinarily want it to * have completed the current row. But if input scan is DC, @@ -497,7 +440,7 @@ */ JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0; if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta) - break; + break; } if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; @@ -525,15 +468,15 @@ if (cinfo->output_iMCU_row > 0) { access_rows += compptr->v_samp_factor; /* prior iMCU row too */ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], - (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, - (JDIMENSION) access_rows, FALSE); - buffer += compptr->v_samp_factor; /* point to current iMCU row */ + ((j_common_ptr) cinfo, coef->whole_image[ci], + (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, + (JDIMENSION) access_rows, FALSE); + buffer += compptr->v_samp_factor; /* point to current iMCU row */ first_row = FALSE; } else { buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], - (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE); + ((j_common_ptr) cinfo, coef->whole_image[ci], + (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE); first_row = TRUE; } /* Fetch component-dependent info */ @@ -549,15 +492,15 @@ output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { - buffer_ptr = buffer[block_row]; + buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; if (first_row && block_row == 0) - prev_block_row = buffer_ptr; + prev_block_row = buffer_ptr; else - prev_block_row = buffer[block_row-1]; + prev_block_row = buffer[block_row-1]; if (last_row && block_row == block_rows-1) - next_block_row = buffer_ptr; + next_block_row = buffer_ptr; else - next_block_row = buffer[block_row+1]; + next_block_row = buffer[block_row+1]; /* We fetch the surrounding DC values using a sliding-register approach. * Initialize all nine here so as to do the right thing on narrow pics. */ @@ -566,103 +509,104 @@ DC7 = DC8 = DC9 = (int) next_block_row[0][0]; output_col = 0; last_block_column = compptr->width_in_blocks - 1; - for (block_num = 0; block_num <= last_block_column; block_num++) { - /* Fetch current DCT block into workspace so we can modify it. */ - jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); - /* Update DC values */ - if (block_num < last_block_column) { - DC3 = (int) prev_block_row[1][0]; - DC6 = (int) buffer_ptr[1][0]; - DC9 = (int) next_block_row[1][0]; - } - /* Compute coefficient estimates per K.8. - * An estimate is applied only if coefficient is still zero, - * and is not known to be fully accurate. - */ - /* AC01 */ - if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { - num = 36 * Q00 * (DC4 - DC6); - if (num >= 0) { - pred = (int) (((Q01<<7) + num) / (Q01<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q10<<7) + num) / (Q10<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q20<<7) + num) / (Q20<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q11<<7) + num) / (Q11<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q02<<7) + num) / (Q02<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<_DCT_scaled_size; + for (block_num = cinfo->master->first_MCU_col[ci]; + block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { + /* Fetch current DCT block into workspace so we can modify it. */ + jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); + /* Update DC values */ + if (block_num < last_block_column) { + DC3 = (int) prev_block_row[1][0]; + DC6 = (int) buffer_ptr[1][0]; + DC9 = (int) next_block_row[1][0]; + } + /* Compute coefficient estimates per K.8. + * An estimate is applied only if coefficient is still zero, + * and is not known to be fully accurate. + */ + /* AC01 */ + if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { + num = 36 * Q00 * (DC4 - DC6); + if (num >= 0) { + pred = (int) (((Q01<<7) + num) / (Q01<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q10<<7) + num) / (Q10<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q20<<7) + num) / (Q20<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q11<<7) + num) / (Q11<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q02<<7) + num) / (Q02<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; } @@ -687,7 +631,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_d_coef_controller *) coef; coef->pub.start_input_pass = start_input_pass; coef->pub.start_output_pass = start_output_pass; @@ -705,20 +649,20 @@ jpeg_component_info *compptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { access_rows = compptr->v_samp_factor; #ifdef BLOCK_SMOOTHING_SUPPORTED /* If block smoothing could be used, need a bigger window */ if (cinfo->progressive_mode) - access_rows *= 3; + access_rows *= 3; #endif coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE, - (JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) access_rows); + ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE, + (JDIMENSION) jround_up((long) compptr->width_in_blocks, + (long) compptr->h_samp_factor), + (JDIMENSION) jround_up((long) compptr->height_in_blocks, + (long) compptr->v_samp_factor), + (JDIMENSION) access_rows); } coef->pub.consume_data = consume_data; coef->pub.decompress_data = decompress_data; @@ -733,7 +677,7 @@ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; } @@ -745,5 +689,5 @@ /* Allocate the workspace buffer */ coef->workspace = (JCOEF *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(JCOEF) * DCTSIZE2); + sizeof(JCOEF) * DCTSIZE2); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcoefct.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcoefct.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcoefct.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcoefct.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,82 @@ +/* + * jdcoefct.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright 2009 Pierre Ossman for Cendio AB + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" + + +/* Block smoothing is only applicable for progressive JPEG, so: */ +#ifndef D_PROGRESSIVE_SUPPORTED +#undef BLOCK_SMOOTHING_SUPPORTED +#endif + + +/* Private buffer controller object */ + +typedef struct { + struct jpeg_d_coef_controller pub; /* public fields */ + + /* These variables keep track of the current location of the input side. */ + /* cinfo->input_iMCU_row is also used for this. */ + JDIMENSION MCU_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ + + /* The output side's location is represented by cinfo->output_iMCU_row. */ + + /* In single-pass modes, it's sufficient to buffer just one MCU. + * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks, + * and let the entropy decoder write into that workspace each time. + * In multi-pass modes, this array points to the current MCU's blocks + * within the virtual arrays; it is used only by the input side. + */ + JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU]; + + /* Temporary workspace for one MCU */ + JCOEF *workspace; + +#ifdef D_MULTISCAN_FILES_SUPPORTED + /* In multi-pass modes, we need a virtual block array for each component. */ + jvirt_barray_ptr whole_image[MAX_COMPONENTS]; +#endif + +#ifdef BLOCK_SMOOTHING_SUPPORTED + /* When doing block smoothing, we latch coefficient Al values here */ + int *coef_bits_latch; +#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ +#endif +} my_coef_controller; + +typedef my_coef_controller *my_coef_ptr; + + +LOCAL(void) +start_iMCU_row (j_decompress_ptr cinfo) +/* Reset within-iMCU-row counters for a new row (input side) */ +{ + my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + + /* In an interleaved scan, an MCU row is the same as an iMCU row. + * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. + * But at the bottom of the image, process only what's left. + */ + if (cinfo->comps_in_scan > 1) { + coef->MCU_rows_per_iMCU_row = 1; + } else { + if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) + coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; + else + coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; + } + + coef->MCU_ctr = 0; + coef->MCU_vert_offset = 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcol565.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcol565.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcol565.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcol565.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,384 @@ +/* + * jdcol565.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains output colorspace conversion routines. + */ + +/* This file is included by jdcolor.c */ + + +INLINE +LOCAL(void) +ycc_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + + if (PACK_NEED_ALIGNMENT(outptr)) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + y = GETJSAMPLE(*inptr0); + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + y = GETJSAMPLE(*inptr0); + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +rgb_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_SHORT_565(r, g, b); + + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + r = GETJSAMPLE(*inptr0); + g = GETJSAMPLE(*inptr1); + b = GETJSAMPLE(*inptr2); + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + register JSAMPLE * range_limit = cinfo->sample_range_limit; + JDIMENSION num_cols = cinfo->output_width; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +gray_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int g; + + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + g = *inptr++; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + g = *inptr++; + rgb = PACK_SHORT_565(g, g, g); + g = *inptr++; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g)); + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + g = *inptr; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +gray_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + register JSAMPLE * range_limit = cinfo->sample_range_limit; + JDIMENSION num_cols = cinfo->output_width; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int g; + + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + d0 = DITHER_ROTATE(d0); + + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g)); + d0 = DITHER_ROTATE(d0); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + g = *inptr; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolext.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolext.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,143 @@ +/* + * jdcolext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2011, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains output colorspace conversion routines. + */ + + +/* This file is included by jdcolor.c */ + + +/* + * Convert some rows of samples to the output colorspace. + * + * Note that we change from noninterleaved, one-plane-per-component format + * to interleaved-pixel format. The output buffer is therefore three times + * as wide as the input buffer. + * A starting row offset is provided only for the input buffer. The caller + * can easily adjust the passed output_buf value to accommodate any row + * offset required on that side. + */ + +INLINE +LOCAL(void) +ycc_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + SHIFT_TEMPS + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + y = GETJSAMPLE(inptr0[col]); + cb = GETJSAMPLE(inptr1[col]); + cr = GETJSAMPLE(inptr2[col]); + /* Range-limiting is essential due to noise introduced by DCT losses. */ + outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; + outptr[RGB_GREEN] = range_limit[y + + ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} + + +/* + * Convert grayscale to RGB: just duplicate the graylevel three times. + * This is provided to support applications that don't want to cope + * with grayscale as a separate case. + */ + +INLINE +LOCAL(void) +gray_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + /* We can dispense with GETJSAMPLE() here */ + outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} + + +/* + * Convert RGB to extended RGB: just swap the order of source pixels + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr0, inptr1, inptr2; + register JSAMPROW outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + /* We can dispense with GETJSAMPLE() here */ + outptr[RGB_RED] = inptr0[col]; + outptr[RGB_GREEN] = inptr1[col]; + outptr[RGB_BLUE] = inptr2[col]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolext.c.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,104 +0,0 @@ -/* - * jdcolext.c - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains output colorspace conversion routines. - */ - - -/* This file is included by jdcolor.c */ - - -/* - * Convert some rows of samples to the output colorspace. - * - * Note that we change from noninterleaved, one-plane-per-component format - * to interleaved-pixel format. The output buffer is therefore three times - * as wide as the input buffer. - * A starting row offset is provided only for the input buffer. The caller - * can easily adjust the passed output_buf value to accommodate any row - * offset required on that side. - */ - -INLINE -LOCAL(void) -ycc_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int y, cb, cr; - register JSAMPROW outptr; - register JSAMPROW inptr0, inptr1, inptr2; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->output_width; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register INT32 * Crgtab = cconvert->Cr_g_tab; - register INT32 * Cbgtab = cconvert->Cb_g_tab; - SHIFT_TEMPS - - while (--num_rows >= 0) { - inptr0 = input_buf[0][input_row]; - inptr1 = input_buf[1][input_row]; - inptr2 = input_buf[2][input_row]; - input_row++; - outptr = *output_buf++; - for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); - /* Range-limiting is essential due to noise introduced by DCT losses. */ - outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; - outptr[RGB_GREEN] = range_limit[y + - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], - SCALEBITS))]; - outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; - /* Set unused byte to 0xFF so it can be interpreted as an opaque */ - /* alpha channel value */ -#ifdef RGB_ALPHA - outptr[RGB_ALPHA] = 0xFF; -#endif - outptr += RGB_PIXELSIZE; - } - } -} - - -/* - * Convert grayscale to RGB: just duplicate the graylevel three times. - * This is provided to support applications that don't want to cope - * with grayscale as a separate case. - */ - -INLINE -LOCAL(void) -gray_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - register JSAMPROW inptr, outptr; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->output_width; - - while (--num_rows >= 0) { - inptr = input_buf[0][input_row++]; - outptr = *output_buf++; - for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ - outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; - /* Set unused byte to 0xFF so it can be interpreted as an opaque */ - /* alpha channel value */ -#ifdef RGB_ALPHA - outptr[RGB_ALPHA] = 0xFF; -#endif - outptr += RGB_PIXELSIZE; - } - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolor.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolor.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdcolor.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdcolor.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jdcolor.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains output colorspace conversion routines. */ @@ -14,7 +18,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" /* Private subobject */ @@ -23,24 +27,32 @@ struct jpeg_color_deconverter pub; /* public fields */ /* Private state for YCC->RGB conversion */ - int * Cr_r_tab; /* => table for Cr to R conversion */ - int * Cb_b_tab; /* => table for Cb to B conversion */ - INT32 * Cr_g_tab; /* => table for Cr to G conversion */ - INT32 * Cb_g_tab; /* => table for Cb to G conversion */ + int *Cr_r_tab; /* => table for Cr to R conversion */ + int *Cb_b_tab; /* => table for Cb to B conversion */ + JLONG *Cr_g_tab; /* => table for Cr to G conversion */ + JLONG *Cb_g_tab; /* => table for Cb to G conversion */ + + /* Private state for RGB->Y conversion */ + JLONG *rgb_y_tab; /* => table for RGB to Y conversion */ } my_color_deconverter; -typedef my_color_deconverter * my_cconvert_ptr; +typedef my_color_deconverter *my_cconvert_ptr; /**************** YCbCr -> RGB conversion: most common case **************/ +/**************** RGB -> Y conversion: less common case **************/ /* * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore - * R = Y + 1.40200 * Cr - * G = Y - 0.34414 * Cb - 0.71414 * Cr - * B = Y + 1.77200 * Cb + * + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * * where Cb and Cr represent the incoming values less CENTERJSAMPLE. * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * @@ -61,14 +73,26 @@ * together before rounding. */ -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L<Y conversion and divide it up into + * three parts, instead of doing three alloc_small requests. This lets us + * use a single table base address, which can be held in a register in the + * inner loops on many machines (more than can hold all three addresses, + * anyway). + */ + +#define R_Y_OFF 0 /* offset to R => Y section */ +#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ +#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ +#define TABLE_SIZE (3*(MAXJSAMPLE+1)) /* Include inline routines for colorspace extensions */ -#include "jdcolext.c.inc" +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -80,13 +104,15 @@ #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgb_convert_internal #define gray_rgb_convert_internal gray_extrgb_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extrgb_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -95,7 +121,8 @@ #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgbx_convert_internal #define gray_rgb_convert_internal gray_extrgbx_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -103,6 +130,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -110,13 +138,15 @@ #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgr_convert_internal #define gray_rgb_convert_internal gray_extbgr_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extbgr_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -125,7 +155,8 @@ #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgrx_convert_internal #define gray_rgb_convert_internal gray_extbgrx_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -133,6 +164,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -141,7 +173,8 @@ #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxbgr_convert_internal #define gray_rgb_convert_internal gray_extxbgr_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -149,6 +182,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -157,7 +191,8 @@ #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxrgb_convert_internal #define gray_rgb_convert_internal gray_extxrgb_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -165,6 +200,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal /* @@ -176,31 +212,31 @@ { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; int i; - INT32 x; + JLONG x; SHIFT_TEMPS cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); + (MAXJSAMPLE+1) * sizeof(int)); cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); - cconvert->Cr_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(int)); + cconvert->Cr_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); - cconvert->Cb_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(JLONG)); + cconvert->Cb_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); + (MAXJSAMPLE+1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ /* Cr=>R value is nearest int to 1.40200 * x */ cconvert->Cr_r_tab[i] = (int) - RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); /* Cb=>B value is nearest int to 1.77200 * x */ cconvert->Cb_b_tab[i] = (int) - RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ @@ -216,8 +252,8 @@ METHODDEF(void) ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -260,32 +296,122 @@ /* + * Initialize for RGB->grayscale colorspace conversion. + */ + +LOCAL(void) +build_rgb_y_table (j_decompress_ptr cinfo) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + JLONG *rgb_y_tab; + JLONG i; + + /* Allocate and fill in the conversion tables. */ + cconvert->rgb_y_tab = rgb_y_tab = (JLONG *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (TABLE_SIZE * sizeof(JLONG))); + + for (i = 0; i <= MAXJSAMPLE; i++) { + rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i; + rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i; + rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; + } +} + + +/* + * Convert RGB to grayscale. + */ + +METHODDEF(void) +rgb_gray_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG *ctab = cconvert->rgb_y_tab; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr0[col]); + g = GETJSAMPLE(inptr1[col]); + b = GETJSAMPLE(inptr2[col]); + /* Y */ + outptr[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + } + } +} + + +/* * Color conversion for no colorspace change: just copy the data, * converting from separate-planes to interleaved representation. */ METHODDEF(void) null_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { - register JSAMPROW inptr, outptr; - register JDIMENSION count; + register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr; + register JDIMENSION col; register int num_components = cinfo->num_components; JDIMENSION num_cols = cinfo->output_width; int ci; - while (--num_rows >= 0) { - for (ci = 0; ci < num_components; ci++) { - inptr = input_buf[ci][input_row]; - outptr = output_buf[0] + ci; - for (count = num_cols; count > 0; count--) { - *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */ - outptr += num_components; + if (num_components == 3) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; } } - input_row++; - output_buf++; + } else if (num_components == 4) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + inptr3 = input_buf[3][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; + *outptr++ = inptr3[col]; + } + } + } else { + while (--num_rows >= 0) { + for (ci = 0; ci < num_components; ci++) { + inptr = input_buf[ci][input_row]; + outptr = *output_buf; + for (col = 0; col < num_cols; col++) { + outptr[ci] = inptr[col]; + outptr += num_components; + } + } + output_buf++; + input_row++; + } } } @@ -298,11 +424,11 @@ METHODDEF(void) grayscale_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0, - num_rows, cinfo->output_width); + num_rows, cinfo->output_width); } @@ -312,8 +438,8 @@ METHODDEF(void) gray_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -353,6 +479,52 @@ /* + * Convert plain RGB to extended RGB + */ + +METHODDEF(void) +rgb_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGR: + rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + } +} + + +/* * Adobe-style YCCK->CMYK conversion. * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same * conversion as above, while passing K (black) unchanged. @@ -361,8 +533,8 @@ METHODDEF(void) ycck_cmyk_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int y, cb, cr; @@ -371,11 +543,11 @@ register JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register INT32 * Crgtab = cconvert->Cr_g_tab; - register INT32 * Cbgtab = cconvert->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + register int *Crrtab = cconvert->Cr_r_tab; + register int *Cbbtab = cconvert->Cb_b_tab; + register JLONG *Crgtab = cconvert->Cr_g_tab; + register JLONG *Cbgtab = cconvert->Cb_g_tab; SHIFT_TEMPS while (--num_rows >= 0) { @@ -390,13 +562,13 @@ cb = GETJSAMPLE(inptr1[col]); cr = GETJSAMPLE(inptr2[col]); /* Range-limiting is essential due to noise introduced by DCT losses. */ - outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ - outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], - SCALEBITS)))]; - outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ + outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ + outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ + ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)))]; + outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ /* K passes through unchanged */ - outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ + outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ outptr += 4; } } @@ -404,6 +576,164 @@ /* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(int *)(addr)) = pixels) + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficent for dithering RGB888 to RGB565. + */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const JLONG dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_le +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le +#define rgb_rgb565_convert_internal rgb_rgb565_convert_le +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le +#define gray_rgb565_convert_internal gray_rgb565_convert_le +#define gray_rgb565D_convert_internal gray_rgb565D_convert_le +#include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_be +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_be +#define rgb_rgb565_convert_internal rgb_rgb565_convert_be +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_be +#define gray_rgb565_convert_internal gray_rgb565_convert_be +#define gray_rgb565D_convert_internal gray_rgb565D_convert_be +#include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + + +METHODDEF(void) +ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +ycc_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +/* * Empty method for start_pass. */ @@ -426,7 +756,7 @@ cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_color_deconverter)); + sizeof(my_color_deconverter)); cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert; cconvert->pub.start_pass = start_pass_dcolor; @@ -449,7 +779,7 @@ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); break; - default: /* JCS_UNKNOWN can be anything */ + default: /* JCS_UNKNOWN can be anything */ if (cinfo->num_components < 1) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); break; @@ -464,11 +794,14 @@ case JCS_GRAYSCALE: cinfo->out_color_components = 1; if (cinfo->jpeg_color_space == JCS_GRAYSCALE || - cinfo->jpeg_color_space == JCS_YCbCr) { + cinfo->jpeg_color_space == JCS_YCbCr) { cconvert->pub.color_convert = grayscale_convert; /* For color->grayscale conversion, only the Y (0) component is needed */ for (ci = 1; ci < cinfo->num_components; ci++) - cinfo->comp_info[ci].component_needed = FALSE; + cinfo->comp_info[ci].component_needed = FALSE; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_gray_convert; + build_rgb_y_table(cinfo); } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -494,13 +827,48 @@ } } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { cconvert->pub.color_convert = gray_rgb_convert; - } else if (cinfo->jpeg_color_space == cinfo->out_color_space && - rgb_pixelsize[cinfo->out_color_space] == 3) { - cconvert->pub.color_convert = null_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (rgb_red[cinfo->out_color_space] == 0 && + rgb_green[cinfo->out_color_space] == 1 && + rgb_blue[cinfo->out_color_space] == 2 && + rgb_pixelsize[cinfo->out_color_space] == 3) + cconvert->pub.color_convert = null_convert; + else + cconvert->pub.color_convert = rgb_rgb_convert; } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; + case JCS_RGB565: + cinfo->out_color_components = 3; + if (cinfo->dither_mode == JDITHER_NONE) { + if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (jsimd_can_ycc_rgb565()) + cconvert->pub.color_convert = jsimd_ycc_rgb565_convert; + else { + cconvert->pub.color_convert = ycc_rgb565_convert; + build_ycc_rgb_table(cinfo); + } + } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + cconvert->pub.color_convert = gray_rgb565_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_rgb565_convert; + } else + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); + } else { + /* only ordered dithering is supported */ + if (cinfo->jpeg_color_space == JCS_YCbCr) { + cconvert->pub.color_convert = ycc_rgb565D_convert; + build_ycc_rgb_table(cinfo); + } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + cconvert->pub.color_convert = gray_rgb565D_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_rgb565D_convert; + } else + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); + } + break; + case JCS_CMYK: cinfo->out_color_components = 4; if (cinfo->jpeg_color_space == JCS_YCCK) { @@ -517,7 +885,7 @@ if (cinfo->out_color_space == cinfo->jpeg_color_space) { cinfo->out_color_components = cinfo->num_components; cconvert->pub.color_convert = null_convert; - } else /* unsupported non-null conversion */ + } else /* unsupported non-null conversion */ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdct.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdct.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdct.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdct.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,14 +1,17 @@ /* * jdct.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This include file contains common declarations for the forward and * inverse DCT modules. These declarations are private to the DCT managers * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms. - * The individual DCT algorithms are kept in separate files to ease + * The individual DCT algorithms are kept in separate files to ease * machine-dependent tuning (e.g., assembly coding). */ @@ -16,7 +19,7 @@ /* * A forward DCT routine is given a pointer to a work area of type DCTELEM[]; * the DCT is to be performed in-place in that buffer. Type DCTELEM is int - * for 8-bit samples, INT32 for 12-bit samples. (NOTE: Floating-point DCT + * for 8-bit samples, JLONG for 12-bit samples. (NOTE: Floating-point DCT * implementations use an array of type FAST_FLOAT, instead.) * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE). * The DCT outputs are returned scaled up by a factor of 8; they therefore @@ -29,7 +32,7 @@ #if BITS_IN_JSAMPLE == 8 #ifndef WITH_SIMD -typedef int DCTELEM; /* 16 or 32 bits is fine */ +typedef int DCTELEM; /* 16 or 32 bits is fine */ typedef unsigned int UDCTELEM; typedef unsigned long long UDCTELEM2; #else @@ -38,8 +41,7 @@ typedef unsigned int UDCTELEM2; #endif #else -typedef INT32 DCTELEM; /* must have 32 bits */ -typedef UINT32 UDCTELEM; +typedef JLONG DCTELEM; /* must have 32 bits */ typedef unsigned long long UDCTELEM2; #endif @@ -64,10 +66,10 @@ typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ #if BITS_IN_JSAMPLE == 8 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ -#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ +#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ #else -typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ -#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ +typedef JLONG IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ +#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ #endif typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ @@ -86,57 +88,79 @@ #define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_fdct_islow jFDislow -#define jpeg_fdct_ifast jFDifast -#define jpeg_fdct_float jFDfloat -#define jpeg_idct_islow jRDislow -#define jpeg_idct_ifast jRDifast -#define jpeg_idct_float jRDfloat -#define jpeg_idct_4x4 jRD4x4 -#define jpeg_idct_2x2 jRD2x2 -#define jpeg_idct_1x1 jRD1x1 -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - /* Extern declarations for the forward and inverse DCT routines. */ -EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data)); +EXTERN(void) jpeg_fdct_islow (DCTELEM *data); +EXTERN(void) jpeg_fdct_ifast (DCTELEM *data); +EXTERN(void) jpeg_fdct_float (FAST_FLOAT *data); EXTERN(void) jpeg_idct_islow - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_ifast - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_float - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_7x7 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_6x6 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_5x5 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_4x4 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_3x3 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_2x2 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_1x1 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_9x9 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_10x10 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_11x11 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_12x12 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_13x13 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_14x14 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_15x15 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_16x16 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); /* * Macros for handling fixed-point arithmetic; these are used by many * but not all of the DCT/IDCT modules. * - * All values are expected to be of type INT32. + * All values are expected to be of type JLONG. * Fractional constants are scaled left by CONST_BITS bits. * CONST_BITS is defined within each module using these macros, * and may differ from one module to the next. */ -#define ONE ((INT32) 1) +#define ONE ((JLONG) 1) #define CONST_SCALE (ONE << CONST_BITS) /* Convert a positive real constant to an integer scaled by CONST_SCALE. @@ -144,16 +168,16 @@ * thus causing a lot of useless floating-point operations at run time. */ -#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) +#define FIX(x) ((JLONG) ((x) * CONST_SCALE + 0.5)) -/* Descale and correctly round an INT32 value that's scaled by N bits. +/* Descale and correctly round a JLONG value that's scaled by N bits. * We assume RIGHT_SHIFT rounds towards minus infinity, so adding * the fudge factor is correct for either sign of X. */ #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result. * This macro is used only when the two inputs will actually be no more than * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a * full 32x32 multiply. This provides a useful speedup on many machines. @@ -162,23 +186,23 @@ * correct combination of casts. */ -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ #define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT16) (const))) #endif -#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT32) (const))) +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((JLONG) (const))) #endif -#ifndef MULTIPLY16C16 /* default definition */ +#ifndef MULTIPLY16C16 /* default definition */ #define MULTIPLY16C16(var,const) ((var) * (const)) #endif /* Same except both inputs are variables. */ -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ #define MULTIPLY16V16(var1,var2) (((INT16) (var1)) * ((INT16) (var2))) #endif -#ifndef MULTIPLY16V16 /* default definition */ +#ifndef MULTIPLY16V16 /* default definition */ #define MULTIPLY16V16(var1,var2) ((var1) * (var2)) #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jddctmgr.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jddctmgr.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jddctmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jddctmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jddctmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2002-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2010, 2015, D. R. Commander. + * Copyright (C) 2013, MIPS Technologies, Inc., California + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the inverse-DCT management logic. * This code selects a particular IDCT implementation to be used, @@ -20,7 +24,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" #include "jpegcomp.h" @@ -45,7 +49,7 @@ /* Private subobject for this module */ typedef struct { - struct jpeg_inverse_dct pub; /* public fields */ + struct jpeg_inverse_dct pub; /* public fields */ /* This array contains the IDCT method code that each multiplier table * is currently set up for, or -1 if it's not yet set up. @@ -55,7 +59,7 @@ int cur_method[MAX_COMPONENTS]; } my_idct_controller; -typedef my_idct_controller * my_idct_ptr; +typedef my_idct_controller *my_idct_ptr; /* Allocated multiplier tables: big enough for any supported variant */ @@ -97,7 +101,7 @@ jpeg_component_info *compptr; int method = 0; inverse_DCT_method_ptr method_ptr = NULL; - JQUANT_TBL * qtbl; + JQUANT_TBL *qtbl; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -106,57 +110,117 @@ #ifdef IDCT_SCALING_SUPPORTED case 1: method_ptr = jpeg_idct_1x1; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 2: if (jsimd_can_idct_2x2()) method_ptr = jsimd_idct_2x2; else method_ptr = jpeg_idct_2x2; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ + break; + case 3: + method_ptr = jpeg_idct_3x3; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; case 4: if (jsimd_can_idct_4x4()) method_ptr = jsimd_idct_4x4; else method_ptr = jpeg_idct_4x4; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ + break; + case 5: + method_ptr = jpeg_idct_5x5; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 6: +#if defined(__mips__) + if (jsimd_can_idct_6x6()) + method_ptr = jsimd_idct_6x6; + else +#endif + method_ptr = jpeg_idct_6x6; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 7: + method_ptr = jpeg_idct_7x7; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; #endif case DCTSIZE: switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: - if (jsimd_can_idct_islow()) - method_ptr = jsimd_idct_islow; - else - method_ptr = jpeg_idct_islow; - method = JDCT_ISLOW; - break; + if (jsimd_can_idct_islow()) + method_ptr = jsimd_idct_islow; + else + method_ptr = jpeg_idct_islow; + method = JDCT_ISLOW; + break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: - if (jsimd_can_idct_ifast()) - method_ptr = jsimd_idct_ifast; - else - method_ptr = jpeg_idct_ifast; - method = JDCT_IFAST; - break; + if (jsimd_can_idct_ifast()) + method_ptr = jsimd_idct_ifast; + else + method_ptr = jpeg_idct_ifast; + method = JDCT_IFAST; + break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: - if (jsimd_can_idct_float()) - method_ptr = jsimd_idct_float; - else - method_ptr = jpeg_idct_float; - method = JDCT_FLOAT; - break; + if (jsimd_can_idct_float()) + method_ptr = jsimd_idct_float; + else + method_ptr = jpeg_idct_float; + method = JDCT_FLOAT; + break; #endif default: - ERREXIT(cinfo, JERR_NOT_COMPILED); - break; + ERREXIT(cinfo, JERR_NOT_COMPILED); + break; } break; +#ifdef IDCT_SCALING_SUPPORTED + case 9: + method_ptr = jpeg_idct_9x9; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 10: + method_ptr = jpeg_idct_10x10; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 11: + method_ptr = jpeg_idct_11x11; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 12: +#if defined(__mips__) + if (jsimd_can_idct_12x12()) + method_ptr = jsimd_idct_12x12; + else +#endif + method_ptr = jpeg_idct_12x12; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 13: + method_ptr = jpeg_idct_13x13; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 14: + method_ptr = jpeg_idct_14x14; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 15: + method_ptr = jpeg_idct_15x15; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 16: + method_ptr = jpeg_idct_16x16; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; +#endif default: ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size); break; @@ -172,81 +236,81 @@ if (! compptr->component_needed || idct->cur_method[ci] == method) continue; qtbl = compptr->quant_table; - if (qtbl == NULL) /* happens if no data yet for component */ + if (qtbl == NULL) /* happens if no data yet for component */ continue; idct->cur_method[ci] = method; switch (method) { #ifdef PROVIDE_ISLOW_TABLES case JDCT_ISLOW: { - /* For LL&M IDCT method, multipliers are equal to raw quantization - * coefficients, but are stored as ints to ensure access efficiency. - */ - ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; - for (i = 0; i < DCTSIZE2; i++) { - ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i]; - } + /* For LL&M IDCT method, multipliers are equal to raw quantization + * coefficients, but are stored as ints to ensure access efficiency. + */ + ISLOW_MULT_TYPE *ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; + for (i = 0; i < DCTSIZE2; i++) { + ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i]; + } } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { - /* For AA&N IDCT method, multipliers are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * For integer operation, the multiplier table is to be scaled by - * IFAST_SCALE_BITS. - */ - IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; + /* For AA&N IDCT method, multipliers are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * For integer operation, the multiplier table is to be scaled by + * IFAST_SCALE_BITS. + */ + IFAST_MULT_TYPE *ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; #define CONST_BITS 14 - static const INT16 aanscales[DCTSIZE2] = { - /* precomputed values scaled up by 14 bits */ - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, - 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, - 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 - }; - SHIFT_TEMPS - - for (i = 0; i < DCTSIZE2; i++) { - ifmtbl[i] = (IFAST_MULT_TYPE) - DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], - (INT32) aanscales[i]), - CONST_BITS-IFAST_SCALE_BITS); - } + static const INT16 aanscales[DCTSIZE2] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + }; + SHIFT_TEMPS + + for (i = 0; i < DCTSIZE2; i++) { + ifmtbl[i] = (IFAST_MULT_TYPE) + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-IFAST_SCALE_BITS); + } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { - /* For float AA&N IDCT method, multipliers are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - */ - FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; - int row, col; - static const double aanscalefactor[DCTSIZE] = { - 1.0, 1.387039845, 1.306562965, 1.175875602, - 1.0, 0.785694958, 0.541196100, 0.275899379 - }; - - i = 0; - for (row = 0; row < DCTSIZE; row++) { - for (col = 0; col < DCTSIZE; col++) { - fmtbl[i] = (FLOAT_MULT_TYPE) - ((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col]); - i++; - } - } + /* For float AA&N IDCT method, multipliers are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + */ + FLOAT_MULT_TYPE *fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; + int row, col; + static const double aanscalefactor[DCTSIZE] = { + 1.0, 1.387039845, 1.306562965, 1.175875602, + 1.0, 0.785694958, 0.541196100, 0.275899379 + }; + + i = 0; + for (row = 0; row < DCTSIZE; row++) { + for (col = 0; col < DCTSIZE; col++) { + fmtbl[i] = (FLOAT_MULT_TYPE) + ((double) qtbl->quantval[i] * + aanscalefactor[row] * aanscalefactor[col]); + i++; + } + } } break; #endif @@ -271,7 +335,7 @@ idct = (my_idct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_idct_controller)); + sizeof(my_idct_controller)); cinfo->idct = (struct jpeg_inverse_dct *) idct; idct->pub.start_pass = start_pass; @@ -280,8 +344,8 @@ /* Allocate and pre-zero a multiplier table for each component */ compptr->dct_table = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(multiplier_table)); - MEMZERO(compptr->dct_table, SIZEOF(multiplier_table)); + sizeof(multiplier_table)); + MEMZERO(compptr->dct_table, sizeof(multiplier_table)); /* Mark multiplier table not yet set up for any method */ idct->cur_method[ci] = -1; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdhuff.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdhuff.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdhuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdhuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdhuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy decoding routines. * @@ -18,8 +20,9 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdphuff.c */ +#include "jdhuff.h" /* Declarations shared with jdphuff.c */ #include "jpegcomp.h" +#include "jstdhuff.c" /* @@ -43,10 +46,10 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -57,27 +60,27 @@ /* These fields are loaded into local variables at start of each MCU. * In case of suspension, we exit WITHOUT updating them. */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ + bitread_perm_state bitstate; /* Bit buffer at start of MCU */ + savable_state saved; /* Other state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; /* Precalculated info set up by start_pass for use in decode_mcu: */ /* Pointers to derived tables to be used for each block within an MCU */ - d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; - d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + d_derived_tbl *dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + d_derived_tbl *ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; /* Whether we care about the DC and AC coefficient values for each block */ boolean dc_needed[D_MAX_BLOCKS_IN_MCU]; boolean ac_needed[D_MAX_BLOCKS_IN_MCU]; } huff_entropy_decoder; -typedef huff_entropy_decoder * huff_entropy_ptr; +typedef huff_entropy_decoder *huff_entropy_ptr; /* @@ -89,7 +92,8 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, blkn, dctbl, actbl; - jpeg_component_info * compptr; + d_derived_tbl **pdtbl; + jpeg_component_info *compptr; /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. * This ought to be an error condition, but we make it a warning because @@ -105,10 +109,10 @@ actbl = compptr->ac_tbl_no; /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ - jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); - jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); + pdtbl = entropy->dc_derived_tbls + dctbl; + jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl); + pdtbl = entropy->ac_derived_tbls + actbl; + jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl); /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; } @@ -149,7 +153,7 @@ GLOBAL(void) jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl ** pdtbl) + d_derived_tbl **pdtbl) { JHUFF_TBL *htbl; d_derived_tbl *dtbl; @@ -175,26 +179,26 @@ if (*pdtbl == NULL) *pdtbl = (d_derived_tbl *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(d_derived_tbl)); + sizeof(d_derived_tbl)); dtbl = *pdtbl; - dtbl->pub = htbl; /* fill in back link */ - + dtbl->pub = htbl; /* fill in back link */ + /* Figure C.1: make table of Huffman code length for each symbol */ p = 0; for (l = 1; l <= 16; l++) { i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ + if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) huffsize[p++] = (char) l; } huffsize[p] = 0; numsymbols = p; - + /* Figure C.2: generate the codes themselves */ /* We also validate that the counts represent a legal Huffman code tree. */ - + code = 0; si = huffsize[0]; p = 0; @@ -206,7 +210,7 @@ /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((INT32) code) >= (((INT32) 1) << si)) + if (((JLONG) code) >= (((JLONG) 1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; @@ -220,11 +224,11 @@ /* valoffset[l] = huffval[] index of 1st symbol of code length l, * minus the minimum code of length l */ - dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p]; + dtbl->valoffset[l] = (JLONG) p - (JLONG) huffcode[p]; p += htbl->bits[l]; dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */ } else { - dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ + dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ } } dtbl->valoffset[17] = 0; @@ -247,8 +251,8 @@ /* Generate left-justified code followed by all possible bit sequences */ lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { - dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; - lookbits++; + dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; + lookbits++; } } } @@ -263,7 +267,7 @@ for (i = 0; i < numsymbols; i++) { int sym = htbl->huffval[i]; if (sym < 0 || sym > 15) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); } } } @@ -285,20 +289,20 @@ */ #ifdef SLOW_SHIFT_32 -#define MIN_GET_BITS 15 /* minimum allowable value */ +#define MIN_GET_BITS 15 /* minimum allowable value */ #else #define MIN_GET_BITS (BIT_BUF_SIZE-7) #endif GLOBAL(boolean) -jpeg_fill_bit_buffer (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - int nbits) +jpeg_fill_bit_buffer (bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + int nbits) /* Load up the bit buffer to a depth of at least nbits */ { /* Copy heavily used state fields into locals (hopefully registers) */ - register const JOCTET * next_input_byte = state->next_input_byte; + register const JOCTET *next_input_byte = state->next_input_byte; register size_t bytes_in_buffer = state->bytes_in_buffer; j_decompress_ptr cinfo = state->cinfo; @@ -306,54 +310,54 @@ /* (It is assumed that no request will be for more than that many bits.) */ /* We fail to do so only if we hit a marker or are forced to suspend. */ - if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ + if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ while (bits_left < MIN_GET_BITS) { register int c; /* Attempt to read a byte */ if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; c = GETJOCTET(*next_input_byte++); /* If it's 0xFF, check and discard stuffed zero byte */ if (c == 0xFF) { - /* Loop here to discard any padding FF's on terminating marker, - * so that we can save a valid unread_marker value. NOTE: we will - * accept multiple FF's followed by a 0 as meaning a single FF data - * byte. This data pattern is not valid according to the standard. - */ - do { - if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; - } - bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); - } while (c == 0xFF); - - if (c == 0) { - /* Found FF/00, which represents an FF data byte */ - c = 0xFF; - } else { - /* Oops, it's actually a marker indicating end of compressed data. - * Save the marker code for later use. - * Fine point: it might appear that we should save the marker into - * bitread working state, not straight into permanent state. But - * once we have hit a marker, we cannot need to suspend within the - * current MCU, because we will read no more bytes from the data - * source. So it is OK to update permanent state right away. - */ - cinfo->unread_marker = c; - /* See if we need to insert some fake zero bits. */ - goto no_more_bytes; - } + /* Loop here to discard any padding FF's on terminating marker, + * so that we can save a valid unread_marker value. NOTE: we will + * accept multiple FF's followed by a 0 as meaning a single FF data + * byte. This data pattern is not valid according to the standard. + */ + do { + if (bytes_in_buffer == 0) { + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; + } + bytes_in_buffer--; + c = GETJOCTET(*next_input_byte++); + } while (c == 0xFF); + + if (c == 0) { + /* Found FF/00, which represents an FF data byte */ + c = 0xFF; + } else { + /* Oops, it's actually a marker indicating end of compressed data. + * Save the marker code for later use. + * Fine point: it might appear that we should save the marker into + * bitread working state, not straight into permanent state. But + * once we have hit a marker, we cannot need to suspend within the + * current MCU, because we will read no more bytes from the data + * source. So it is OK to update permanent state right away. + */ + cinfo->unread_marker = c; + /* See if we need to insert some fake zero bits. */ + goto no_more_bytes; + } } /* OK, load c into get_buffer */ @@ -373,8 +377,8 @@ * appears per data segment. */ if (! cinfo->entropy->insufficient_data) { - WARNMS(cinfo, JWRN_HIT_MARKER); - cinfo->entropy->insufficient_data = TRUE; + WARNMS(cinfo, JWRN_HIT_MARKER); + cinfo->entropy->insufficient_data = TRUE; } /* Fill the buffer with zero bits */ get_buffer <<= MIN_GET_BITS - bits_left; @@ -417,11 +421,11 @@ } \ } -#if __WORDSIZE == 64 || defined(_WIN64) +#if SIZEOF_SIZE_T==8 || defined(_WIN64) /* Pre-fetch 48 bytes, because the holding register is 64-bit */ #define FILL_BIT_BUFFER_FAST \ - if (bits_left < 16) { \ + if (bits_left <= 16) { \ GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \ } @@ -429,7 +433,7 @@ /* Pre-fetch 16 bytes, because the holding register is 32-bit */ #define FILL_BIT_BUFFER_FAST \ - if (bits_left < 16) { \ + if (bits_left <= 16) { \ GET_BYTE GET_BYTE \ } @@ -442,12 +446,12 @@ */ GLOBAL(int) -jpeg_huff_decode (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - d_derived_tbl * htbl, int min_bits) +jpeg_huff_decode (bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + d_derived_tbl *htbl, int min_bits) { register int l = min_bits; - register INT32 code; + register JLONG code; /* HUFF_DECODE has determined that the code is at least min_bits */ /* bits long, so fetch that many bits in one swoop. */ @@ -473,7 +477,7 @@ if (l > 16) { WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE); - return 0; /* fake a zero as the safest result */ + return 0; /* fake a zero as the safest result */ } return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ]; @@ -488,7 +492,8 @@ #define AVOID_TABLES #ifdef AVOID_TABLES -#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1))) +#define NEG_1 ((unsigned int)-1) +#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((NEG_1)<<(s)) + 1))) #else @@ -560,9 +565,9 @@ ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - JBLOCKROW block = MCU_data[blkn]; - d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; + JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; + d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn]; + d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn]; register int s, k, r; /* Decode a single block's worth of coefficients */ @@ -580,11 +585,13 @@ int ci = cinfo->MCU_membership[blkn]; s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; - /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ - (*block)[0] = (JCOEF) s; + if (block) { + /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ + (*block)[0] = (JCOEF) s; + } } - if (entropy->ac_needed[blkn]) { + if (entropy->ac_needed[blkn] && block) { /* Section F.2.2.2: decode the AC coefficients */ /* Since zeroes are skipped, output area must be cleared beforehand */ @@ -593,7 +600,7 @@ r = s >> 4; s &= 15; - + if (s) { k += r; CHECK_BIT_BUFFER(br_state, s, return FALSE); @@ -657,12 +664,12 @@ ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - JBLOCKROW block = MCU_data[blkn]; - d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; + JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; + d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn]; + d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn]; register int s, k, r, l; - HUFF_DECODE_FAST(s, l, dctbl); + HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu); if (s) { FILL_BIT_BUFFER_FAST r = GET_BITS(s); @@ -673,16 +680,17 @@ int ci = cinfo->MCU_membership[blkn]; s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; - (*block)[0] = (JCOEF) s; + if (block) + (*block)[0] = (JCOEF) s; } - if (entropy->ac_needed[blkn]) { + if (entropy->ac_needed[blkn] && block) { for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE_FAST(s, l, actbl); + HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); r = s >> 4; s &= 15; - + if (s) { k += r; FILL_BIT_BUFFER_FAST @@ -698,7 +706,7 @@ } else { for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE_FAST(s, l, actbl); + HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); r = s >> 4; s &= 15; @@ -715,6 +723,7 @@ } if (cinfo->unread_marker != 0) { +slow_decode_mcu: cinfo->unread_marker = 0; return FALSE; } @@ -742,7 +751,7 @@ * this module, since we'll just re-assign them on the next call.) */ -#define BUFSIZE (DCTSIZE2 * 2) +#define BUFSIZE (DCTSIZE2 * 8) METHODDEF(boolean) decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) @@ -754,7 +763,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; usefast = 0; } @@ -794,9 +803,15 @@ huff_entropy_ptr entropy; int i; + /* Motion JPEG frames typically do not include the Huffman tables if they + are the default tables. Thus, if the tables are not set by the time + the Huffman decoder is initialized (usually within the body of + jpeg_start_decompress()), we set them to default values. */ + std_huff_tables((j_common_ptr) cinfo); + entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_decoder)); + sizeof(huff_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass_huff_decoder; entropy->pub.decode_mcu = decode_mcu; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdhuff.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdhuff.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdhuff.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdhuff.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,34 +1,30 @@ /* * jdhuff.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2010-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010-2011, 2015-2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains declarations for Huffman entropy decoding routines * that are shared between the sequential decoder (jdhuff.c) and the * progressive decoder (jdphuff.c). No other modules need to see these. */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_d_derived_tbl jMkDDerived -#define jpeg_fill_bit_buffer jFilBitBuf -#define jpeg_huff_decode jHufDecode -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#include "jconfigint.h" /* Derived data constructed for each Huffman table */ -#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ +#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ typedef struct { /* Basic tables: (element [0] of each array is unused) */ - INT32 maxcode[18]; /* largest code of length k (-1 if none) */ + JLONG maxcode[18]; /* largest code of length k (-1 if none) */ /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */ - INT32 valoffset[18]; /* huffval[] offset for codes of length k */ + JLONG valoffset[18]; /* huffval[] offset for codes of length k */ /* valoffset[k] = huffval[] index of 1st symbol of code length k, less * the smallest code of length k; so given a code of length k, the * corresponding symbol is huffval[code + valoffset[k]] @@ -52,8 +48,8 @@ /* Expand a Huffman table definition into the derived format */ EXTERN(void) jpeg_make_d_derived_tbl - JPP((j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl ** pdtbl)); + (j_decompress_ptr cinfo, boolean isDC, int tblno, + d_derived_tbl ** pdtbl); /* @@ -74,15 +70,19 @@ * necessary. */ -#if __WORDSIZE == 64 || defined(_WIN64) +#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) +#error Cannot determine word size +#endif + +#if SIZEOF_SIZE_T==8 || defined(_WIN64) -typedef size_t bit_buf_type; /* type of bit-extraction buffer */ -#define BIT_BUF_SIZE 64 /* size of buffer in bits */ +typedef size_t bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 64 /* size of buffer in bits */ #else -typedef INT32 bit_buf_type; /* type of bit-extraction buffer */ -#define BIT_BUF_SIZE 32 /* size of buffer in bits */ +typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 32 /* size of buffer in bits */ #endif @@ -93,43 +93,43 @@ * because not all machines measure sizeof in 8-bit bytes. */ -typedef struct { /* Bitreading state saved across MCUs */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ +typedef struct { /* Bitreading state saved across MCUs */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ } bitread_perm_state; -typedef struct { /* Bitreading working state within an MCU */ +typedef struct { /* Bitreading working state within an MCU */ /* Current data source location */ /* We need a copy, rather than munging the original, in case of suspension */ - const JOCTET * next_input_byte; /* => next byte to read from source */ - size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ + const JOCTET *next_input_byte; /* => next byte to read from source */ + size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ /* Bit input buffer --- note these values are kept in register variables, * not in this struct, inside the inner loops. */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ /* Pointer needed by jpeg_fill_bit_buffer. */ - j_decompress_ptr cinfo; /* back link to decompress master record */ + j_decompress_ptr cinfo; /* back link to decompress master record */ } bitread_working_state; /* Macros to declare and load/save bitread local variables. */ #define BITREAD_STATE_VARS \ - register bit_buf_type get_buffer; \ - register int bits_left; \ - bitread_working_state br_state + register bit_buf_type get_buffer; \ + register int bits_left; \ + bitread_working_state br_state #define BITREAD_LOAD_STATE(cinfop,permstate) \ - br_state.cinfo = cinfop; \ - br_state.next_input_byte = cinfop->src->next_input_byte; \ - br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ - get_buffer = permstate.get_buffer; \ - bits_left = permstate.bits_left; + br_state.cinfo = cinfop; \ + br_state.next_input_byte = cinfop->src->next_input_byte; \ + br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ + get_buffer = permstate.get_buffer; \ + bits_left = permstate.bits_left; #define BITREAD_SAVE_STATE(cinfop,permstate) \ - cinfop->src->next_input_byte = br_state.next_input_byte; \ - cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ - permstate.get_buffer = get_buffer; \ - permstate.bits_left = bits_left + cinfop->src->next_input_byte = br_state.next_input_byte; \ + cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ + permstate.get_buffer = get_buffer; \ + permstate.bits_left = bits_left /* * These macros provide the in-line portion of bit fetching. @@ -137,37 +137,37 @@ * before using GET_BITS, PEEK_BITS, or DROP_BITS. * The variables get_buffer and bits_left are assumed to be locals, * but the state struct might not be (jpeg_huff_decode needs this). - * CHECK_BIT_BUFFER(state,n,action); - * Ensure there are N bits in get_buffer; if suspend, take action. + * CHECK_BIT_BUFFER(state,n,action); + * Ensure there are N bits in get_buffer; if suspend, take action. * val = GET_BITS(n); - * Fetch next N bits. + * Fetch next N bits. * val = PEEK_BITS(n); - * Fetch next N bits without removing them from the buffer. - * DROP_BITS(n); - * Discard next N bits. + * Fetch next N bits without removing them from the buffer. + * DROP_BITS(n); + * Discard next N bits. * The value N should be a simple variable, not an expression, because it * is evaluated multiple times. */ #define CHECK_BIT_BUFFER(state,nbits,action) \ - { if (bits_left < (nbits)) { \ - if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ - { action; } \ - get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } + { if (bits_left < (nbits)) { \ + if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ + { action; } \ + get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } #define GET_BITS(nbits) \ - (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) + (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) #define PEEK_BITS(nbits) \ - (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) + (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) #define DROP_BITS(nbits) \ - (bits_left -= (nbits)) + (bits_left -= (nbits)) /* Load up the bit buffer to a depth of at least nbits */ EXTERN(boolean) jpeg_fill_bit_buffer - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, int nbits)); + (bitread_working_state *state, register bit_buf_type get_buffer, + register int bits_left, int nbits); /* @@ -203,12 +203,12 @@ } else { \ slowlabel: \ if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ - { failaction; } \ + { failaction; } \ get_buffer = state.get_buffer; bits_left = state.bits_left; \ } \ } -#define HUFF_DECODE_FAST(s,nb,htbl) \ +#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \ FILL_BIT_BUFFER_FAST; \ s = PEEK_BITS(HUFF_LOOKAHEAD); \ s = htbl->lookup[s]; \ @@ -225,10 +225,12 @@ s |= GET_BITS(1); \ nb++; \ } \ - s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \ + if (nb > 16) \ + goto slowlabel; \ + s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \ } /* Out-of-line case for Huffman code fetching */ EXTERN(int) jpeg_huff_decode - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, d_derived_tbl * htbl, int min_bits)); + (bitread_working_state *state, register bit_buf_type get_buffer, + register int bits_left, d_derived_tbl *htbl, int min_bits); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdinput.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdinput.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdinput.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdinput.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,13 @@ /* * jdinput.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Modified 2002-2009 by Guido Vollbeding. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains input control logic for the JPEG decompressor. * These routines are concerned with controlling the decompressor's input @@ -24,93 +26,20 @@ typedef struct { struct jpeg_input_controller pub; /* public fields */ - boolean inheaders; /* TRUE until first SOS is reached */ + boolean inheaders; /* TRUE until first SOS is reached */ } my_input_controller; -typedef my_input_controller * my_inputctl_ptr; +typedef my_input_controller *my_inputctl_ptr; /* Forward declarations */ -METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo)); +METHODDEF(int) consume_markers (j_decompress_ptr cinfo); /* * Routines to calculate various quantities related to the size of the image. */ - -#if JPEG_LIB_VERSION >= 80 -/* - * Compute output image dimensions and related values. - * NOTE: this is exported for possible use by application. - * Hence it mustn't do anything that can't be done twice. - */ - -GLOBAL(void) -jpeg_core_output_dimensions (j_decompress_ptr cinfo) -/* Do computations that are needed before master selection phase. - * This function is used for transcoding and full decompression. - */ -{ -#ifdef IDCT_SCALING_SUPPORTED - int ci; - jpeg_component_info *compptr; - - /* Compute actual output image dimensions and DCT scaling choices. */ - if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) { - /* Provide 1/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 1; - cinfo->min_DCT_v_scaled_size = 1; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) { - /* Provide 2/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 2; - cinfo->min_DCT_v_scaled_size = 2; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) { - /* Provide 4/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 4; - cinfo->min_DCT_v_scaled_size = 4; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) { - /* Provide 8/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 8; - cinfo->min_DCT_v_scaled_size = 8; - } - /* Recompute dimensions of components */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size; - compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size; - } - -#else /* !IDCT_SCALING_SUPPORTED */ - - /* Hardwire it to "no scaling" */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; - /* jdinput.c has already initialized DCT_scaled_size, - * and has computed unscaled downsampled_width and downsampled_height. - */ - -#endif /* IDCT_SCALING_SUPPORTED */ -} -#endif - - LOCAL(void) initial_setup (j_decompress_ptr cinfo) /* Called once, when first SOS marker is reached */ @@ -130,7 +59,7 @@ /* Check that number of components won't exceed internal array sizes */ if (cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); /* Compute maximum sampling factors; check factor validity */ cinfo->max_h_samp_factor = 1; @@ -138,12 +67,12 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, - compptr->h_samp_factor); + compptr->h_samp_factor); cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, - compptr->v_samp_factor); + compptr->v_samp_factor); } #if JPEG_LIB_VERSION >=80 @@ -173,10 +102,15 @@ /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (cinfo->max_v_samp_factor * DCTSIZE)); + /* Set the first and last MCU columns to decompress from multi-scan images. + * By default, decompress all of the MCU columns. + */ + cinfo->master->first_MCU_col[ci] = 0; + cinfo->master->last_MCU_col[ci] = compptr->width_in_blocks - 1; /* downsampled_width and downsampled_height will also be overridden by * jdmaster.c if we are doing full decompression. The transcoder library * doesn't use these values, but the calling application might. @@ -184,10 +118,10 @@ /* Size in samples */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + (long) cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + (long) cinfo->max_v_samp_factor); /* Mark component needed, until color conversion says otherwise */ compptr->component_needed = TRUE; /* Mark no quantization table yet saved for component */ @@ -197,7 +131,7 @@ /* Compute number of fully interleaved MCU rows. */ cinfo->total_iMCU_rows = (JDIMENSION) jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + (long) (cinfo->max_v_samp_factor*DCTSIZE)); /* Decide whether file contains multiple scans */ if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode) @@ -214,16 +148,16 @@ { int ci, mcublks, tmp; jpeg_component_info *compptr; - + if (cinfo->comps_in_scan == 1) { - + /* Noninterleaved (single-component) scan */ compptr = cinfo->cur_comp_info[0]; - + /* Overall image size in MCUs */ cinfo->MCUs_per_row = compptr->width_in_blocks; cinfo->MCU_rows_in_scan = compptr->height_in_blocks; - + /* For noninterleaved scan, always one block per MCU */ compptr->MCU_width = 1; compptr->MCU_height = 1; @@ -236,28 +170,28 @@ tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; - + /* Prepare array describing MCU composition */ cinfo->blocks_in_MCU = 1; cinfo->MCU_membership[0] = 0; - + } else { - + /* Interleaved (multi-component) scan */ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, - MAX_COMPS_IN_SCAN); - + MAX_COMPS_IN_SCAN); + /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) jdiv_round_up((long) cinfo->image_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + (long) (cinfo->max_h_samp_factor*DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - + (long) (cinfo->max_v_samp_factor*DCTSIZE)); + cinfo->blocks_in_MCU = 0; - + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; /* Sampling factors give # of blocks of component in each MCU */ @@ -275,12 +209,12 @@ /* Prepare array describing MCU composition */ mcublks = compptr->MCU_blocks; if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU) - ERREXIT(cinfo, JERR_BAD_MCU_SIZE); + ERREXIT(cinfo, JERR_BAD_MCU_SIZE); while (mcublks-- > 0) { - cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; } } - + } } @@ -311,7 +245,7 @@ { int ci, qtblno; jpeg_component_info *compptr; - JQUANT_TBL * qtbl; + JQUANT_TBL *qtbl; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; @@ -321,13 +255,13 @@ /* Make sure specified quantization table is present */ qtblno = compptr->quant_tbl_no; if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) + cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); /* OK, save away the quantization table */ qtbl = (JQUANT_TBL *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(JQUANT_TBL)); - MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL)); + sizeof(JQUANT_TBL)); + MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL)); compptr->quant_table = qtbl; } } @@ -386,31 +320,31 @@ val = (*cinfo->marker->read_markers) (cinfo); switch (val) { - case JPEG_REACHED_SOS: /* Found SOS */ - if (inputctl->inheaders) { /* 1st SOS */ + case JPEG_REACHED_SOS: /* Found SOS */ + if (inputctl->inheaders) { /* 1st SOS */ initial_setup(cinfo); inputctl->inheaders = FALSE; /* Note: start_input_pass must be called by jdmaster.c * before any more input can be consumed. jdapimin.c is * responsible for enforcing this sequencing. */ - } else { /* 2nd or later SOS marker */ + } else { /* 2nd or later SOS marker */ if (! inputctl->pub.has_multiple_scans) - ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ + ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ start_input_pass(cinfo); } break; - case JPEG_REACHED_EOI: /* Found EOI */ + case JPEG_REACHED_EOI: /* Found EOI */ inputctl->pub.eoi_reached = TRUE; - if (inputctl->inheaders) { /* Tables-only datastream, apparently */ + if (inputctl->inheaders) { /* Tables-only datastream, apparently */ if (cinfo->marker->saw_SOF) - ERREXIT(cinfo, JERR_SOF_NO_SOS); + ERREXIT(cinfo, JERR_SOF_NO_SOS); } else { /* Prevent infinite loop in coef ctlr's decompress_data routine * if user set output_scan_number larger than number of scans. */ if (cinfo->output_scan_number > cinfo->input_scan_number) - cinfo->output_scan_number = cinfo->input_scan_number; + cinfo->output_scan_number = cinfo->input_scan_number; } break; case JPEG_SUSPENDED: @@ -455,7 +389,7 @@ /* Create subobject in permanent pool */ inputctl = (my_inputctl_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_input_controller)); + sizeof(my_input_controller)); cinfo->inputctl = (struct jpeg_input_controller *) inputctl; /* Initialize method pointers */ inputctl->pub.consume_input = consume_markers; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmainct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmainct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmainct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmainct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdmainct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the main buffer controller for decompression. * The main buffer lies between the JPEG decompressor proper and the @@ -14,10 +16,8 @@ * supplies the equivalent of the main buffer in that case. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" -#include "jpegcomp.h" +#include "jdmainct.h" /* @@ -111,47 +111,17 @@ */ -/* Private buffer controller object */ - -typedef struct { - struct jpeg_d_main_controller pub; /* public fields */ - - /* Pointer to allocated workspace (M or M+2 row groups). */ - JSAMPARRAY buffer[MAX_COMPONENTS]; - - boolean buffer_full; /* Have we gotten an iMCU row from decoder? */ - JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */ - - /* Remaining fields are only used in the context case. */ - - /* These are the master pointers to the funny-order pointer lists. */ - JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */ - - int whichptr; /* indicates which pointer set is now in use */ - int context_state; /* process_data state machine status */ - JDIMENSION rowgroups_avail; /* row groups available to postprocessor */ - JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */ -} my_main_controller; - -typedef my_main_controller * my_main_ptr; - -/* context_state values: */ -#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */ -#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */ -#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */ - - /* Forward declarations */ METHODDEF(void) process_data_simple_main - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); METHODDEF(void) process_data_context_main - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED METHODDEF(void) process_data_crank_post - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); #endif @@ -172,7 +142,7 @@ */ main_ptr->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * 2 * SIZEOF(JSAMPARRAY)); + cinfo->num_components * 2 * sizeof(JSAMPARRAY)); main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -184,8 +154,8 @@ */ xbuf = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW)); - xbuf += rgroup; /* want one row group at negative offsets */ + 2 * (rgroup * (M + 4)) * sizeof(JSAMPROW)); + xbuf += rgroup; /* want one row group at negative offsets */ main_ptr->xbuffer[0][ci] = xbuf; xbuf += rgroup * (M + 4); main_ptr->xbuffer[1][ci] = xbuf; @@ -237,34 +207,6 @@ LOCAL(void) -set_wraparound_pointers (j_decompress_ptr cinfo) -/* Set up the "wraparound" pointers at top and bottom of the pointer lists. - * This changes the pointer list state from top-of-image to the normal state. - */ -{ - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; - int ci, i, rgroup; - int M = cinfo->_min_DCT_scaled_size; - jpeg_component_info *compptr; - JSAMPARRAY xbuf0, xbuf1; - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; /* height of a row group of component */ - xbuf0 = main_ptr->xbuffer[0][ci]; - xbuf1 = main_ptr->xbuffer[1][ci]; - for (i = 0; i < rgroup; i++) { - xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; - xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; - xbuf0[rgroup*(M+2) + i] = xbuf0[i]; - xbuf1[rgroup*(M+2) + i] = xbuf1[i]; - } - } -} - - -LOCAL(void) set_bottom_pointers (j_decompress_ptr cinfo) /* Change the pointer lists to duplicate the last sample row at the bottom * of the image. whichptr indicates which xbuffer holds the final iMCU row. @@ -315,14 +257,14 @@ if (cinfo->upsample->need_context_rows) { main_ptr->pub.process_data = process_data_context_main; make_funny_pointers(cinfo); /* Create the xbuffer[] lists */ - main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ + main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; main_ptr->iMCU_row_ctr = 0; } else { /* Simple case with no context needed */ main_ptr->pub.process_data = process_data_simple_main; } - main_ptr->buffer_full = FALSE; /* Mark buffer empty */ + main_ptr->buffer_full = FALSE; /* Mark buffer empty */ main_ptr->rowgroup_ctr = 0; break; #ifdef QUANT_2PASS_SUPPORTED @@ -345,8 +287,8 @@ METHODDEF(void) process_data_simple_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; JDIMENSION rowgroups_avail; @@ -354,8 +296,8 @@ /* Read input data if we haven't filled the main buffer yet */ if (! main_ptr->buffer_full) { if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer)) - return; /* suspension forced, can do nothing more */ - main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + return; /* suspension forced, can do nothing more */ + main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ } /* There are always min_DCT_scaled_size row groups in an iMCU row. */ @@ -367,8 +309,8 @@ /* Feed the postprocessor */ (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer, - &main_ptr->rowgroup_ctr, rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); /* Has postprocessor consumed all the data yet? If so, mark buffer empty */ if (main_ptr->rowgroup_ctr >= rowgroups_avail) { @@ -385,18 +327,18 @@ METHODDEF(void) process_data_context_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; /* Read input data if we haven't filled the main buffer yet */ if (! main_ptr->buffer_full) { if (! (*cinfo->coef->decompress_data) (cinfo, - main_ptr->xbuffer[main_ptr->whichptr])) - return; /* suspension forced, can do nothing more */ - main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ - main_ptr->iMCU_row_ctr++; /* count rows received */ + main_ptr->xbuffer[main_ptr->whichptr])) + return; /* suspension forced, can do nothing more */ + main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + main_ptr->iMCU_row_ctr++; /* count rows received */ } /* Postprocessor typically will not swallow all the input data it is handed @@ -408,13 +350,13 @@ case CTX_POSTPONED_ROW: /* Call postprocessor using previously set pointers for postponed row */ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) - return; /* Need to suspend */ + return; /* Need to suspend */ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; if (*out_row_ctr >= out_rows_avail) - return; /* Postprocessor exactly filled output buf */ + return; /* Postprocessor exactly filled output buf */ /*FALLTHROUGH*/ case CTX_PREPARE_FOR_IMCU: /* Prepare to process first M-1 row groups of this iMCU row */ @@ -430,15 +372,15 @@ case CTX_PROCESS_IMCU: /* Call postprocessor using previously set pointers */ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) - return; /* Need to suspend */ + return; /* Need to suspend */ /* After the first iMCU, change wraparound pointers to normal state */ if (main_ptr->iMCU_row_ctr == 1) set_wraparound_pointers(cinfo); /* Prepare to load new iMCU row using other xbuffer list */ - main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */ + main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */ main_ptr->buffer_full = FALSE; /* Still need to process last row group of this iMCU row, */ /* which is saved at index M+1 of the other xbuffer */ @@ -459,12 +401,12 @@ METHODDEF(void) process_data_crank_post (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL, - (JDIMENSION *) NULL, (JDIMENSION) 0, - output_buf, out_row_ctr, out_rows_avail); + (JDIMENSION *) NULL, (JDIMENSION) 0, + output_buf, out_row_ctr, out_rows_avail); } #endif /* QUANT_2PASS_SUPPORTED */ @@ -483,11 +425,11 @@ main_ptr = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); + sizeof(my_main_controller)); cinfo->main = (struct jpeg_d_main_controller *) main_ptr; main_ptr->pub.start_pass = start_pass_main; - if (need_full_buffer) /* shouldn't happen */ + if (need_full_buffer) /* shouldn't happen */ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); /* Allocate the workspace. @@ -507,8 +449,8 @@ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / cinfo->_min_DCT_scaled_size; /* height of a row group of component */ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * compptr->_DCT_scaled_size, - (JDIMENSION) (rgroup * ngroups)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + compptr->width_in_blocks * compptr->_DCT_scaled_size, + (JDIMENSION) (rgroup * ngroups)); } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmainct.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmainct.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmainct.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmainct.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,71 @@ +/* + * jdmainct.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" +#include "jpegcomp.h" + + +/* Private buffer controller object */ + +typedef struct { + struct jpeg_d_main_controller pub; /* public fields */ + + /* Pointer to allocated workspace (M or M+2 row groups). */ + JSAMPARRAY buffer[MAX_COMPONENTS]; + + boolean buffer_full; /* Have we gotten an iMCU row from decoder? */ + JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */ + + /* Remaining fields are only used in the context case. */ + + /* These are the master pointers to the funny-order pointer lists. */ + JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */ + + int whichptr; /* indicates which pointer set is now in use */ + int context_state; /* process_data state machine status */ + JDIMENSION rowgroups_avail; /* row groups available to postprocessor */ + JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */ +} my_main_controller; + +typedef my_main_controller *my_main_ptr; + + +/* context_state values: */ +#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */ +#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */ +#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */ + + +LOCAL(void) +set_wraparound_pointers (j_decompress_ptr cinfo) +/* Set up the "wraparound" pointers at top and bottom of the pointer lists. + * This changes the pointer list state from top-of-image to the normal state. + */ +{ + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + int ci, i, rgroup; + int M = cinfo->_min_DCT_scaled_size; + jpeg_component_info *compptr; + JSAMPARRAY xbuf0, xbuf1; + + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / + cinfo->_min_DCT_scaled_size; /* height of a row group of component */ + xbuf0 = main_ptr->xbuffer[0][ci]; + xbuf1 = main_ptr->xbuffer[1][ci]; + for (i = 0; i < rgroup; i++) { + xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; + xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; + xbuf0[rgroup*(M+2) + i] = xbuf0[i]; + xbuf1[rgroup*(M+2) + i] = xbuf1[i]; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmarker.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmarker.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmarker.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmarker.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdmarker.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * Copyright (C) 2012, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2012, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains routines to decode JPEG datastream markers. * Most of the complexity arises from our desire to support input @@ -18,29 +20,29 @@ #include "jpeglib.h" -typedef enum { /* JPEG marker codes */ +typedef enum { /* JPEG marker codes */ M_SOF0 = 0xc0, M_SOF1 = 0xc1, M_SOF2 = 0xc2, M_SOF3 = 0xc3, - + M_SOF5 = 0xc5, M_SOF6 = 0xc6, M_SOF7 = 0xc7, - + M_JPG = 0xc8, M_SOF9 = 0xc9, M_SOF10 = 0xca, M_SOF11 = 0xcb, - + M_SOF13 = 0xcd, M_SOF14 = 0xce, M_SOF15 = 0xcf, - + M_DHT = 0xc4, - + M_DAC = 0xcc, - + M_RST0 = 0xd0, M_RST1 = 0xd1, M_RST2 = 0xd2, @@ -49,7 +51,7 @@ M_RST5 = 0xd5, M_RST6 = 0xd6, M_RST7 = 0xd7, - + M_SOI = 0xd8, M_EOI = 0xd9, M_SOS = 0xda, @@ -58,7 +60,7 @@ M_DRI = 0xdd, M_DHP = 0xde, M_EXP = 0xdf, - + M_APP0 = 0xe0, M_APP1 = 0xe1, M_APP2 = 0xe2, @@ -75,13 +77,13 @@ M_APP13 = 0xed, M_APP14 = 0xee, M_APP15 = 0xef, - + M_JPG0 = 0xf0, M_JPG13 = 0xfd, M_COM = 0xfe, - + M_TEM = 0x01, - + M_ERROR = 0x100 } JPEG_MARKER; @@ -100,12 +102,12 @@ unsigned int length_limit_APPn[16]; /* Status of COM/APPn marker saving */ - jpeg_saved_marker_ptr cur_marker; /* NULL if not processing a marker */ - unsigned int bytes_read; /* data bytes read so far in marker */ + jpeg_saved_marker_ptr cur_marker; /* NULL if not processing a marker */ + unsigned int bytes_read; /* data bytes read so far in marker */ /* Note: cur_marker is not linked into marker_list until it's all read. */ } my_marker_reader; -typedef my_marker_reader * my_marker_ptr; +typedef my_marker_reader *my_marker_ptr; /* @@ -118,49 +120,49 @@ /* Declare and initialize local copies of input pointer/count */ #define INPUT_VARS(cinfo) \ - struct jpeg_source_mgr * datasrc = (cinfo)->src; \ - const JOCTET * next_input_byte = datasrc->next_input_byte; \ - size_t bytes_in_buffer = datasrc->bytes_in_buffer + struct jpeg_source_mgr *datasrc = (cinfo)->src; \ + const JOCTET *next_input_byte = datasrc->next_input_byte; \ + size_t bytes_in_buffer = datasrc->bytes_in_buffer /* Unload the local copies --- do this only at a restart boundary */ #define INPUT_SYNC(cinfo) \ - ( datasrc->next_input_byte = next_input_byte, \ - datasrc->bytes_in_buffer = bytes_in_buffer ) + ( datasrc->next_input_byte = next_input_byte, \ + datasrc->bytes_in_buffer = bytes_in_buffer ) /* Reload the local copies --- used only in MAKE_BYTE_AVAIL */ #define INPUT_RELOAD(cinfo) \ - ( next_input_byte = datasrc->next_input_byte, \ - bytes_in_buffer = datasrc->bytes_in_buffer ) + ( next_input_byte = datasrc->next_input_byte, \ + bytes_in_buffer = datasrc->bytes_in_buffer ) /* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available. * Note we do *not* do INPUT_SYNC before calling fill_input_buffer, * but we must reload the local copies after a successful fill. */ #define MAKE_BYTE_AVAIL(cinfo,action) \ - if (bytes_in_buffer == 0) { \ - if (! (*datasrc->fill_input_buffer) (cinfo)) \ - { action; } \ - INPUT_RELOAD(cinfo); \ - } + if (bytes_in_buffer == 0) { \ + if (! (*datasrc->fill_input_buffer) (cinfo)) \ + { action; } \ + INPUT_RELOAD(cinfo); \ + } /* Read a byte into variable V. * If must suspend, take the specified action (typically "return FALSE"). */ #define INPUT_BYTE(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = GETJOCTET(*next_input_byte++); ) + MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V = GETJOCTET(*next_input_byte++); ) /* As above, but read two bytes interpreted as an unsigned 16-bit integer. - * V should be declared unsigned int or perhaps INT32. + * V should be declared unsigned int or perhaps JLONG. */ #define INPUT_2BYTES(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \ - MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V += GETJOCTET(*next_input_byte++); ) + MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \ + MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V += GETJOCTET(*next_input_byte++); ) /* @@ -199,7 +201,7 @@ /* Process an SOI marker */ { int i; - + TRACEMS(cinfo, 1, JTRC_SOI); if (cinfo->marker->saw_SOI) @@ -238,9 +240,9 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) /* Process a SOFn marker */ { - INT32 length; + JLONG length; int c, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; INPUT_VARS(cinfo); cinfo->progressive_mode = is_prog; @@ -256,8 +258,8 @@ length -= 8; TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker, - (int) cinfo->image_width, (int) cinfo->image_height, - cinfo->num_components); + (int) cinfo->image_width, (int) cinfo->image_height, + cinfo->num_components); if (cinfo->marker->saw_SOF) ERREXIT(cinfo, JERR_SOF_DUPLICATE); @@ -272,11 +274,11 @@ if (length != (cinfo->num_components * 3)) ERREXIT(cinfo, JERR_BAD_LENGTH); - if (cinfo->comp_info == NULL) /* do only once, even if suspend */ + if (cinfo->comp_info == NULL) /* do only once, even if suspend */ cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * SIZEOF(jpeg_component_info)); - + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->num_components * sizeof(jpeg_component_info)); + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { compptr->component_index = ci; @@ -287,8 +289,8 @@ INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE); TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT, - compptr->component_id, compptr->h_samp_factor, - compptr->v_samp_factor, compptr->quant_tbl_no); + compptr->component_id, compptr->h_samp_factor, + compptr->v_samp_factor, compptr->quant_tbl_no); } cinfo->marker->saw_SOF = TRUE; @@ -302,9 +304,9 @@ get_sos (j_decompress_ptr cinfo) /* Process a SOS marker */ { - INT32 length; - int i, ci, n, c, cc; - jpeg_component_info * compptr; + JLONG length; + int i, ci, n, c, cc, pi; + jpeg_component_info *compptr; INPUT_VARS(cinfo); if (! cinfo->marker->saw_SOF) @@ -323,17 +325,18 @@ /* Collect the component-spec parameters */ - for (i = 0; i < cinfo->num_components; i++) + for (i = 0; i < MAX_COMPS_IN_SCAN; i++) cinfo->cur_comp_info[i] = NULL; for (i = 0; i < n; i++) { INPUT_BYTE(cinfo, cc, return FALSE); INPUT_BYTE(cinfo, c, return FALSE); - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + + for (ci = 0, compptr = cinfo->comp_info; + ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN; + ci++, compptr++) { if (cc == compptr->component_id && !cinfo->cur_comp_info[ci]) - goto id_found; + goto id_found; } ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); @@ -343,9 +346,16 @@ cinfo->cur_comp_info[i] = compptr; compptr->dc_tbl_no = (c >> 4) & 15; compptr->ac_tbl_no = (c ) & 15; - + TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc, - compptr->dc_tbl_no, compptr->ac_tbl_no); + compptr->dc_tbl_no, compptr->ac_tbl_no); + + /* This CSi (cc) should differ from the previous CSi */ + for (pi = 0; pi < i; pi++) { + if (cinfo->cur_comp_info[pi] == compptr) { + ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); + } + } } /* Collect the additional scan parameters Ss, Se, Ah/Al. */ @@ -358,7 +368,7 @@ cinfo->Al = (c ) & 15; TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se, - cinfo->Ah, cinfo->Al); + cinfo->Ah, cinfo->Al); /* Prepare to scan data & restart markers */ cinfo->marker->next_restart_num = 0; @@ -377,13 +387,13 @@ get_dac (j_decompress_ptr cinfo) /* Process a DAC marker */ { - INT32 length; + JLONG length; int index, val; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + while (length > 0) { INPUT_BYTE(cinfo, index, return FALSE); INPUT_BYTE(cinfo, val, return FALSE); @@ -397,11 +407,11 @@ if (index >= NUM_ARITH_TBLS) { /* define AC table */ cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val; - } else { /* define DC table */ + } else { /* define DC table */ cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F); cinfo->arith_dc_U[index] = (UINT8) (val >> 4); if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index]) - ERREXIT1(cinfo, JERR_DAC_VALUE, val); + ERREXIT1(cinfo, JERR_DAC_VALUE, val); } } @@ -423,7 +433,7 @@ get_dht (j_decompress_ptr cinfo) /* Process a DHT marker */ { - INT32 length; + JLONG length; UINT8 bits[17]; UINT8 huffval[256]; int i, index, count; @@ -432,12 +442,12 @@ INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + while (length > 16) { INPUT_BYTE(cinfo, index, return FALSE); TRACEMS1(cinfo, 1, JTRC_DHT, index); - + bits[0] = 0; count = 0; for (i = 1; i <= 16; i++) { @@ -448,38 +458,41 @@ length -= 1 + 16; TRACEMS8(cinfo, 2, JTRC_HUFFBITS, - bits[1], bits[2], bits[3], bits[4], - bits[5], bits[6], bits[7], bits[8]); + bits[1], bits[2], bits[3], bits[4], + bits[5], bits[6], bits[7], bits[8]); TRACEMS8(cinfo, 2, JTRC_HUFFBITS, - bits[9], bits[10], bits[11], bits[12], - bits[13], bits[14], bits[15], bits[16]); + bits[9], bits[10], bits[11], bits[12], + bits[13], bits[14], bits[15], bits[16]); /* Here we just do minimal validation of the counts to avoid walking * off the end of our table space. jdhuff.c will check more carefully. */ - if (count > 256 || ((INT32) count) > length) + if (count > 256 || ((JLONG) count) > length) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); for (i = 0; i < count; i++) INPUT_BYTE(cinfo, huffval[i], return FALSE); + MEMZERO(&huffval[count], (256 - count) * sizeof(UINT8)); + length -= count; - if (index & 0x10) { /* AC table definition */ + if (index & 0x10) { /* AC table definition */ index -= 0x10; + if (index < 0 || index >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_DHT_INDEX, index); htblptr = &cinfo->ac_huff_tbl_ptrs[index]; - } else { /* DC table definition */ + } else { /* DC table definition */ + if (index < 0 || index >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_DHT_INDEX, index); htblptr = &cinfo->dc_huff_tbl_ptrs[index]; } - if (index < 0 || index >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_DHT_INDEX, index); - if (*htblptr == NULL) *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - - MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits)); - MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval)); + + MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits)); + MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval)); } if (length != 0) @@ -494,7 +507,7 @@ get_dqt (j_decompress_ptr cinfo) /* Process a DQT marker */ { - INT32 length; + JLONG length; int n, i, prec; unsigned int tmp; JQUANT_TBL *quant_ptr; @@ -512,27 +525,27 @@ if (n >= NUM_QUANT_TBLS) ERREXIT1(cinfo, JERR_DQT_INDEX, n); - + if (cinfo->quant_tbl_ptrs[n] == NULL) cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo); quant_ptr = cinfo->quant_tbl_ptrs[n]; for (i = 0; i < DCTSIZE2; i++) { if (prec) - INPUT_2BYTES(cinfo, tmp, return FALSE); + INPUT_2BYTES(cinfo, tmp, return FALSE); else - INPUT_BYTE(cinfo, tmp, return FALSE); + INPUT_BYTE(cinfo, tmp, return FALSE); /* We convert the zigzag-order table to natural array order. */ quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp; } if (cinfo->err->trace_level >= 2) { for (i = 0; i < DCTSIZE2; i += 8) { - TRACEMS8(cinfo, 2, JTRC_QUANTVALS, - quant_ptr->quantval[i], quant_ptr->quantval[i+1], - quant_ptr->quantval[i+2], quant_ptr->quantval[i+3], - quant_ptr->quantval[i+4], quant_ptr->quantval[i+5], - quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]); + TRACEMS8(cinfo, 2, JTRC_QUANTVALS, + quant_ptr->quantval[i], quant_ptr->quantval[i+1], + quant_ptr->quantval[i+2], quant_ptr->quantval[i+3], + quant_ptr->quantval[i+4], quant_ptr->quantval[i+5], + quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]); } } @@ -552,12 +565,12 @@ get_dri (j_decompress_ptr cinfo) /* Process a DRI marker */ { - INT32 length; + JLONG length; unsigned int tmp; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); - + if (length != 4) ERREXIT(cinfo, JERR_BAD_LENGTH); @@ -579,20 +592,20 @@ * JFIF and Adobe markers, respectively. */ -#define APP0_DATA_LEN 14 /* Length of interesting data in APP0 */ -#define APP14_DATA_LEN 12 /* Length of interesting data in APP14 */ -#define APPN_DATA_LEN 14 /* Must be the largest of the above!! */ +#define APP0_DATA_LEN 14 /* Length of interesting data in APP0 */ +#define APP14_DATA_LEN 12 /* Length of interesting data in APP14 */ +#define APPN_DATA_LEN 14 /* Must be the largest of the above!! */ LOCAL(void) -examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data, - unsigned int datalen, INT32 remaining) +examine_app0 (j_decompress_ptr cinfo, JOCTET *data, + unsigned int datalen, JLONG remaining) /* Examine first few bytes from an APP0. * Take appropriate action if it is a JFIF marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. */ { - INT32 totallen = (INT32) datalen + remaining; + JLONG totallen = (JLONG) datalen + remaining; if (datalen >= APP0_DATA_LEN && GETJOCTET(data[0]) == 0x4A && @@ -615,18 +628,18 @@ */ if (cinfo->JFIF_major_version != 1) WARNMS2(cinfo, JWRN_JFIF_MAJOR, - cinfo->JFIF_major_version, cinfo->JFIF_minor_version); + cinfo->JFIF_major_version, cinfo->JFIF_minor_version); /* Generate trace messages */ TRACEMS5(cinfo, 1, JTRC_JFIF, - cinfo->JFIF_major_version, cinfo->JFIF_minor_version, - cinfo->X_density, cinfo->Y_density, cinfo->density_unit); + cinfo->JFIF_major_version, cinfo->JFIF_minor_version, + cinfo->X_density, cinfo->Y_density, cinfo->density_unit); /* Validate thumbnail dimensions and issue appropriate messages */ if (GETJOCTET(data[12]) | GETJOCTET(data[13])) TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, - GETJOCTET(data[12]), GETJOCTET(data[13])); + GETJOCTET(data[12]), GETJOCTET(data[13])); totallen -= APP0_DATA_LEN; if (totallen != - ((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3)) + ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG) 3)) TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen); } else if (datalen >= 6 && GETJOCTET(data[0]) == 0x4A && @@ -650,7 +663,7 @@ break; default: TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, - GETJOCTET(data[5]), (int) totallen); + GETJOCTET(data[5]), (int) totallen); break; } } else { @@ -661,8 +674,8 @@ LOCAL(void) -examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data, - unsigned int datalen, INT32 remaining) +examine_app14 (j_decompress_ptr cinfo, JOCTET *data, + unsigned int datalen, JLONG remaining) /* Examine first few bytes from an APP14. * Take appropriate action if it is an Adobe marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. @@ -695,7 +708,7 @@ get_interesting_appn (j_decompress_ptr cinfo) /* Process an APP0 or APP14 marker without saving it */ { - INT32 length; + JLONG length; JOCTET b[APPN_DATA_LEN]; unsigned int i, numtoread; INPUT_VARS(cinfo); @@ -717,10 +730,10 @@ /* process it */ switch (cinfo->unread_marker) { case M_APP0: - examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length); + examine_app0(cinfo, (JOCTET *) b, numtoread, length); break; case M_APP14: - examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length); + examine_app14(cinfo, (JOCTET *) b, numtoread, length); break; default: /* can't get here unless jpeg_save_markers chooses wrong processor */ @@ -746,33 +759,33 @@ my_marker_ptr marker = (my_marker_ptr) cinfo->marker; jpeg_saved_marker_ptr cur_marker = marker->cur_marker; unsigned int bytes_read, data_length; - JOCTET FAR * data; - INT32 length = 0; + JOCTET *data; + JLONG length = 0; INPUT_VARS(cinfo); if (cur_marker == NULL) { /* begin reading a marker */ INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - if (length >= 0) { /* watch out for bogus length word */ + if (length >= 0) { /* watch out for bogus length word */ /* figure out how much we want to save */ unsigned int limit; if (cinfo->unread_marker == (int) M_COM) - limit = marker->length_limit_COM; + limit = marker->length_limit_COM; else - limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0]; + limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0]; if ((unsigned int) length < limit) - limit = (unsigned int) length; + limit = (unsigned int) length; /* allocate and initialize the marker item */ cur_marker = (jpeg_saved_marker_ptr) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(struct jpeg_marker_struct) + limit); + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(struct jpeg_marker_struct) + limit); cur_marker->next = NULL; cur_marker->marker = (UINT8) cinfo->unread_marker; cur_marker->original_length = (unsigned int) length; cur_marker->data_length = limit; /* data area is just beyond the jpeg_marker_struct */ - data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1); + data = cur_marker->data = (JOCTET *) (cur_marker + 1); marker->cur_marker = cur_marker; marker->bytes_read = 0; bytes_read = 0; @@ -790,7 +803,7 @@ } while (bytes_read < data_length) { - INPUT_SYNC(cinfo); /* move the restart point to here */ + INPUT_SYNC(cinfo); /* move the restart point to here */ marker->bytes_read = bytes_read; /* If there's not at least one byte in buffer, suspend */ MAKE_BYTE_AVAIL(cinfo, return FALSE); @@ -803,14 +816,14 @@ } /* Done reading what we want to read */ - if (cur_marker != NULL) { /* will be NULL if bogus length word */ + if (cur_marker != NULL) { /* will be NULL if bogus length word */ /* Add new marker to end of list */ if (cinfo->marker_list == NULL) { cinfo->marker_list = cur_marker; } else { jpeg_saved_marker_ptr prev = cinfo->marker_list; while (prev->next != NULL) - prev = prev->next; + prev = prev->next; prev->next = cur_marker; } /* Reset pointer & calc remaining data length */ @@ -830,12 +843,12 @@ break; default: TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, - (int) (data_length + length)); + (int) (data_length + length)); break; } /* skip any remaining data -- could be lots */ - INPUT_SYNC(cinfo); /* do before skip_input_data */ + INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) (*cinfo->src->skip_input_data) (cinfo, (long) length); @@ -849,15 +862,15 @@ skip_variable (j_decompress_ptr cinfo) /* Skip over an unknown or uninteresting variable-length marker */ { - INT32 length; + JLONG length; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length); - INPUT_SYNC(cinfo); /* do before skip_input_data */ + INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) (*cinfo->src->skip_input_data) (cinfo, (long) length); @@ -901,7 +914,7 @@ INPUT_BYTE(cinfo, c, return FALSE); } while (c == 0xFF); if (c != 0) - break; /* found a valid marker, exit loop */ + break; /* found a valid marker, exit loop */ /* Reach here if we found a stuffed-zero data sequence (FF/00). * Discard it and loop back to try again. */ @@ -961,11 +974,11 @@ /* NB: first_marker() enforces the requirement that SOI appear first. */ if (cinfo->unread_marker == 0) { if (! cinfo->marker->saw_SOI) { - if (! first_marker(cinfo)) - return JPEG_SUSPENDED; + if (! first_marker(cinfo)) + return JPEG_SUSPENDED; } else { - if (! next_marker(cinfo)) - return JPEG_SUSPENDED; + if (! next_marker(cinfo)) + return JPEG_SUSPENDED; } } /* At this point cinfo->unread_marker contains the marker code and the @@ -975,74 +988,74 @@ switch (cinfo->unread_marker) { case M_SOI: if (! get_soi(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF0: /* Baseline */ - case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ if (! get_sof(cinfo, FALSE, FALSE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF2: /* Progressive, Huffman */ + case M_SOF2: /* Progressive, Huffman */ if (! get_sof(cinfo, TRUE, FALSE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF9: /* Extended sequential, arithmetic */ if (! get_sof(cinfo, FALSE, TRUE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF10: /* Progressive, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ if (! get_sof(cinfo, TRUE, TRUE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; /* Currently unsupported SOFn types */ - case M_SOF3: /* Lossless, Huffman */ - case M_SOF5: /* Differential sequential, Huffman */ - case M_SOF6: /* Differential progressive, Huffman */ - case M_SOF7: /* Differential lossless, Huffman */ - case M_JPG: /* Reserved for JPEG extensions */ - case M_SOF11: /* Lossless, arithmetic */ - case M_SOF13: /* Differential sequential, arithmetic */ - case M_SOF14: /* Differential progressive, arithmetic */ - case M_SOF15: /* Differential lossless, arithmetic */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_JPG: /* Reserved for JPEG extensions */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker); break; case M_SOS: if (! get_sos(cinfo)) - return JPEG_SUSPENDED; - cinfo->unread_marker = 0; /* processed the marker */ + return JPEG_SUSPENDED; + cinfo->unread_marker = 0; /* processed the marker */ return JPEG_REACHED_SOS; - + case M_EOI: TRACEMS(cinfo, 1, JTRC_EOI); - cinfo->unread_marker = 0; /* processed the marker */ + cinfo->unread_marker = 0; /* processed the marker */ return JPEG_REACHED_EOI; - + case M_DAC: if (! get_dac(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DHT: if (! get_dht(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DQT: if (! get_dqt(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DRI: if (! get_dri(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_APP0: case M_APP1: case M_APP2: @@ -1060,16 +1073,16 @@ case M_APP14: case M_APP15: if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[ - cinfo->unread_marker - (int) M_APP0]) (cinfo)) - return JPEG_SUSPENDED; + cinfo->unread_marker - (int) M_APP0]) (cinfo)) + return JPEG_SUSPENDED; break; - + case M_COM: if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_RST0: /* these are all parameterless */ + case M_RST0: /* these are all parameterless */ case M_RST1: case M_RST2: case M_RST3: @@ -1081,12 +1094,12 @@ TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker); break; - case M_DNL: /* Ignore DNL ... perhaps the wrong thing */ + case M_DNL: /* Ignore DNL ... perhaps the wrong thing */ if (! skip_variable(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - default: /* must be DHP, EXP, JPGn, or RESn */ + default: /* must be DHP, EXP, JPGn, or RESn */ /* For now, we treat the reserved markers as fatal errors since they are * likely to be used to signal incompatible JPEG Part 3 extensions. * Once the JPEG 3 version-number marker is well defined, this code @@ -1132,7 +1145,7 @@ /* Uh-oh, the restart markers have been messed up. */ /* Let the data source manager determine how to resync. */ if (! (*cinfo->src->resync_to_restart) (cinfo, - cinfo->marker->next_restart_num)) + cinfo->marker->next_restart_num)) return FALSE; } @@ -1197,25 +1210,25 @@ { int marker = cinfo->unread_marker; int action = 1; - + /* Always put up a warning. */ WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired); - + /* Outer loop handles repeated decision after scanning forward. */ for (;;) { if (marker < (int) M_SOF0) - action = 2; /* invalid marker */ + action = 2; /* invalid marker */ else if (marker < (int) M_RST0 || marker > (int) M_RST7) - action = 3; /* valid non-restart marker */ + action = 3; /* valid non-restart marker */ else { if (marker == ((int) M_RST0 + ((desired+1) & 7)) || - marker == ((int) M_RST0 + ((desired+2) & 7))) - action = 3; /* one of the next two expected restarts */ + marker == ((int) M_RST0 + ((desired+2) & 7))) + action = 3; /* one of the next two expected restarts */ else if (marker == ((int) M_RST0 + ((desired-1) & 7)) || - marker == ((int) M_RST0 + ((desired-2) & 7))) - action = 2; /* a prior restart, so advance */ + marker == ((int) M_RST0 + ((desired-2) & 7))) + action = 2; /* a prior restart, so advance */ else - action = 1; /* desired restart or too far away */ + action = 1; /* desired restart or too far away */ } TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action); switch (action) { @@ -1226,7 +1239,7 @@ case 2: /* Scan to the next marker, and repeat the decision loop. */ if (! next_marker(cinfo)) - return FALSE; + return FALSE; marker = cinfo->unread_marker; break; case 3: @@ -1247,10 +1260,10 @@ { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; - cinfo->comp_info = NULL; /* until allocated by get_sof */ - cinfo->input_scan_number = 0; /* no SOS seen yet */ - cinfo->unread_marker = 0; /* no pending marker */ - marker->pub.saw_SOI = FALSE; /* set internal state too */ + cinfo->comp_info = NULL; /* until allocated by get_sof */ + cinfo->input_scan_number = 0; /* no SOS seen yet */ + cinfo->unread_marker = 0; /* no pending marker */ + marker->pub.saw_SOI = FALSE; /* set internal state too */ marker->pub.saw_SOF = FALSE; marker->pub.discarded_bytes = 0; marker->cur_marker = NULL; @@ -1271,7 +1284,7 @@ /* Create subobject in permanent pool */ marker = (my_marker_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_marker_reader)); + sizeof(my_marker_reader)); cinfo->marker = (struct jpeg_marker_reader *) marker; /* Initialize public method pointers */ marker->pub.reset_marker_reader = reset_marker_reader; @@ -1302,7 +1315,7 @@ GLOBAL(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit) + unsigned int length_limit) { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; long maxlength; @@ -1311,7 +1324,7 @@ /* Length limit mustn't be larger than what we can allocate * (should only be a concern in a 16-bit environment). */ - maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct); + maxlength = cinfo->mem->max_alloc_chunk - sizeof(struct jpeg_marker_struct); if (((long) length_limit) > maxlength) length_limit = (unsigned int) maxlength; @@ -1351,7 +1364,7 @@ GLOBAL(void) jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code, - jpeg_marker_parser_method routine) + jpeg_marker_parser_method routine) { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmaster.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmaster.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmaster.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmaster.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,15 @@ /* * jdmaster.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2002-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2016, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains master control logic for the JPEG decompressor. * These routines are concerned with selecting the modules to be executed @@ -16,25 +21,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jpegcomp.h" - - -/* Private state */ - -typedef struct { - struct jpeg_decomp_master pub; /* public fields */ - - int pass_number; /* # of passes completed */ - - boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */ - - /* Saved references to initialized quantizer modules, - * in case we need to switch modes. - */ - struct jpeg_color_quantizer * quantizer_1pass; - struct jpeg_color_quantizer * quantizer_2pass; -} my_decomp_master; - -typedef my_decomp_master * my_master_ptr; +#include "jdmaster.h" /* @@ -49,9 +36,10 @@ /* Merging is the equivalent of plain box-filter upsampling */ if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling) return FALSE; - /* jdmerge.c only supports YCC=>RGB color conversion */ + /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */ if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 || (cinfo->out_color_space != JCS_RGB && + cinfo->out_color_space != JCS_RGB565 && cinfo->out_color_space != JCS_EXT_RGB && cinfo->out_color_space != JCS_EXT_RGBX && cinfo->out_color_space != JCS_EXT_BGR && @@ -61,8 +49,12 @@ cinfo->out_color_space != JCS_EXT_RGBA && cinfo->out_color_space != JCS_EXT_BGRA && cinfo->out_color_space != JCS_EXT_ABGR && - cinfo->out_color_space != JCS_EXT_ARGB) || - cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space]) + cinfo->out_color_space != JCS_EXT_ARGB)) + return FALSE; + if ((cinfo->out_color_space == JCS_RGB565 && + cinfo->out_color_components != 3) || + (cinfo->out_color_space != JCS_RGB565 && + cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])) return FALSE; /* and it only handles 2h1v or 2h2v sampling ratios */ if (cinfo->comp_info[0].h_samp_factor != 2 || @@ -78,7 +70,7 @@ cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; /* ??? also need to test for upsample-time rescaling, when & if supported */ - return TRUE; /* by golly, it'll work... */ + return TRUE; /* by golly, it'll work... */ #else return FALSE; #endif @@ -89,6 +81,177 @@ * Compute output image dimensions and related values. * NOTE: this is exported for possible use by application. * Hence it mustn't do anything that can't be done twice. + */ + +#if JPEG_LIB_VERSION >= 80 +GLOBAL(void) +#else +LOCAL(void) +#endif +jpeg_core_output_dimensions (j_decompress_ptr cinfo) +/* Do computations that are needed before master selection phase. + * This function is used for transcoding and full decompression. + */ +{ +#ifdef IDCT_SCALING_SUPPORTED + int ci; + jpeg_component_info *compptr; + + /* Compute actual output image dimensions and DCT scaling choices. */ + if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) { + /* Provide 1/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 1; + cinfo->_min_DCT_v_scaled_size = 1; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) { + /* Provide 2/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 2; + cinfo->_min_DCT_v_scaled_size = 2; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) { + /* Provide 3/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 3; + cinfo->_min_DCT_v_scaled_size = 3; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) { + /* Provide 4/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 4; + cinfo->_min_DCT_v_scaled_size = 4; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) { + /* Provide 5/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 5; + cinfo->_min_DCT_v_scaled_size = 5; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) { + /* Provide 6/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 6; + cinfo->_min_DCT_v_scaled_size = 6; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) { + /* Provide 7/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 7; + cinfo->_min_DCT_v_scaled_size = 7; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) { + /* Provide 8/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 8; + cinfo->_min_DCT_v_scaled_size = 8; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) { + /* Provide 9/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 9; + cinfo->_min_DCT_v_scaled_size = 9; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) { + /* Provide 10/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 10; + cinfo->_min_DCT_v_scaled_size = 10; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) { + /* Provide 11/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 11; + cinfo->_min_DCT_v_scaled_size = 11; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) { + /* Provide 12/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 12; + cinfo->_min_DCT_v_scaled_size = 12; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) { + /* Provide 13/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 13; + cinfo->_min_DCT_v_scaled_size = 13; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) { + /* Provide 14/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 14; + cinfo->_min_DCT_v_scaled_size = 14; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) { + /* Provide 15/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 15; + cinfo->_min_DCT_v_scaled_size = 15; + } else { + /* Provide 16/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 16; + cinfo->_min_DCT_v_scaled_size = 16; + } + + /* Recompute dimensions of components */ + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size; + compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size; + } + +#else /* !IDCT_SCALING_SUPPORTED */ + + /* Hardwire it to "no scaling" */ + cinfo->output_width = cinfo->image_width; + cinfo->output_height = cinfo->image_height; + /* jdinput.c has already initialized DCT_scaled_size, + * and has computed unscaled downsampled_width and downsampled_height. + */ + +#endif /* IDCT_SCALING_SUPPORTED */ +} + + +/* + * Compute output image dimensions and related values. + * NOTE: this is exported for possible use by application. + * Hence it mustn't do anything that can't be done twice. * Also note that it may be called before the master module is initialized! */ @@ -105,65 +268,24 @@ if (cinfo->global_state != DSTATE_READY) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + /* Compute core output image dimensions and DCT scaling choices. */ + jpeg_core_output_dimensions(cinfo); + #ifdef IDCT_SCALING_SUPPORTED - /* Compute actual output image dimensions and DCT scaling choices. */ - if (cinfo->scale_num * 8 <= cinfo->scale_denom) { - /* Provide 1/8 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 8L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 8L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 1; -#else - cinfo->min_DCT_scaled_size = 1; -#endif - } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) { - /* Provide 1/4 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 4L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 4L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 2; -#else - cinfo->min_DCT_scaled_size = 2; -#endif - } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) { - /* Provide 1/2 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 2L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 2L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 4; -#else - cinfo->min_DCT_scaled_size = 4; -#endif - } else { - /* Provide 1/1 scaling */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE; -#else - cinfo->min_DCT_scaled_size = DCTSIZE; -#endif - } /* In selecting the actual DCT scaling for each component, we try to * scale up the chroma components via IDCT scaling rather than upsampling. * This saves time if the upsampler gets to use 1:1 scaling. - * Note this code assumes that the supported DCT scalings are powers of 2. + * Note this code adapts subsampling ratios which are powers of 2. */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { int ssize = cinfo->_min_DCT_scaled_size; while (ssize < DCTSIZE && - (compptr->h_samp_factor * ssize * 2 <= - cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) && - (compptr->v_samp_factor * ssize * 2 <= - cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size)) { + ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) % + (compptr->h_samp_factor * ssize * 2) == 0) && + ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) % + (compptr->v_samp_factor * ssize * 2) == 0)) { ssize = ssize * 2; } #if JPEG_LIB_VERSION >= 70 @@ -181,12 +303,12 @@ /* Size in samples, after IDCT scaling */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * - (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * - (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), + (long) (cinfo->max_v_samp_factor * DCTSIZE)); } #else /* !IDCT_SCALING_SUPPORTED */ @@ -220,18 +342,19 @@ cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space]; break; case JCS_YCbCr: + case JCS_RGB565: cinfo->out_color_components = 3; break; case JCS_CMYK: case JCS_YCCK: cinfo->out_color_components = 4; break; - default: /* else must be same colorspace as in file */ + default: /* else must be same colorspace as in file */ cinfo->out_color_components = cinfo->num_components; break; } cinfo->output_components = (cinfo->quantize_colors ? 1 : - cinfo->out_color_components); + cinfo->out_color_components); /* See if upsampler will want to emit more than one row at a time */ if (use_merged_upsample(cinfo)) @@ -248,20 +371,20 @@ * processes are inner loops and need to be as fast as possible. On most * machines, particularly CPUs with pipelines or instruction prefetch, * a (subscript-check-less) C table lookup - * x = sample_range_limit[x]; + * x = sample_range_limit[x]; * is faster than explicit tests - * if (x < 0) x = 0; - * else if (x > MAXJSAMPLE) x = MAXJSAMPLE; + * if (x < 0) x = 0; + * else if (x > MAXJSAMPLE) x = MAXJSAMPLE; * These processes all use a common table prepared by the routine below. * * For most steps we can mathematically guarantee that the initial value * of x is within MAXJSAMPLE+1 of the legal range, so a table running from * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient. But for the initial - * limiting step (just after the IDCT), a wildly out-of-range value is + * limiting step (just after the IDCT), a wildly out-of-range value is * possible if the input data is corrupt. To avoid any chance of indexing * off the end of memory and getting a bad-pointer trap, we perform the * post-IDCT limiting thus: - * x = range_limit[x & MASK]; + * x = range_limit[x & MASK]; * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit * samples. Under normal circumstances this is more than enough range and * a correct output will be generated; with bogus input data the mask will @@ -279,37 +402,34 @@ * We can save some space by overlapping the start of the post-IDCT table * with the simpler range limiting table. The post-IDCT table begins at * sample_range_limit + CENTERJSAMPLE. - * - * Note that the table is allocated in near data space on PCs; it's small - * enough and used often enough to justify this. */ LOCAL(void) prepare_range_limit_table (j_decompress_ptr cinfo) /* Allocate and fill in the sample_range_limit table */ { - JSAMPLE * table; + JSAMPLE *table; int i; table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE)); - table += (MAXJSAMPLE+1); /* allow negative subscripts of simple table */ + (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * sizeof(JSAMPLE)); + table += (MAXJSAMPLE+1); /* allow negative subscripts of simple table */ cinfo->sample_range_limit = table; /* First segment of "simple" table: limit[x] = 0 for x < 0 */ - MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE)); + MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * sizeof(JSAMPLE)); /* Main part of "simple" table: limit[x] = x */ for (i = 0; i <= MAXJSAMPLE; i++) table[i] = (JSAMPLE) i; - table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */ + table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */ /* End of simple table, rest of first half of post-IDCT table */ for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++) table[i] = MAXJSAMPLE; /* Second half of post-IDCT table */ MEMZERO(table + (2 * (MAXJSAMPLE+1)), - (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE)); + (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * sizeof(JSAMPLE)); MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE), - cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE)); + cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE)); } @@ -442,6 +562,12 @@ /* Initialize input side of decompressor to consume first scan. */ (*cinfo->inputctl->start_input_pass) (cinfo); + /* Set the first and last iMCU columns to decompress from single-scan images. + * By default, decompress all of the iMCU columns. + */ + cinfo->master->first_iMCU_col = 0; + cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1; + #ifdef D_MULTISCAN_FILES_SUPPORTED /* If jpeg_start_decompress will read the whole file, initialize * progress monitoring appropriately. The input step is counted @@ -497,24 +623,24 @@ if (cinfo->quantize_colors && cinfo->colormap == NULL) { /* Select new quantization method */ if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) { - cinfo->cquantize = master->quantizer_2pass; - master->pub.is_dummy_pass = TRUE; + cinfo->cquantize = master->quantizer_2pass; + master->pub.is_dummy_pass = TRUE; } else if (cinfo->enable_1pass_quant) { - cinfo->cquantize = master->quantizer_1pass; + cinfo->cquantize = master->quantizer_1pass; } else { - ERREXIT(cinfo, JERR_MODE_CHANGE); + ERREXIT(cinfo, JERR_MODE_CHANGE); } } (*cinfo->idct->start_pass) (cinfo); (*cinfo->coef->start_output_pass) (cinfo); if (! cinfo->raw_data_out) { if (! master->using_merged_upsample) - (*cinfo->cconvert->start_pass) (cinfo); + (*cinfo->cconvert->start_pass) (cinfo); (*cinfo->upsample->start_pass) (cinfo); if (cinfo->quantize_colors) - (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass); + (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass); (*cinfo->post->start_pass) (cinfo, - (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); + (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU); } } @@ -523,7 +649,7 @@ if (cinfo->progress != NULL) { cinfo->progress->completed_passes = master->pass_number; cinfo->progress->total_passes = master->pass_number + - (master->pub.is_dummy_pass ? 2 : 1); + (master->pub.is_dummy_pass ? 2 : 1); /* In buffered-image mode, we assume one more output pass if EOI not * yet reached, but no more passes if EOI has been reached. */ @@ -586,16 +712,13 @@ GLOBAL(void) jinit_master_decompress (j_decompress_ptr cinfo) { - my_master_ptr master; + my_master_ptr master = (my_master_ptr) cinfo->master; - master = (my_master_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_decomp_master)); - cinfo->master = (struct jpeg_decomp_master *) master; master->pub.prepare_for_output_pass = prepare_for_output_pass; master->pub.finish_output_pass = finish_output_pass; master->pub.is_dummy_pass = FALSE; + master->pub.jinit_upsampler_no_alloc = FALSE; master_selection(cinfo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmaster.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmaster.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmaster.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmaster.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +/* + * jdmaster.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1995, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains the master control structure for the JPEG decompressor. + */ + +/* Private state */ + +typedef struct { + struct jpeg_decomp_master pub; /* public fields */ + + int pass_number; /* # of passes completed */ + + boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */ + + /* Saved references to initialized quantizer modules, + * in case we need to switch modes. + */ + struct jpeg_color_quantizer *quantizer_1pass; + struct jpeg_color_quantizer *quantizer_2pass; +} my_decomp_master; + +typedef my_decomp_master *my_master_ptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmerge.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmerge.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmerge.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmerge.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jdmerge.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains code for merged upsampling/color conversion. * @@ -16,19 +19,19 @@ * (ie, box filtering), we can save some work in color conversion by * calculating all the output pixels corresponding to a pair of chroma * samples at one time. In the conversion equations - * R = Y + K1 * Cr - * G = Y + K2 * Cb + K3 * Cr - * B = Y + K4 * Cb + * R = Y + K1 * Cr + * G = Y + K2 * Cb + K3 * Cr + * B = Y + K4 * Cb * only the Y term varies among the group of pixels corresponding to a pair * of chroma samples, so the rest of the terms can be calculated just once. * At typical sampling ratios, this eliminates half or three-quarters of the * multiplications needed for color conversion. * * This file currently provides implementations for the following cases: - * YCbCr => RGB color conversion only. - * Sampling ratios of 2h1v or 2h2v. - * No scaling needed at upsample time. - * Corner-aligned (non-CCIR601) sampling alignment. + * YCbCr => RGB color conversion only. + * Sampling ratios of 2h1v or 2h2v. + * No scaling needed at upsample time. + * Corner-aligned (non-CCIR601) sampling alignment. * Other special cases could be added, but in most applications these are * the only common cases. (For uncommon cases we fall back on the more * general code in jdsample.c and jdcolor.c.) @@ -38,7 +41,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" #ifdef UPSAMPLE_MERGING_SUPPORTED @@ -46,18 +49,17 @@ /* Private subobject */ typedef struct { - struct jpeg_upsampler pub; /* public fields */ + struct jpeg_upsampler pub; /* public fields */ /* Pointer to routine to do actual upsampling/conversion of one row group */ - JMETHOD(void, upmethod, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); /* Private state for YCC->RGB conversion */ - int * Cr_r_tab; /* => table for Cr to R conversion */ - int * Cb_b_tab; /* => table for Cb to B conversion */ - INT32 * Cr_g_tab; /* => table for Cr to G conversion */ - INT32 * Cb_g_tab; /* => table for Cb to G conversion */ + int *Cr_r_tab; /* => table for Cr to R conversion */ + int *Cb_b_tab; /* => table for Cb to B conversion */ + JLONG *Cr_g_tab; /* => table for Cr to G conversion */ + JLONG *Cb_g_tab; /* => table for Cb to G conversion */ /* For 2:1 vertical sampling, we produce two output rows at a time. * We need a "spare" row buffer to hold the second output row if the @@ -65,22 +67,22 @@ * to discard the dummy last row if the image height is odd. */ JSAMPROW spare_row; - boolean spare_full; /* T if spare buffer is occupied */ + boolean spare_full; /* T if spare buffer is occupied */ - JDIMENSION out_row_width; /* samples per output row */ - JDIMENSION rows_to_go; /* counts rows remaining in image */ + JDIMENSION out_row_width; /* samples per output row */ + JDIMENSION rows_to_go; /* counts rows remaining in image */ } my_upsampler; -typedef my_upsampler * my_upsample_ptr; +typedef my_upsampler *my_upsample_ptr; -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L<upsample; int i; - INT32 x; + JLONG x; SHIFT_TEMPS upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); + (MAXJSAMPLE+1) * sizeof(int)); upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); - upsample->Cr_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(int)); + upsample->Cr_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); - upsample->Cb_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(JLONG)); + upsample->Cb_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); + (MAXJSAMPLE+1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ /* Cr=>R value is nearest int to 1.40200 * x */ upsample->Cr_r_tab[i] = (int) - RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); /* Cb=>B value is nearest int to 1.77200 * x */ upsample->Cb_b_tab[i] = (int) - RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ @@ -239,20 +249,23 @@ METHODDEF(void) merged_2v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) /* 2:1 vertical sampling case: may need a spare row. */ { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; JSAMPROW work_ptrs[2]; - JDIMENSION num_rows; /* number of rows returned to caller */ + JDIMENSION num_rows; /* number of rows returned to caller */ if (upsample->spare_full) { /* If we have a spare row saved from a previous cycle, just return it. */ + JDIMENSION size = upsample->out_row_width; + if (cinfo->out_color_space == JCS_RGB565) + size = cinfo->output_width * 2; jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0, - 1, upsample->out_row_width); + 1, size); num_rows = 1; upsample->spare_full = FALSE; } else { @@ -288,17 +301,17 @@ METHODDEF(void) merged_1v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) /* 1:1 vertical sampling case: much easier, never need a spare row. */ { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; /* Just do the upsampling. */ (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, - output_buf + *out_row_ctr); + output_buf + *out_row_ctr); /* Adjust counts */ (*out_row_ctr)++; (*in_row_group_ctr)++; @@ -321,8 +334,8 @@ METHODDEF(void) h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -367,8 +380,8 @@ METHODDEF(void) h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -408,6 +421,151 @@ /* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_PIXELS_LE(addr, pixels) { \ + ((INT16*)(addr))[0] = (INT16)(pixels); \ + ((INT16*)(addr))[1] = (INT16)((pixels) >> 16); \ +} +#define WRITE_TWO_PIXELS_BE(addr, pixels) { \ + ((INT16*)(addr))[1] = (INT16)(pixels); \ + ((INT16*)(addr))[0] = (INT16)((pixels) >> 16); \ +} + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficent for dithering RGB888 to RGB565. + */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const JLONG dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +METHODDEF(void) +h2v1_merged_upsample_565 (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); + } + + +METHODDEF(void) +h2v1_merged_upsample_565D (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +METHODDEF(void) +h2v2_merged_upsample_565 (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +METHODDEF(void) +h2v2_merged_upsample_565D (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +/* * Module initialization routine for merged upsampling/color conversion. * * NB: this is called under the conditions determined by use_merged_upsample() @@ -422,7 +580,7 @@ upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_upsampler)); + sizeof(my_upsampler)); cinfo->upsample = (struct jpeg_upsampler *) upsample; upsample->pub.start_pass = start_pass_merged_upsample; upsample->pub.need_context_rows = FALSE; @@ -435,16 +593,30 @@ upsample->upmethod = jsimd_h2v2_merged_upsample; else upsample->upmethod = h2v2_merged_upsample; + if (cinfo->out_color_space == JCS_RGB565) { + if (cinfo->dither_mode != JDITHER_NONE) { + upsample->upmethod = h2v2_merged_upsample_565D; + } else { + upsample->upmethod = h2v2_merged_upsample_565; + } + } /* Allocate a spare row buffer */ upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE))); + (size_t) (upsample->out_row_width * sizeof(JSAMPLE))); } else { upsample->pub.upsample = merged_1v_upsample; if (jsimd_can_h2v1_merged_upsample()) upsample->upmethod = jsimd_h2v1_merged_upsample; else upsample->upmethod = h2v1_merged_upsample; + if (cinfo->out_color_space == JCS_RGB565) { + if (cinfo->dither_mode != JDITHER_NONE) { + upsample->upmethod = h2v1_merged_upsample_565D; + } else { + upsample->upmethod = h2v1_merged_upsample_565; + } + } /* No spare row needed */ upsample->spare_row = NULL; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrg565.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrg565.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrg565.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrg565.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,356 @@ +/* + * jdmrg565.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains code for merged upsampling/color conversion. + */ + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr * 2]; + inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = (INT16)rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = (INT16)rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + JLONG d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr*2]; + inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = (INT16)rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = (INT16)rgb; + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrgext.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrgext.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrgext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrgext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,186 @@ +/* + * jdmrgext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains code for merged upsampling/color conversion. + */ + + +/* This file is included by jdmerge.c */ + + +/* + * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. + */ + +INLINE +LOCAL(void) +h2v1_merged_upsample_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr0++); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + } +} + + +/* + * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. + */ + +INLINE +LOCAL(void) +h2v2_merged_upsample_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr*2]; + inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + outptr0 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr00++); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + outptr0 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr01++); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + outptr1 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr01++); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + outptr1 += RGB_PIXELSIZE; + } + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr00); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + y = GETJSAMPLE(*inptr01); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrgext.c.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrgext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdmrgext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdmrgext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,156 +0,0 @@ -/* - * jdmrgext.c - * - * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains code for merged upsampling/color conversion. - */ - - -/* This file is included by jdmerge.c */ - - -/* - * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. - */ - -INLINE -LOCAL(void) -h2v1_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr; - JSAMPROW inptr0, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - SHIFT_TEMPS - - inptr0 = input_buf[0][in_row_group_ctr]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr = output_buf[0]; - /* Loop for each pair of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - outptr += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr0++); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - outptr += RGB_PIXELSIZE; - } - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - } -} - - -/* - * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. - */ - -INLINE -LOCAL(void) -h2v2_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr0, outptr1; - JSAMPROW inptr00, inptr01, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - SHIFT_TEMPS - - inptr00 = input_buf[0][in_row_group_ctr*2]; - inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr0 = output_buf[0]; - outptr1 = output_buf[1]; - /* Loop for each group of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr00++); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - outptr1 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - outptr1 += RGB_PIXELSIZE; - } - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - y = GETJSAMPLE(*inptr01); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdphuff.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdphuff.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdphuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdphuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdphuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy decoding routines for progressive JPEG. * @@ -17,7 +20,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdhuff.c */ +#include "jdhuff.h" /* Declarations shared with jdhuff.c */ #ifdef D_PROGRESSIVE_SUPPORTED @@ -30,8 +33,8 @@ */ typedef struct { - unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; /* This macro is to work around compilers with missing or broken @@ -44,11 +47,11 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).EOBRUN = (src).EOBRUN, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -59,29 +62,29 @@ /* These fields are loaded into local variables at start of each MCU. * In case of suspension, we exit WITHOUT updating them. */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ + bitread_perm_state bitstate; /* Bit buffer at start of MCU */ + savable_state saved; /* Other state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; - d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */ + d_derived_tbl *ac_derived_tbl; /* active table during an AC scan */ } phuff_entropy_decoder; -typedef phuff_entropy_decoder * phuff_entropy_ptr; +typedef phuff_entropy_decoder *phuff_entropy_ptr; /* Forward declarations */ -METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); +METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); /* @@ -94,8 +97,9 @@ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band, bad; int ci, coefi, tbl; + d_derived_tbl **pdtbl; int *coef_bit_ptr; - jpeg_component_info * compptr; + jpeg_component_info *compptr; is_DC_band = (cinfo->Ss == 0); @@ -117,7 +121,7 @@ if (cinfo->Al != cinfo->Ah-1) bad = TRUE; } - if (cinfo->Al > 13) /* need not check for < 0 */ + if (cinfo->Al > 13) /* need not check for < 0 */ bad = TRUE; /* Arguably the maximum Al value should be less than 13 for 8-bit precision, * but the spec doesn't say so, and we try to be liberal about what we @@ -127,7 +131,7 @@ */ if (bad) ERREXIT4(cinfo, JERR_BAD_PROGRESSION, - cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); /* Update progression status, and verify that scan order is legal. * Note that inter-scan inconsistencies are treated as warnings * not fatal errors ... not clear if this is right way to behave. @@ -140,7 +144,7 @@ for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); coef_bit_ptr[coefi] = cinfo->Al; } } @@ -164,15 +168,15 @@ * We may build same derived table more than once, but it's not expensive. */ if (is_DC_band) { - if (cinfo->Ah == 0) { /* DC refinement needs no table */ - tbl = compptr->dc_tbl_no; - jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, - & entropy->derived_tbls[tbl]); + if (cinfo->Ah == 0) { /* DC refinement needs no table */ + tbl = compptr->dc_tbl_no; + pdtbl = entropy->derived_tbls + tbl; + jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl); } } else { tbl = compptr->ac_tbl_no; - jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, - & entropy->derived_tbls[tbl]); + pdtbl = entropy->derived_tbls + tbl; + jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl); /* remember the single active table */ entropy->ac_derived_tbl = entropy->derived_tbls[tbl]; } @@ -198,9 +202,11 @@ * On some machines, a shift and add will be faster than a table lookup. */ +#define AVOID_TABLES #ifdef AVOID_TABLES -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) +#define NEG_1 ((unsigned)-1) +#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((NEG_1)<<(s)) + 1) : (x)) #else @@ -263,7 +269,7 @@ /* * Huffman MCU decoding. * Each of these routines decodes and returns one MCU's worth of - * Huffman-compressed coefficients. + * Huffman-compressed coefficients. * The coefficients are reordered from zigzag order into natural array order, * but are not dequantized. * @@ -284,7 +290,7 @@ METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Al = cinfo->Al; register int s, r; @@ -292,14 +298,14 @@ JBLOCKROW block; BITREAD_STATE_VARS; savable_state state; - d_derived_tbl * tbl; - jpeg_component_info * compptr; + d_derived_tbl *tbl; + jpeg_component_info *compptr; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. @@ -324,16 +330,16 @@ /* Section F.2.2.1: decode the DC coefficient difference */ HUFF_DECODE(s, br_state, tbl, return FALSE, label1); if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); } /* Convert DC difference to actual value, update last_dc_val */ s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) (s << Al); + (*block)[0] = (JCOEF) LEFT_SHIFT(s, Al); } /* Completed MCU, so update state */ @@ -355,7 +361,7 @@ METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; int Al = cinfo->Al; @@ -363,13 +369,13 @@ unsigned int EOBRUN; JBLOCKROW block; BITREAD_STATE_VARS; - d_derived_tbl * tbl; + d_derived_tbl *tbl; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. @@ -380,49 +386,49 @@ /* Load up working state. * We can avoid loading/saving bitread state if in an EOB run. */ - EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ + EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ /* There is always only one block per MCU */ - if (EOBRUN > 0) /* if it's a band of zeroes... */ - EOBRUN--; /* ...process it now (we do nothing) */ + if (EOBRUN > 0) /* if it's a band of zeroes... */ + EOBRUN--; /* ...process it now (we do nothing) */ else { BITREAD_LOAD_STATE(cinfo,entropy->bitstate); block = MCU_data[0]; tbl = entropy->ac_derived_tbl; for (k = cinfo->Ss; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, return FALSE, label2); - r = s >> 4; - s &= 15; - if (s) { - k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al); - } else { - if (r == 15) { /* ZRL */ - k += 15; /* skip 15 zeroes in band */ - } else { /* EOBr, run length is 2^r + appended bits */ - EOBRUN = 1 << r; - if (r) { /* EOBr, r > 0 */ - CHECK_BIT_BUFFER(br_state, r, return FALSE); - r = GET_BITS(r); - EOBRUN += r; - } - EOBRUN--; /* this band is processed at this moment */ - break; /* force end-of-band */ - } - } + HUFF_DECODE(s, br_state, tbl, return FALSE, label2); + r = s >> 4; + s &= 15; + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + /* Scale and output coefficient in natural (dezigzagged) order */ + (*block)[jpeg_natural_order[k]] = (JCOEF) LEFT_SHIFT(s, Al); + } else { + if (r == 15) { /* ZRL */ + k += 15; /* skip 15 zeroes in band */ + } else { /* EOBr, run length is 2^r + appended bits */ + EOBRUN = 1 << r; + if (r) { /* EOBr, r > 0 */ + CHECK_BIT_BUFFER(br_state, r, return FALSE); + r = GET_BITS(r); + EOBRUN += r; + } + EOBRUN--; /* this band is processed at this moment */ + break; /* force end-of-band */ + } + } } BITREAD_SAVE_STATE(cinfo,entropy->bitstate); } /* Completed MCU, so update state */ - entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ + entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ } /* Account for restart interval (no-op if not using restarts) */ @@ -440,9 +446,9 @@ METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ int blkn; JBLOCKROW block; BITREAD_STATE_VARS; @@ -451,7 +457,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* Not worth the cycles to check insufficient_data here, @@ -489,17 +495,17 @@ METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - int m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + int m1 = (NEG_1) << cinfo->Al; /* -1 in the bit position being coded */ register int s, k, r; unsigned int EOBRUN; JBLOCKROW block; JCOEFPTR thiscoef; BITREAD_STATE_VARS; - d_derived_tbl * tbl; + d_derived_tbl *tbl; int num_newnz; int newnz_pos[DCTSIZE2]; @@ -507,7 +513,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, don't modify the MCU. @@ -535,58 +541,58 @@ if (EOBRUN == 0) { for (; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, goto undoit, label3); - r = s >> 4; - s &= 15; - if (s) { - if (s != 1) /* size of new coef should always be 1 */ - WARNMS(cinfo, JWRN_HUFF_BAD_CODE); - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) - s = p1; /* newly nonzero coef is positive */ - else - s = m1; /* newly nonzero coef is negative */ - } else { - if (r != 15) { - EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ - if (r) { - CHECK_BIT_BUFFER(br_state, r, goto undoit); - r = GET_BITS(r); - EOBRUN += r; - } - break; /* rest of block is handled by EOB logic */ - } - /* note s = 0 for processing ZRL */ - } - /* Advance over already-nonzero coefs and r still-zero coefs, - * appending correction bits to the nonzeroes. A correction bit is 1 - * if the absolute value of the coefficient must be increased. - */ - do { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } else { - if (--r < 0) - break; /* reached target zero coefficient */ - } - k++; - } while (k <= Se); - if (s) { - int pos = jpeg_natural_order[k]; - /* Output newly nonzero coefficient */ - (*block)[pos] = (JCOEF) s; - /* Remember its position in case we have to suspend */ - newnz_pos[num_newnz++] = pos; - } + HUFF_DECODE(s, br_state, tbl, goto undoit, label3); + r = s >> 4; + s &= 15; + if (s) { + if (s != 1) /* size of new coef should always be 1 */ + WARNMS(cinfo, JWRN_HUFF_BAD_CODE); + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) + s = p1; /* newly nonzero coef is positive */ + else + s = m1; /* newly nonzero coef is negative */ + } else { + if (r != 15) { + EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ + if (r) { + CHECK_BIT_BUFFER(br_state, r, goto undoit); + r = GET_BITS(r); + EOBRUN += r; + } + break; /* rest of block is handled by EOB logic */ + } + /* note s = 0 for processing ZRL */ + } + /* Advance over already-nonzero coefs and r still-zero coefs, + * appending correction bits to the nonzeroes. A correction bit is 1 + * if the absolute value of the coefficient must be increased. + */ + do { + thiscoef = *block + jpeg_natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } else { + if (--r < 0) + break; /* reached target zero coefficient */ + } + k++; + } while (k <= Se); + if (s) { + int pos = jpeg_natural_order[k]; + /* Output newly nonzero coefficient */ + (*block)[pos] = (JCOEF) s; + /* Remember its position in case we have to suspend */ + newnz_pos[num_newnz++] = pos; + } } } @@ -597,18 +603,18 @@ * if the absolute value of the coefficient must be increased. */ for (; k <= Se; k++) { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } + thiscoef = *block + jpeg_natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } } /* Count one block completed in EOB run */ EOBRUN--; @@ -646,7 +652,7 @@ entropy = (phuff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_decoder)); + sizeof(phuff_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass_phuff_decoder; @@ -658,9 +664,9 @@ /* Create progression status table */ cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*SIZEOF(int)); + cinfo->num_components*DCTSIZE2*sizeof(int)); coef_bit_ptr = & cinfo->coef_bits[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) *coef_bit_ptr++ = -1; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdpostct.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdpostct.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdpostct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdpostct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdpostct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the decompression postprocessing controller. * This controller manages the upsampling, color conversion, and color @@ -31,37 +34,34 @@ * For two-pass color quantization, we need a full-image buffer; * for one-pass operation, a strip buffer is sufficient. */ - jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */ - JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */ - JDIMENSION strip_height; /* buffer size in rows */ + jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */ + JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */ + JDIMENSION strip_height; /* buffer size in rows */ /* for two-pass mode only: */ - JDIMENSION starting_row; /* row # of first row in current strip */ - JDIMENSION next_row; /* index of next row to fill/empty in strip */ + JDIMENSION starting_row; /* row # of first row in current strip */ + JDIMENSION next_row; /* index of next row to fill/empty in strip */ } my_post_controller; -typedef my_post_controller * my_post_ptr; +typedef my_post_controller *my_post_ptr; /* Forward declarations */ METHODDEF(void) post_process_1pass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED METHODDEF(void) post_process_prepass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); METHODDEF(void) post_process_2pass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #endif @@ -84,9 +84,9 @@ * allocate a strip buffer. Use the virtual-array buffer as workspace. */ if (post->buffer == NULL) { - post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - (JDIMENSION) 0, post->strip_height, TRUE); + post->buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, post->whole_image, + (JDIMENSION) 0, post->strip_height, TRUE); } } else { /* For single-pass processing without color quantization, @@ -124,10 +124,10 @@ METHODDEF(void) post_process_1pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION num_rows, max_rows; @@ -139,11 +139,11 @@ max_rows = post->strip_height; num_rows = 0; (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &num_rows, max_rows); + input_buf, in_row_group_ctr, in_row_groups_avail, + post->buffer, &num_rows, max_rows); /* Quantize and emit data. */ (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer, output_buf + *out_row_ctr, (int) num_rows); + post->buffer, output_buf + *out_row_ctr, (int) num_rows); *out_row_ctr += num_rows; } @@ -156,10 +156,10 @@ METHODDEF(void) post_process_prepass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION old_next_row, num_rows; @@ -167,22 +167,22 @@ /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - post->starting_row, post->strip_height, TRUE); + ((j_common_ptr) cinfo, post->whole_image, + post->starting_row, post->strip_height, TRUE); } /* Upsample some data (up to a strip height's worth). */ old_next_row = post->next_row; (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &post->next_row, post->strip_height); + input_buf, in_row_group_ctr, in_row_groups_avail, + post->buffer, &post->next_row, post->strip_height); /* Allow quantizer to scan new data. No data is emitted, */ /* but we advance out_row_ctr so outer loop can tell when we're done. */ if (post->next_row > old_next_row) { num_rows = post->next_row - old_next_row; (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row, - (JSAMPARRAY) NULL, (int) num_rows); + (JSAMPARRAY) NULL, (int) num_rows); *out_row_ctr += num_rows; } @@ -200,10 +200,10 @@ METHODDEF(void) post_process_2pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION num_rows, max_rows; @@ -211,8 +211,8 @@ /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - post->starting_row, post->strip_height, FALSE); + ((j_common_ptr) cinfo, post->whole_image, + post->starting_row, post->strip_height, FALSE); } /* Determine number of rows to emit. */ @@ -227,8 +227,8 @@ /* Quantize and emit data. */ (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer + post->next_row, output_buf + *out_row_ctr, - (int) num_rows); + post->buffer + post->next_row, output_buf + *out_row_ctr, + (int) num_rows); *out_row_ctr += num_rows; /* Advance if we filled the strip. */ @@ -253,11 +253,11 @@ post = (my_post_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_post_controller)); + sizeof(my_post_controller)); cinfo->post = (struct jpeg_d_post_controller *) post; post->pub.start_pass = start_pass_dpost; - post->whole_image = NULL; /* flag for no virtual arrays */ - post->buffer = NULL; /* flag for no strip buffer */ + post->whole_image = NULL; /* flag for no virtual arrays */ + post->buffer = NULL; /* flag for no strip buffer */ /* Create the quantization buffer, if needed */ if (cinfo->quantize_colors) { @@ -271,20 +271,20 @@ /* We round up the number of rows to a multiple of the strip height. */ #ifdef QUANT_2PASS_SUPPORTED post->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - cinfo->output_width * cinfo->out_color_components, - (JDIMENSION) jround_up((long) cinfo->output_height, - (long) post->strip_height), - post->strip_height); + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + cinfo->output_width * cinfo->out_color_components, + (JDIMENSION) jround_up((long) cinfo->output_height, + (long) post->strip_height), + post->strip_height); #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); #endif /* QUANT_2PASS_SUPPORTED */ } else { /* One-pass color quantization: just make a strip buffer. */ post->buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->output_width * cinfo->out_color_components, - post->strip_height); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width * cinfo->out_color_components, + post->strip_height); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdsample.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdsample.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdsample.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdsample.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jdsample.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains upsampling routines. * @@ -20,50 +24,12 @@ * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" +#include "jdsample.h" #include "jsimd.h" #include "jpegcomp.h" -/* Pointer to routine to upsample a single component */ -typedef JMETHOD(void, upsample1_ptr, - (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); - -/* Private subobject */ - -typedef struct { - struct jpeg_upsampler pub; /* public fields */ - - /* Color conversion buffer. When using separate upsampling and color - * conversion steps, this buffer holds one upsampled row group until it - * has been color converted and output. - * Note: we do not allocate any storage for component(s) which are full-size, - * ie do not need rescaling. The corresponding entry of color_buf[] is - * simply set to point to the input data array, thereby avoiding copying. - */ - JSAMPARRAY color_buf[MAX_COMPONENTS]; - - /* Per-component upsampling method pointers */ - upsample1_ptr methods[MAX_COMPONENTS]; - - int next_row_out; /* counts rows emitted from color_buf */ - JDIMENSION rows_to_go; /* counts rows remaining in image */ - - /* Height of an input row group for each component. */ - int rowgroup_height[MAX_COMPONENTS]; - - /* These arrays save pixel expansion factors so that int_expand need not - * recompute them each time. They are unused for other upsampling methods. - */ - UINT8 h_expand[MAX_COMPONENTS]; - UINT8 v_expand[MAX_COMPONENTS]; -} my_upsampler; - -typedef my_upsampler * my_upsample_ptr; - /* * Initialize for an upsampling pass. @@ -91,26 +57,26 @@ METHODDEF(void) sep_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JDIMENSION num_rows; /* Fill the conversion buffer, if it's empty */ if (upsample->next_row_out >= cinfo->max_v_samp_factor) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { /* Invoke per-component upsample method. Notice we pass a POINTER * to color_buf[ci], so that fullsize_upsample can change it. */ (*upsample->methods[ci]) (cinfo, compptr, - input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]), - upsample->color_buf + ci); + input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]), + upsample->color_buf + ci); } upsample->next_row_out = 0; } @@ -122,7 +88,7 @@ /* Not more than the distance to the end of the image. Need this test * in case the image height is not a multiple of max_v_samp_factor: */ - if (num_rows > upsample->rows_to_go) + if (num_rows > upsample->rows_to_go) num_rows = upsample->rows_to_go; /* And not more than what the client can accept: */ out_rows_avail -= *out_row_ctr; @@ -130,9 +96,9 @@ num_rows = out_rows_avail; (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf, - (JDIMENSION) upsample->next_row_out, - output_buf + *out_row_ctr, - (int) num_rows); + (JDIMENSION) upsample->next_row_out, + output_buf + *out_row_ctr, + (int) num_rows); /* Adjust counts */ *out_row_ctr += num_rows; @@ -158,8 +124,8 @@ */ METHODDEF(void) -fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { *output_data_ptr = input_data; } @@ -171,10 +137,10 @@ */ METHODDEF(void) -noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +noop_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { - *output_data_ptr = NULL; /* safety check */ + *output_data_ptr = NULL; /* safety check */ } @@ -190,8 +156,8 @@ */ METHODDEF(void) -int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; JSAMPARRAY output_data = *output_data_ptr; @@ -212,15 +178,15 @@ outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ for (h = h_expand; h > 0; h--) { - *outptr++ = invalue; + *outptr++ = invalue; } } /* Generate any additional output rows by duplicating the first one */ if (v_expand > 1) { jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - v_expand-1, cinfo->output_width); + v_expand-1, cinfo->output_width); } inrow++; outrow += v_expand; @@ -234,8 +200,8 @@ */ METHODDEF(void) -h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -248,7 +214,7 @@ outptr = output_data[inrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ *outptr++ = invalue; *outptr++ = invalue; } @@ -262,8 +228,8 @@ */ METHODDEF(void) -h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -277,12 +243,12 @@ outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ *outptr++ = invalue; *outptr++ = invalue; } jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - 1, cinfo->output_width); + 1, cinfo->output_width); inrow++; outrow += 2; } @@ -305,8 +271,8 @@ */ METHODDEF(void) -h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -346,15 +312,15 @@ */ METHODDEF(void) -h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr0, inptr1, outptr; #if BITS_IN_JSAMPLE == 8 register int thiscolsum, lastcolsum, nextcolsum; #else - register INT32 thiscolsum, lastcolsum, nextcolsum; + register JLONG thiscolsum, lastcolsum, nextcolsum; #endif register JDIMENSION colctr; int inrow, outrow, v; @@ -364,10 +330,10 @@ for (v = 0; v < 2; v++) { /* inptr0 points to nearest input row, inptr1 points to next nearest */ inptr0 = input_data[inrow]; - if (v == 0) /* next nearest is row above */ - inptr1 = input_data[inrow-1]; - else /* next nearest is row below */ - inptr1 = input_data[inrow+1]; + if (v == 0) /* next nearest is row above */ + inptr1 = input_data[inrow-1]; + else /* next nearest is row below */ + inptr1 = input_data[inrow+1]; outptr = output_data[outrow++]; /* Special case for first column */ @@ -378,12 +344,12 @@ lastcolsum = thiscolsum; thiscolsum = nextcolsum; for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { - /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ - /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; + /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ + /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ + nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); + *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); + lastcolsum = thiscolsum; thiscolsum = nextcolsum; } /* Special case for last column */ @@ -404,19 +370,22 @@ { my_upsample_ptr upsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; boolean need_buffer, do_fancy; int h_in_group, v_in_group, h_out_group, v_out_group; - upsample = (my_upsample_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_upsampler)); - cinfo->upsample = (struct jpeg_upsampler *) upsample; - upsample->pub.start_pass = start_pass_upsample; - upsample->pub.upsample = sep_upsample; - upsample->pub.need_context_rows = FALSE; /* until we find out differently */ + if (!cinfo->master->jinit_upsampler_no_alloc) { + upsample = (my_upsample_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(my_upsampler)); + cinfo->upsample = (struct jpeg_upsampler *) upsample; + upsample->pub.start_pass = start_pass_upsample; + upsample->pub.upsample = sep_upsample; + upsample->pub.need_context_rows = FALSE; /* until we find out differently */ + } else + upsample = (my_upsample_ptr) cinfo->upsample; - if (cinfo->CCIR601_sampling) /* this isn't supported */ + if (cinfo->CCIR601_sampling) /* this isn't supported */ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL); /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1, @@ -433,9 +402,9 @@ * are to be converted to max_h_samp_factor * max_v_samp_factor pixels. */ h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; + cinfo->_min_DCT_scaled_size; v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; + cinfo->_min_DCT_scaled_size; h_out_group = cinfo->max_h_samp_factor; v_out_group = cinfo->max_v_samp_factor; upsample->rowgroup_height[ci] = v_in_group; /* save for use later */ @@ -449,48 +418,53 @@ upsample->methods[ci] = fullsize_upsample; need_buffer = FALSE; } else if (h_in_group * 2 == h_out_group && - v_in_group == v_out_group) { + v_in_group == v_out_group) { /* Special cases for 2h1v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { - if (jsimd_can_h2v1_fancy_upsample()) - upsample->methods[ci] = jsimd_h2v1_fancy_upsample; - else - upsample->methods[ci] = h2v1_fancy_upsample; + if (jsimd_can_h2v1_fancy_upsample()) + upsample->methods[ci] = jsimd_h2v1_fancy_upsample; + else + upsample->methods[ci] = h2v1_fancy_upsample; } else { - if (jsimd_can_h2v1_upsample()) - upsample->methods[ci] = jsimd_h2v1_upsample; - else - upsample->methods[ci] = h2v1_upsample; + if (jsimd_can_h2v1_upsample()) + upsample->methods[ci] = jsimd_h2v1_upsample; + else + upsample->methods[ci] = h2v1_upsample; } } else if (h_in_group * 2 == h_out_group && - v_in_group * 2 == v_out_group) { + v_in_group * 2 == v_out_group) { /* Special cases for 2h2v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { - if (jsimd_can_h2v2_fancy_upsample()) - upsample->methods[ci] = jsimd_h2v2_fancy_upsample; - else - upsample->methods[ci] = h2v2_fancy_upsample; - upsample->pub.need_context_rows = TRUE; + if (jsimd_can_h2v2_fancy_upsample()) + upsample->methods[ci] = jsimd_h2v2_fancy_upsample; + else + upsample->methods[ci] = h2v2_fancy_upsample; + upsample->pub.need_context_rows = TRUE; } else { - if (jsimd_can_h2v2_upsample()) - upsample->methods[ci] = jsimd_h2v2_upsample; - else - upsample->methods[ci] = h2v2_upsample; + if (jsimd_can_h2v2_upsample()) + upsample->methods[ci] = jsimd_h2v2_upsample; + else + upsample->methods[ci] = h2v2_upsample; } } else if ((h_out_group % h_in_group) == 0 && - (v_out_group % v_in_group) == 0) { + (v_out_group % v_in_group) == 0) { /* Generic integral-factors upsampling method */ - upsample->methods[ci] = int_upsample; +#if defined(__mips__) + if (jsimd_can_int_upsample()) + upsample->methods[ci] = jsimd_int_upsample; + else +#endif + upsample->methods[ci] = int_upsample; upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group); upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group); } else ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); - if (need_buffer) { + if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) { upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) jround_up((long) cinfo->output_width, - (long) cinfo->max_h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) jround_up((long) cinfo->output_width, + (long) cinfo->max_h_samp_factor), + (JDIMENSION) cinfo->max_v_samp_factor); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdsample.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jdsample.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdsample.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdsample.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,50 @@ +/* + * jdsample.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" + + +/* Pointer to routine to upsample a single component */ +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +/* Private subobject */ + +typedef struct { + struct jpeg_upsampler pub; /* public fields */ + + /* Color conversion buffer. When using separate upsampling and color + * conversion steps, this buffer holds one upsampled row group until it + * has been color converted and output. + * Note: we do not allocate any storage for component(s) which are full-size, + * ie do not need rescaling. The corresponding entry of color_buf[] is + * simply set to point to the input data array, thereby avoiding copying. + */ + JSAMPARRAY color_buf[MAX_COMPONENTS]; + + /* Per-component upsampling method pointers */ + upsample1_ptr methods[MAX_COMPONENTS]; + + int next_row_out; /* counts rows emitted from color_buf */ + JDIMENSION rows_to_go; /* counts rows remaining in image */ + + /* Height of an input row group for each component. */ + int rowgroup_height[MAX_COMPONENTS]; + + /* These arrays save pixel expansion factors so that int_expand need not + * recompute them each time. They are unused for other upsampling methods. + */ + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; +} my_upsampler; + +typedef my_upsampler *my_upsample_ptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdtrans.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jdtrans.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jdtrans.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jdtrans.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdtrans.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains library routines for transcoding decompression, * that is, reading raw DCT coefficient arrays from an input JPEG file. @@ -16,7 +19,7 @@ /* Forward declarations */ -LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo)); +LOCAL(void) transdecode_master_selection (j_decompress_ptr cinfo); /* @@ -55,20 +58,20 @@ int retcode; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); /* Absorb some more input */ retcode = (*cinfo->inputctl->consume_input) (cinfo); if (retcode == JPEG_SUSPENDED) - return NULL; + return NULL; if (retcode == JPEG_REACHED_EOI) - break; + break; /* Advance progress counter if appropriate */ if (cinfo->progress != NULL && - (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { - if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { - /* startup underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; - } + (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { + if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { + /* startup underestimated number of scans; ratchet up one scan */ + cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + } } } /* Set state so that jpeg_finish_decompress does the right thing */ @@ -84,7 +87,7 @@ } /* Oops, improper usage */ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); - return NULL; /* keep compiler happy */ + return NULL; /* keep compiler happy */ } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jerror.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jerror.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jerror.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jerror.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jerror.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains simple error-reporting and trace-message routines. * These are suitable for Unix-like systems and others where writing to @@ -28,7 +31,7 @@ #include #endif -#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ #define EXIT_FAILURE 1 #endif @@ -41,11 +44,7 @@ * want to refer to it directly. */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_std_message_table jMsgTable -#endif - -#define JMESSAGE(code,string) string , +#define JMESSAGE(code,string) string , const char * const jpeg_std_message_table[] = { #include "jerror.h" @@ -105,7 +104,7 @@ #ifdef USE_WINDOWS_MESSAGEBOX /* Display it in a message dialog box */ MessageBox(GetActiveWindow(), buffer, "JPEG Library Error", - MB_OK | MB_ICONERROR); + MB_OK | MB_ICONERROR); #else /* Send it to stderr, adding a newline */ fprintf(stderr, "%s\n", buffer); @@ -127,7 +126,7 @@ METHODDEF(void) emit_message (j_common_ptr cinfo, int msg_level) { - struct jpeg_error_mgr * err = cinfo->err; + struct jpeg_error_mgr *err = cinfo->err; if (msg_level < 0) { /* It's a warning message. Since corrupt files may generate many warnings, @@ -154,12 +153,12 @@ */ METHODDEF(void) -format_message (j_common_ptr cinfo, char * buffer) +format_message (j_common_ptr cinfo, char *buffer) { - struct jpeg_error_mgr * err = cinfo->err; + struct jpeg_error_mgr *err = cinfo->err; int msg_code = err->msg_code; - const char * msgtext = NULL; - const char * msgptr; + const char *msgtext = NULL; + const char *msgptr; char ch; boolean isstring; @@ -167,8 +166,8 @@ if (msg_code > 0 && msg_code <= err->last_jpeg_message) { msgtext = err->jpeg_message_table[msg_code]; } else if (err->addon_message_table != NULL && - msg_code >= err->first_addon_message && - msg_code <= err->last_addon_message) { + msg_code >= err->first_addon_message && + msg_code <= err->last_addon_message) { msgtext = err->addon_message_table[msg_code - err->first_addon_message]; } @@ -193,10 +192,10 @@ sprintf(buffer, msgtext, err->msg_parm.s); else sprintf(buffer, msgtext, - err->msg_parm.i[0], err->msg_parm.i[1], - err->msg_parm.i[2], err->msg_parm.i[3], - err->msg_parm.i[4], err->msg_parm.i[5], - err->msg_parm.i[6], err->msg_parm.i[7]); + err->msg_parm.i[0], err->msg_parm.i[1], + err->msg_parm.i[2], err->msg_parm.i[3], + err->msg_parm.i[4], err->msg_parm.i[5], + err->msg_parm.i[6], err->msg_parm.i[7]); } @@ -213,22 +212,22 @@ { cinfo->err->num_warnings = 0; /* trace_level is not reset since it is an application-supplied parameter */ - cinfo->err->msg_code = 0; /* may be useful as a flag for "no error" */ + cinfo->err->msg_code = 0; /* may be useful as a flag for "no error" */ } /* * Fill in the standard error-handling methods in a jpeg_error_mgr object. * Typical call is: - * struct jpeg_compress_struct cinfo; - * struct jpeg_error_mgr err; + * struct jpeg_compress_struct cinfo; + * struct jpeg_error_mgr err; * - * cinfo.err = jpeg_std_error(&err); + * cinfo.err = jpeg_std_error(&err); * after which the application may override some of the methods. */ GLOBAL(struct jpeg_error_mgr *) -jpeg_std_error (struct jpeg_error_mgr * err) +jpeg_std_error (struct jpeg_error_mgr *err) { err->error_exit = error_exit; err->emit_message = emit_message; @@ -236,16 +235,16 @@ err->format_message = format_message; err->reset_error_mgr = reset_error_mgr; - err->trace_level = 0; /* default = no tracing */ - err->num_warnings = 0; /* no warnings emitted yet */ - err->msg_code = 0; /* may be useful as a flag for "no error" */ + err->trace_level = 0; /* default = no tracing */ + err->num_warnings = 0; /* no warnings emitted yet */ + err->msg_code = 0; /* may be useful as a flag for "no error" */ /* Initialize message table pointers */ err->jpeg_message_table = jpeg_std_message_table; err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1; err->addon_message_table = NULL; - err->first_addon_message = 0; /* for safety */ + err->first_addon_message = 0; /* for safety */ err->last_addon_message = 0; return err; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jerror.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jerror.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jerror.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jerror.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jerror.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file defines the error and message codes for the JPEG library. * Edit this file to add new codes, or to translate the message strings to @@ -33,7 +36,7 @@ typedef enum { -#define JMESSAGE(code,string) code , +#define JMESSAGE(code,string) code , #endif /* JMAKE_ENUM_LIST */ @@ -42,7 +45,7 @@ /* For maintenance convenience, list is alphabetical by message code name */ #if JPEG_LIB_VERSION < 70 JMESSAGE(JERR_ARITH_NOTIMPL, - "Sorry, arithmetic coding is not implemented") + "Sorry, arithmetic coding is not implemented") #endif JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix") JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix") @@ -55,26 +58,26 @@ JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported") #if JPEG_LIB_VERSION >= 70 JMESSAGE(JERR_BAD_DROP_SAMPLING, - "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") + "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") #endif JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition") JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace") JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace") JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length") JMESSAGE(JERR_BAD_LIB_VERSION, - "Wrong JPEG library version: library is %d, caller expects %d") + "Wrong JPEG library version: library is %d, caller expects %d") JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan") JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d") JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d") JMESSAGE(JERR_BAD_PROGRESSION, - "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d") + "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d") JMESSAGE(JERR_BAD_PROG_SCRIPT, - "Invalid progressive parameters at scan script entry %d") + "Invalid progressive parameters at scan script entry %d") JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors") JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d") JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d") JMESSAGE(JERR_BAD_STRUCT_SIZE, - "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u") + "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u") JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access") JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small") JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here") @@ -98,7 +101,7 @@ JMESSAGE(JERR_INPUT_EMPTY, "Empty input file") JMESSAGE(JERR_INPUT_EOF, "Premature end of input file") JMESSAGE(JERR_MISMATCHED_QUANT_TABLE, - "Cannot transcode due to multiple use of quantization table %d") + "Cannot transcode due to multiple use of quantization table %d") JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data") JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change") JMESSAGE(JERR_NOTIMPL, "Not implemented yet") @@ -113,7 +116,7 @@ JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x") JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)") JMESSAGE(JERR_QUANT_COMPONENTS, - "Cannot quantize more than %d color components") + "Cannot quantize more than %d color components") JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors") JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors") JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers") @@ -125,19 +128,19 @@ JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file") JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file") JMESSAGE(JERR_TFILE_WRITE, - "Write failed on temporary file --- out of disk space?") + "Write failed on temporary file --- out of disk space?") JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines") JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x") JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up") JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation") JMESSAGE(JERR_XMS_READ, "Read from XMS failed") JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed") -JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT) +JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT_SHORT) JMESSAGE(JMSG_VERSION, JVERSION) JMESSAGE(JTRC_16BIT_TABLES, - "Caution: quantization tables are too coarse for baseline JPEG") + "Caution: quantization tables are too coarse for baseline JPEG") JMESSAGE(JTRC_ADOBE, - "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d") + "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d") JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u") JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u") JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x") @@ -150,9 +153,9 @@ JMESSAGE(JTRC_HUFFBITS, " %3d %3d %3d %3d %3d %3d %3d %3d") JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d %d") JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE, - "Warning: thumbnail image size does not match data length %u") + "Warning: thumbnail image size does not match data length %u") JMESSAGE(JTRC_JFIF_EXTENSION, - "JFIF extension marker: type 0x%02x, length %u") + "JFIF extension marker: type 0x%02x, length %u") JMESSAGE(JTRC_JFIF_THUMBNAIL, " with %d x %d thumbnail image") JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u") JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x") @@ -163,7 +166,7 @@ JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d") JMESSAGE(JTRC_RST, "RST%d") JMESSAGE(JTRC_SMOOTH_NOTIMPL, - "Smoothing not supported with nonstandard sampling ratios") + "Smoothing not supported with nonstandard sampling ratios") JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d") JMESSAGE(JTRC_SOF_COMPONENT, " Component %d: %dhx%dv q=%d") JMESSAGE(JTRC_SOI, "Start of Image") @@ -173,13 +176,13 @@ JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s") JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s") JMESSAGE(JTRC_THUMB_JPEG, - "JFIF extension marker: JPEG-compressed thumbnail image, length %u") + "JFIF extension marker: JPEG-compressed thumbnail image, length %u") JMESSAGE(JTRC_THUMB_PALETTE, - "JFIF extension marker: palette thumbnail image, length %u") + "JFIF extension marker: palette thumbnail image, length %u") JMESSAGE(JTRC_THUMB_RGB, - "JFIF extension marker: RGB thumbnail image, length %u") + "JFIF extension marker: RGB thumbnail image, length %u") JMESSAGE(JTRC_UNKNOWN_IDS, - "Unrecognized component IDs %d %d %d, assuming YCbCr") + "Unrecognized component IDs %d %d %d, assuming YCbCr") JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u") JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u") JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d") @@ -187,15 +190,15 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif JMESSAGE(JWRN_BOGUS_PROGRESSION, - "Inconsistent progression sequence for component %d coefficient %d") + "Inconsistent progression sequence for component %d coefficient %d") JMESSAGE(JWRN_EXTRANEOUS_DATA, - "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x") + "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x") JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment") JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code") JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d") JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file") JMESSAGE(JWRN_MUST_RESYNC, - "Corrupt JPEG data: found marker 0x%02x instead of RST%d") + "Corrupt JPEG data: found marker 0x%02x instead of RST%d") JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG") JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines") #if JPEG_LIB_VERSION < 70 @@ -255,7 +258,7 @@ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define MAKESTMT(stuff) do { stuff } while (0) +#define MAKESTMT(stuff) do { stuff } while (0) /* Nonfatal errors (we can keep going, but the data is probably corrupt) */ #define WARNMS(cinfo,code) \ @@ -286,26 +289,26 @@ (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) #define TRACEMS3(cinfo,lvl,code,p1,p2,p3) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - _mp[4] = (p5); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + _mp[4] = (p5); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMSS(cinfo,lvl,code,str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctflt.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctflt.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctflt.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctflt.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1994-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a floating-point implementation of the * forward DCT (Discrete Cosine Transform). @@ -20,8 +21,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -37,7 +38,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_FLOAT_SUPPORTED @@ -56,7 +57,7 @@ */ GLOBAL(void) -jpeg_fdct_float (FAST_FLOAT * data) +jpeg_fdct_float (FAST_FLOAT *data) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; @@ -76,24 +77,24 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ - dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -103,15 +104,15 @@ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[5] = z13 + z2; /* phase 6 */ dataptr[3] = z13 - z2; dataptr[1] = z11 + z4; dataptr[7] = z11 - z4; - dataptr += DCTSIZE; /* advance pointer to next row */ + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. */ @@ -126,24 +127,24 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -153,7 +154,7 @@ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ @@ -161,7 +162,7 @@ dataptr[DCTSIZE*1] = z11 + z4; dataptr[DCTSIZE*7] = z11 - z4; - dataptr++; /* advance pointer to next column */ + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctfst.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctfst.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctfst.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctfst.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jfdctfst.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a fast, not so accurate integer implementation of the * forward DCT (Discrete Cosine Transform). @@ -15,8 +18,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -33,7 +36,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_IFAST_SUPPORTED @@ -76,10 +79,10 @@ */ #if CONST_BITS == 8 -#define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */ +#define FIX_0_382683433 ((JLONG) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((JLONG) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((JLONG) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((JLONG) 334) /* FIX(1.306562965) */ #else #define FIX_0_382683433 FIX(0.382683433) #define FIX_0_541196100 FIX(0.541196100) @@ -99,7 +102,7 @@ #endif -/* Multiply a DCTELEM variable by an INT32 constant, and immediately +/* Multiply a DCTELEM variable by an JLONG constant, and immediately * descale to yield a DCTELEM result. */ @@ -111,7 +114,7 @@ */ GLOBAL(void) -jpeg_fdct_ifast (DCTELEM * data) +jpeg_fdct_ifast (DCTELEM *data) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; @@ -132,24 +135,24 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ - dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -159,15 +162,15 @@ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[5] = z13 + z2; /* phase 6 */ dataptr[3] = z13 - z2; dataptr[1] = z11 + z4; dataptr[7] = z11 - z4; - dataptr += DCTSIZE; /* advance pointer to next row */ + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. */ @@ -182,24 +185,24 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -209,7 +212,7 @@ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ @@ -217,7 +220,7 @@ dataptr[DCTSIZE*1] = z11 + z4; dataptr[DCTSIZE*7] = z11 - z4; - dataptr++; /* advance pointer to next column */ + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctint.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctint.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jfdctint.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jfdctint.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jfdctint.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1991-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a slow-but-accurate integer implementation of the * forward DCT (Discrete Cosine Transform). @@ -26,7 +29,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_ISLOW_SUPPORTED @@ -67,7 +70,7 @@ * they are represented to better-than-integral precision. These outputs * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word * with the recommended scaling. (For 12-bit sample data, the intermediate - * array is INT32 anyway.) + * array is JLONG anyway.) * * To avoid overflow of the 32-bit intermediate results in pass 2, we must * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis @@ -79,7 +82,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -90,18 +93,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -118,7 +121,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -137,11 +140,11 @@ */ GLOBAL(void) -jpeg_fdct_islow (DCTELEM * data) +jpeg_fdct_islow (DCTELEM *data) { - INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; + JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + JLONG tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr; SHIFT_TEMPS @@ -160,36 +163,36 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ - + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - - dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); - dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - + + dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); + dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS-PASS1_BITS); + CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS-PASS1_BITS); - + CONST_BITS-PASS1_BITS); + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ - + z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -198,16 +201,16 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); - - dataptr += DCTSIZE; /* advance pointer to next row */ + + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. @@ -225,36 +228,36 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ - + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); - + CONST_BITS+PASS1_BITS); + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ - + z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -263,20 +266,20 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, - CONST_BITS+PASS1_BITS); - - dataptr++; /* advance pointer to next column */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctflt.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctflt.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctflt.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctflt.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* * jidctflt.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a floating-point implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -22,8 +26,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -39,7 +43,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_FLOAT_SUPPORTED @@ -65,21 +69,21 @@ */ GLOBAL(void) -jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; FAST_FLOAT z5, z10, z11, z12, z13; JCOEFPTR inptr; - FLOAT_MULT_TYPE * quantptr; - FAST_FLOAT * wsptr; + FLOAT_MULT_TYPE *quantptr; + FAST_FLOAT *wsptr; JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); + JSAMPLE *range_limit = cinfo->sample_range_limit; int ctr; FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS + #define _0_125 ((FLOAT_MULT_TYPE)0.125) /* Pass 1: process columns from input, store into work array. */ @@ -95,14 +99,15 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ - FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - + FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], + quantptr[DCTSIZE*0] * _0_125); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; @@ -111,53 +116,53 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0] * _0_125); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2] * _0_125); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4] * _0_125); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6] * _0_125); - tmp10 = tmp0 + tmp2; /* phase 3 */ + tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; - tmp13 = tmp1 + tmp3; /* phases 5-3 */ + tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ - tmp0 = tmp10 + tmp13; /* phase 2 */ + tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; - + /* Odd part */ - tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1] * _0_125); + tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3] * _0_125); + tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5] * _0_125); + tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7] * _0_125); - z13 = tmp6 + tmp5; /* phase 6 */ + z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; wsptr[DCTSIZE*0] = tmp0 + tmp7; wsptr[DCTSIZE*7] = tmp0 - tmp7; @@ -165,16 +170,15 @@ wsptr[DCTSIZE*6] = tmp1 - tmp6; wsptr[DCTSIZE*2] = tmp2 + tmp5; wsptr[DCTSIZE*5] = tmp2 - tmp5; - wsptr[DCTSIZE*4] = tmp3 + tmp4; - wsptr[DCTSIZE*3] = tmp3 - tmp4; + wsptr[DCTSIZE*3] = tmp3 + tmp4; + wsptr[DCTSIZE*4] = tmp3 - tmp4; - inptr++; /* advance pointers to next column */ + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { @@ -184,11 +188,13 @@ * the simplification applies less often (typically 5% to 10% of the time). * And testing floats for zero is relatively expensive, so we don't bother. */ - + /* Even part */ - tmp10 = wsptr[0] + wsptr[4]; - tmp11 = wsptr[0] - wsptr[4]; + /* Apply signed->unsigned and prepare float->int conversion */ + z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5); + tmp10 = z5 + wsptr[4]; + tmp11 = z5 - wsptr[4]; tmp13 = wsptr[2] + wsptr[6]; tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; @@ -209,33 +215,25 @@ tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; + + /* Final output stage: float->int conversion and range-limit */ - /* Final output stage: scale down by a factor of 8 and range-limit */ + outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK]; + outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK]; + outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK]; + outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK]; + outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK]; + outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK]; + outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK]; + outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK]; - outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctfst.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctfst.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctfst.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctfst.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jidctfst.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a fast, not so accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -17,8 +20,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -35,7 +38,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_IFAST_SUPPORTED @@ -78,7 +81,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 8 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -89,10 +92,10 @@ */ #if CONST_BITS == 8 -#define FIX_1_082392200 ((INT32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((INT32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((INT32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((INT32) 669) /* FIX(2.613125930) */ +#define FIX_1_082392200 ((JLONG) 277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((JLONG) 362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((JLONG) 473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((JLONG) 669) /* FIX(2.613125930) */ #else #define FIX_1_082392200 FIX(1.082392200) #define FIX_1_414213562 FIX(1.414213562) @@ -112,7 +115,7 @@ #endif -/* Multiply a DCTELEM variable by an INT32 constant, and immediately +/* Multiply a DCTELEM variable by an JLONG constant, and immediately * descale to yield a DCTELEM result. */ @@ -122,27 +125,27 @@ /* Dequantize a coefficient by multiplying it by the multiplier-table * entry; produce a DCTELEM result. For 8-bit data a 16x16->16 * multiplication will do. For 12-bit data, the multiplier table is - * declared INT32, so a 32-bit multiply will be used. + * declared JLONG, so a 32-bit multiply will be used. */ #if BITS_IN_JSAMPLE == 8 #define DEQUANTIZE(coef,quantval) (((IFAST_MULT_TYPE) (coef)) * (quantval)) #else #define DEQUANTIZE(coef,quantval) \ - DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) + DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) #endif /* Like DESCALE, but applies to a DCTELEM and produces an int. - * We assume that int right shift is unsigned if INT32 right shift is. + * We assume that int right shift is unsigned if JLONG right shift is. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS DCTELEM ishift_temp; +#define ISHIFT_TEMPS DCTELEM ishift_temp; #if BITS_IN_JSAMPLE == 8 -#define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */ +#define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */ #else -#define DCTELEMBITS 32 /* DCTELEM must be 32 bits */ +#define DCTELEMBITS 32 /* DCTELEM must be 32 bits */ #endif #define IRIGHT_SHIFT(x,shft) \ ((ishift_temp = (x)) < 0 ? \ @@ -150,7 +153,7 @@ (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif #ifdef USE_ACCURATE_ROUNDING @@ -165,22 +168,22 @@ */ GLOBAL(void) -jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; DCTELEM z5, z10, z11, z12, z13; JCOEFPTR inptr; - IFAST_MULT_TYPE * quantptr; - int * wsptr; + IFAST_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS /* for DESCALE */ - ISHIFT_TEMPS /* for IDESCALE */ + int workspace[DCTSIZE2]; /* buffers data between passes */ + SHIFT_TEMPS /* for DESCALE */ + ISHIFT_TEMPS /* for IDESCALE */ /* Pass 1: process columns from input, store into work array. */ @@ -196,11 +199,11 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); @@ -212,13 +215,13 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); @@ -226,17 +229,17 @@ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - tmp10 = tmp0 + tmp2; /* phase 3 */ + tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; - tmp13 = tmp1 + tmp3; /* phases 5-3 */ + tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - tmp0 = tmp10 + tmp13; /* phase 2 */ + tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; - + /* Odd part */ tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); @@ -244,19 +247,19 @@ tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - z13 = tmp6 + tmp5; /* phase 6 */ + z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; @@ -269,11 +272,11 @@ wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - inptr++; /* advance pointers to next column */ + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ @@ -288,14 +291,14 @@ * test takes more time than it's worth. In that case this section * may be commented out. */ - + #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; @@ -305,11 +308,11 @@ outptr[6] = dcval; outptr[7] = dcval; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); @@ -317,7 +320,7 @@ tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - - tmp13; + - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; @@ -331,37 +334,37 @@ z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; /* Final output stage: scale down by a factor of 8 and range-limit */ outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctint.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctint.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctint.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctint.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* * jidctint.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1991-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modification developed 2002-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a slow-but-accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -23,12 +27,33 @@ * The advantage of this method is that no data path contains more than one * multiplication; this allows a very simple and accurate implementation in * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * We also provide IDCT routines with various output sample block sizes for + * direct resolution reduction or enlargement without additional resampling: + * NxN (N=1...16) pixels for one 8x8 input DCT block. + * + * For N<8 we simply take the corresponding low-frequency coefficients of + * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block + * to yield the downscaled outputs. + * This can be seen as direct low-pass downsampling from the DCT domain + * point of view rather than the usual spatial domain point of view, + * yielding significant computational savings and results at least + * as good as common bilinear (averaging) spatial downsampling. + * + * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as + * lower frequencies and higher frequencies assumed to be zero. + * It turns out that the computational effort is similar to the 8x8 IDCT + * regarding the output size. + * Furthermore, the scaling and descaling is the same for all IDCT sizes. + * + * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases + * since there would be too many additional constants to pre-calculate. */ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_ISLOW_SUPPORTED @@ -38,7 +63,7 @@ */ #if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ + Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ #endif @@ -67,7 +92,7 @@ * they are represented to better-than-integral precision. These outputs * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word * with the recommended scaling. (To scale up 12-bit sample data further, an - * intermediate INT32 array would be needed.) + * intermediate JLONG array would be needed.) * * To avoid overflow of the 32-bit intermediate results in pass 2, we must * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis @@ -79,7 +104,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -90,18 +115,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -118,7 +143,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -145,20 +170,20 @@ */ GLOBAL(void) -jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp1, tmp2, tmp3; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; + JLONG tmp0, tmp1, tmp2, tmp3; + JLONG tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3, z4, z5; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ + int workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -177,14 +202,15 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; @@ -193,49 +219,49 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ - + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - + tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); + tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ - + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - + z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -244,17 +270,17 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; - + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); @@ -263,12 +289,12 @@ wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ @@ -283,14 +309,14 @@ * test takes more time than it's worth. In that case this section * may be commented out. */ - + #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; @@ -300,44 +326,44 @@ outptr[6] = dcval; outptr[7] = dcval; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ - - z2 = (INT32) wsptr[2]; - z3 = (INT32) wsptr[6]; - + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[6]; + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - - tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; - tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; - + + tmp0 = LEFT_SHIFT((JLONG) wsptr[0] + (JLONG) wsptr[4], CONST_BITS); + tmp1 = LEFT_SHIFT((JLONG) wsptr[0] - (JLONG) wsptr[4], CONST_BITS); + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ - - tmp0 = (INT32) wsptr[7]; - tmp1 = (INT32) wsptr[5]; - tmp2 = (INT32) wsptr[3]; - tmp3 = (INT32) wsptr[1]; - + + tmp0 = (JLONG) wsptr[7]; + tmp1 = (JLONG) wsptr[5]; + tmp2 = (JLONG) wsptr[3]; + tmp3 = (JLONG) wsptr[1]; + z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -346,44 +372,2256 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; - + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} + +#ifdef IDCT_SCALING_SUPPORTED + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 7x7 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/14). + */ + +GLOBAL(void) +jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[7*7]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 7 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp13 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 7; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 6x6 output block. + * + * Optimized algorithm with 3 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/12). + */ + +GLOBAL(void) +jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[6*6]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); + tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); + tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); + tmp1 = LEFT_SHIFT(z1 - z2 - z3, PASS1_BITS); + + /* Final output stage */ + + wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*1] = (int) (tmp11 + tmp1); + wsptr[6*4] = (int) (tmp11 - tmp1); + wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 6 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + tmp2 = (JLONG) wsptr[4]; + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = tmp0 - tmp10 - tmp10; + tmp10 = (JLONG) wsptr[2]; + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); + tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); + tmp1 = LEFT_SHIFT(z1 - z2 - z3, CONST_BITS); + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 6; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 5x5 output block. + * + * Optimized algorithm with 5 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/10). + */ + +GLOBAL(void) +jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp10, tmp11, tmp12; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[5*5]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= LEFT_SHIFT(z2, 2); + + /* Odd part */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 5 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp12 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); + tmp0 = (JLONG) wsptr[2]; + tmp1 = (JLONG) wsptr[4]; + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= LEFT_SHIFT(z2, 2); + + /* Odd part */ + + z2 = (JLONG) wsptr[1]; + z3 = (JLONG) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 5; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 3x3 output block. + * + * Optimized algorithm with 2 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/6). + */ + +GLOBAL(void) +jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp2, tmp10, tmp12; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[3*3]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 3 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + tmp2 = (JLONG) wsptr[2]; + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = (JLONG) wsptr[1]; + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 3; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 9x9 output block. + * + * Optimized algorithm with 10 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/18). + */ + +GLOBAL(void) +jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*9]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 9 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 9; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 10x10 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/20). + */ + +GLOBAL(void) +jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24; + JLONG z1, z2, z3, z4, z5; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*10]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = LEFT_SHIFT(z3, CONST_BITS); + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1), + CONST_BITS-PASS1_BITS); /* c0 = (c4-c8)*2 */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + z5 = LEFT_SHIFT(z3, CONST_BITS); + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z5 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z5 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1); + + tmp12 = LEFT_SHIFT(z1 - tmp13 - z3, PASS1_BITS); + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) (tmp22 + tmp12); + wsptr[8*7] = (int) (tmp22 - tmp12); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 10 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 10; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = LEFT_SHIFT(z3, CONST_BITS); + z4 = (JLONG) wsptr[4]; + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1); /* c0 = (c4-c8)*2 */ + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z3 = LEFT_SHIFT(z3, CONST_BITS); + z4 = (JLONG) wsptr[7]; + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z3 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z3 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1); + + tmp12 = LEFT_SHIFT(z1 - tmp13, CONST_BITS) - z3; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 11x11 output block. + * + * Optimized algorithm with 24 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/22). + */ + +GLOBAL(void) +jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*11]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 11 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 11; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp10 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 12x12 output block. + * + * Optimized algorithm with 15 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/24). + */ + +GLOBAL(void) +jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*12]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = LEFT_SHIFT(z3, CONST_BITS); + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 = LEFT_SHIFT(z1, CONST_BITS); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = LEFT_SHIFT(z2, CONST_BITS); + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 12 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 12; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = LEFT_SHIFT(z3, CONST_BITS); + + z4 = (JLONG) wsptr[4]; + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = (JLONG) wsptr[2]; + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 = LEFT_SHIFT(z1, CONST_BITS); + z2 = (JLONG) wsptr[6]; + z2 = LEFT_SHIFT(z2, CONST_BITS); + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 13x13 output block. + * + * Optimized algorithm with 29 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/26). + */ + +GLOBAL(void) +jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*13]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 13 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 13; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[4]; + z4 = (JLONG) wsptr[6]; + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 14x14 output block. + * + * Optimized algorithm with 20 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/28). + */ + +GLOBAL(void) +jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*14]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1), + CONST_BITS-PASS1_BITS); /* c0 = (c4+c12-c8)*2 */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp13 = LEFT_SHIFT(z4, CONST_BITS); + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ + tmp16 += tmp15; + z1 += z4; + z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ + tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = LEFT_SHIFT(z1 - z3, PASS1_BITS); + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) (tmp23 + tmp13); + wsptr[8*10] = (int) (tmp23 - tmp13); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 14 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 14; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + z4 = (JLONG) wsptr[4]; + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1); /* c0 = (c4+c12-c8)*2 */ + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[6]; + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + z4 = LEFT_SHIFT(z4, CONST_BITS); + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ + tmp16 += tmp15; + tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ + tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = LEFT_SHIFT(z1 - z3, CONST_BITS) + z4; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 15x15 output block. + * + * Optimized algorithm with 22 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/30). + */ + +GLOBAL(void) +jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*15]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 15 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 15; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[4]; + z4 = (JLONG) wsptr[6]; + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z4 = (JLONG) wsptr[5]; + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = (JLONG) wsptr[7]; + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 16x16 output block. + * + * Optimized algorithm with 28 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/32). + */ + +GLOBAL(void) +jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*16]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 16 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 16; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + + z1 = (JLONG) wsptr[4]; + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[6]; + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ } } +#endif /* IDCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctred.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctred.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jidctred.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jidctred.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jidctred.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains inverse-DCT routines that produce reduced-size output: * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block. @@ -23,7 +26,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef IDCT_SCALING_SUPPORTED @@ -44,7 +47,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -55,20 +58,20 @@ */ #if CONST_BITS == 13 -#define FIX_0_211164243 ((INT32) 1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 ((INT32) 4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 ((INT32) 4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 ((INT32) 5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 ((INT32) 6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 ((INT32) 8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 ((INT32) 10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 ((INT32) 11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 ((INT32) 17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 ((INT32) 29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 ((JLONG) 1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 ((JLONG) 4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 ((JLONG) 4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 ((JLONG) 5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 ((JLONG) 6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 ((JLONG) 8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 ((JLONG) 10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 ((JLONG) 11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 ((JLONG) 17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 ((JLONG) 29692) /* FIX(3.624509785) */ #else #define FIX_0_211164243 FIX(0.211164243) #define FIX_0_509795579 FIX(0.509795579) @@ -87,7 +90,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -115,19 +118,19 @@ */ GLOBAL(void) -jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp2, tmp10, tmp12; - INT32 z1, z2, z3, z4; + JLONG tmp0, tmp2, tmp10, tmp12; + JLONG z1, z2, z3, z4; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*4]; /* buffers data between passes */ + int workspace[DCTSIZE*4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -140,57 +143,58 @@ if (ctr == DCTSIZE-4) continue; if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && - inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && + inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero; we need not examine term 4 for 4x4 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; - + continue; } - + /* Even part */ - + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp0 <<= (CONST_BITS+1); - + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS+1); + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); - + tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; - + /* Odd part */ - + z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); } - + /* Pass 2: process 4 rows from work array, store into output array. */ wsptr = workspace; @@ -200,64 +204,64 @@ #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; outptr[3] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ + + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ - - tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1); - - tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065) - + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865); - + + tmp0 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+1); + + tmp2 = MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + + MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865); + tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; - + /* Odd part */ - - z1 = (INT32) wsptr[7]; - z2 = (INT32) wsptr[5]; - z3 = (INT32) wsptr[3]; - z4 = (INT32) wsptr[1]; - + + z1 = (JLONG) wsptr[7]; + z2 = (JLONG) wsptr[5]; + z3 = (JLONG) wsptr[3]; + z4 = (JLONG) wsptr[1]; + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ } } @@ -268,18 +272,18 @@ */ GLOBAL(void) -jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp10, z1; + JLONG tmp0, tmp10, z1; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*2]; /* buffers data between passes */ + int workspace[DCTSIZE*2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -292,21 +296,22 @@ if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) continue; if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; - + continue; } - + /* Even part */ - + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp10 = z1 << (CONST_BITS+2); - + tmp10 = LEFT_SHIFT(z1, CONST_BITS+2); + /* Odd part */ z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); @@ -319,11 +324,11 @@ tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); } - + /* Pass 2: process 2 rows from work array, store into output array. */ wsptr = workspace; @@ -334,38 +339,38 @@ #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ + + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ - - tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2); - + + tmp10 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+2); + /* Odd part */ - tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ - + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ - + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ - + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ + tmp0 = MULTIPLY((JLONG) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ + + MULTIPLY((JLONG) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ + + MULTIPLY((JLONG) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ + + MULTIPLY((JLONG) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+2) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3+2) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ } } @@ -376,12 +381,12 @@ */ GLOBAL(void) -jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { int dcval; - ISLOW_MULT_TYPE * quantptr; + ISLOW_MULT_TYPE *quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); SHIFT_TEMPS @@ -390,7 +395,7 @@ */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; dcval = DEQUANTIZE(coef_block[0], quantptr[0]); - dcval = (int) DESCALE((INT32) dcval, 3); + dcval = (int) DESCALE((JLONG) dcval, 3); output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jinclude.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jinclude.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jinclude.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jinclude.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jinclude.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1994, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file exists to provide a single place to fix any problems with * including the wrong system include files. (Common problems are taken @@ -17,8 +20,8 @@ /* Include auto-config file to find out which system include files we need. */ -#include "jconfig.h" /* auto configuration options */ -#define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ +#include "jconfig.h" /* auto configuration options */ +#define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ /* * We need the NULL macro and size_t typedef. @@ -58,28 +61,18 @@ #ifdef NEED_BSD_STRINGS #include -#define MEMZERO(target,size) bzero((void *)(target), (size_t)(size)) -#define MEMCOPY(dest,src,size) bcopy((const void *)(src), (void *)(dest), (size_t)(size)) +#define MEMZERO(target,size) bzero((void *)(target), (size_t)(size)) +#define MEMCOPY(dest,src,size) bcopy((const void *)(src), (void *)(dest), (size_t)(size)) #else /* not BSD, assume ANSI/SysV string lib */ #include -#define MEMZERO(target,size) memset((void *)(target), 0, (size_t)(size)) -#define MEMCOPY(dest,src,size) memcpy((void *)(dest), (const void *)(src), (size_t)(size)) +#define MEMZERO(target,size) memset((void *)(target), 0, (size_t)(size)) +#define MEMCOPY(dest,src,size) memcpy((void *)(dest), (const void *)(src), (size_t)(size)) #endif /* - * In ANSI C, and indeed any rational implementation, size_t is also the - * type returned by sizeof(). However, it seems there are some irrational - * implementations out there, in which sizeof() returns an int even though - * size_t is defined as long or unsigned long. To ensure consistent results - * we always use this SIZEOF() macro in place of using sizeof() directly. - */ - -#define SIZEOF(object) ((size_t) sizeof(object)) - -/* * The modules that use fread() and fwrite() always invoke them through * these macros. On some systems you may need to twiddle the argument casts. * CAUTION: argument order is different from underlying functions! diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemmgr.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemmgr.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the JPEG system-independent memory management * routines. This code is usable across a wide variety of machines; most @@ -25,14 +28,14 @@ */ #define JPEG_INTERNALS -#define AM_MEMORY_MANAGER /* we define jvirt_Xarray_control structs */ +#define AM_MEMORY_MANAGER /* we define jvirt_Xarray_control structs */ #include "jinclude.h" #include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ +#include "jmemsys.h" /* import the system-dependent declarations */ #ifndef NO_GETENV -#ifndef HAVE_STDLIB_H /* should declare getenv() */ -extern char * getenv JPP((const char * name)); +#ifndef HAVE_STDLIB_H /* should declare getenv() */ +extern char *getenv (const char *name); #endif #endif @@ -67,9 +70,9 @@ * There isn't any really portable way to determine the worst-case alignment * requirement. This module assumes that the alignment requirement is * multiples of ALIGN_SIZE. - * By default, we define ALIGN_SIZE as sizeof(double). This is necessary on some - * workstations (where doubles really do need 8-byte alignment) and will work - * fine on nearly everything. If your machine has lesser alignment needs, + * By default, we define ALIGN_SIZE as sizeof(double). This is necessary on + * some workstations (where doubles really do need 8-byte alignment) and will + * work fine on nearly everything. If your machine has lesser alignment needs, * you can save a few bytes by making ALIGN_SIZE smaller. * The only place I know of where this will NOT work is certain Macintosh * 680x0 compilers that define double as a 10-byte IEEE extended float. @@ -78,9 +81,9 @@ * such a compiler. */ -#ifndef ALIGN_SIZE /* so can override from jconfig.h */ +#ifndef ALIGN_SIZE /* so can override from jconfig.h */ #ifndef WITH_SIMD -#define ALIGN_SIZE SIZEOF(double) +#define ALIGN_SIZE sizeof(double) #else #define ALIGN_SIZE 16 /* Most SIMD implementations require this */ #endif @@ -91,24 +94,23 @@ * request to jpeg_get_small() or jpeg_get_large(). There is no per-object * overhead within a pool, except for alignment padding. Each pool has a * header with a link to the next pool of the same class. - * Small and large pool headers are identical except that the latter's - * link pointer must be FAR on 80x86 machines. + * Small and large pool headers are identical. */ -typedef struct small_pool_struct * small_pool_ptr; +typedef struct small_pool_struct *small_pool_ptr; typedef struct small_pool_struct { - small_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ + small_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ } small_pool_hdr; -typedef struct large_pool_struct FAR * large_pool_ptr; +typedef struct large_pool_struct *large_pool_ptr; typedef struct large_pool_struct { - large_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ + large_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ } large_pool_hdr; /* @@ -116,7 +118,7 @@ */ typedef struct { - struct jpeg_memory_mgr pub; /* public fields */ + struct jpeg_memory_mgr pub; /* public fields */ /* Each pool identifier (lifetime class) names a linked list of pools. */ small_pool_ptr small_list[JPOOL_NUMPOOLS]; @@ -136,10 +138,10 @@ /* alloc_sarray and alloc_barray set this value for use by virtual * array routines. */ - JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */ + JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */ } my_memory_mgr; -typedef my_memory_mgr * my_mem_ptr; +typedef my_memory_mgr *my_mem_ptr; /* @@ -150,39 +152,39 @@ */ struct jvirt_sarray_control { - JSAMPARRAY mem_buffer; /* => the in-memory buffer */ - JDIMENSION rows_in_array; /* total virtual array height */ - JDIMENSION samplesperrow; /* width of array (and of memory buffer) */ - JDIMENSION maxaccess; /* max rows accessed by access_virt_sarray */ - JDIMENSION rows_in_mem; /* height of memory buffer */ - JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ - JDIMENSION cur_start_row; /* first logical row # in the buffer */ - JDIMENSION first_undef_row; /* row # of first uninitialized row */ - boolean pre_zero; /* pre-zero mode requested? */ - boolean dirty; /* do current buffer contents need written? */ - boolean b_s_open; /* is backing-store data valid? */ - jvirt_sarray_ptr next; /* link to next virtual sarray control block */ - backing_store_info b_s_info; /* System-dependent control info */ + JSAMPARRAY mem_buffer; /* => the in-memory buffer */ + JDIMENSION rows_in_array; /* total virtual array height */ + JDIMENSION samplesperrow; /* width of array (and of memory buffer) */ + JDIMENSION maxaccess; /* max rows accessed by access_virt_sarray */ + JDIMENSION rows_in_mem; /* height of memory buffer */ + JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ + JDIMENSION cur_start_row; /* first logical row # in the buffer */ + JDIMENSION first_undef_row; /* row # of first uninitialized row */ + boolean pre_zero; /* pre-zero mode requested? */ + boolean dirty; /* do current buffer contents need written? */ + boolean b_s_open; /* is backing-store data valid? */ + jvirt_sarray_ptr next; /* link to next virtual sarray control block */ + backing_store_info b_s_info; /* System-dependent control info */ }; struct jvirt_barray_control { - JBLOCKARRAY mem_buffer; /* => the in-memory buffer */ - JDIMENSION rows_in_array; /* total virtual array height */ - JDIMENSION blocksperrow; /* width of array (and of memory buffer) */ - JDIMENSION maxaccess; /* max rows accessed by access_virt_barray */ - JDIMENSION rows_in_mem; /* height of memory buffer */ - JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ - JDIMENSION cur_start_row; /* first logical row # in the buffer */ - JDIMENSION first_undef_row; /* row # of first uninitialized row */ - boolean pre_zero; /* pre-zero mode requested? */ - boolean dirty; /* do current buffer contents need written? */ - boolean b_s_open; /* is backing-store data valid? */ - jvirt_barray_ptr next; /* link to next virtual barray control block */ - backing_store_info b_s_info; /* System-dependent control info */ + JBLOCKARRAY mem_buffer; /* => the in-memory buffer */ + JDIMENSION rows_in_array; /* total virtual array height */ + JDIMENSION blocksperrow; /* width of array (and of memory buffer) */ + JDIMENSION maxaccess; /* max rows accessed by access_virt_barray */ + JDIMENSION rows_in_mem; /* height of memory buffer */ + JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ + JDIMENSION cur_start_row; /* first logical row # in the buffer */ + JDIMENSION first_undef_row; /* row # of first uninitialized row */ + boolean pre_zero; /* pre-zero mode requested? */ + boolean dirty; /* do current buffer contents need written? */ + boolean b_s_open; /* is backing-store data valid? */ + jvirt_barray_ptr next; /* link to next virtual barray control block */ + backing_store_info b_s_info; /* System-dependent control info */ }; -#ifdef MEM_STATS /* optional extra stuff for statistics */ +#ifdef MEM_STATS /* optional extra stuff for statistics */ LOCAL(void) print_mem_stats (j_common_ptr cinfo, int pool_id) @@ -196,19 +198,19 @@ * This is helpful because message parm array can't handle longs. */ fprintf(stderr, "Freeing pool %d, total space = %ld\n", - pool_id, mem->total_space_allocated); + pool_id, mem->total_space_allocated); for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL; lhdr_ptr = lhdr_ptr->next) { fprintf(stderr, " Large chunk used %ld\n", - (long) lhdr_ptr->bytes_used); + (long) lhdr_ptr->bytes_used); } for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL; shdr_ptr = shdr_ptr->next) { fprintf(stderr, " Small chunk used %ld free %ld\n", - (long) shdr_ptr->bytes_used, - (long) shdr_ptr->bytes_left); + (long) shdr_ptr->bytes_used, + (long) shdr_ptr->bytes_left); } } @@ -221,7 +223,7 @@ /* If we compiled MEM_STATS support, report alloc requests before dying */ { #ifdef MEM_STATS - cinfo->err->trace_level = 2; /* force self_destruct to report stats */ + cinfo->err->trace_level = 2; /* force self_destruct to report stats */ #endif ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which); } @@ -244,19 +246,19 @@ * adjustment. */ -static const size_t first_pool_slop[JPOOL_NUMPOOLS] = +static const size_t first_pool_slop[JPOOL_NUMPOOLS] = { - 1600, /* first PERMANENT pool */ - 16000 /* first IMAGE pool */ + 1600, /* first PERMANENT pool */ + 16000 /* first IMAGE pool */ }; -static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = +static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = { - 0, /* additional PERMANENT pools */ - 5000 /* additional IMAGE pools */ + 0, /* additional PERMANENT pools */ + 5000 /* additional IMAGE pools */ }; -#define MIN_SLOP 50 /* greater than 0 to avoid futile looping */ +#define MIN_SLOP 50 /* greater than 0 to avoid futile looping */ METHODDEF(void *) @@ -265,7 +267,7 @@ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; small_pool_ptr hdr_ptr, prev_hdr_ptr; - char * data_ptr; + char *data_ptr; size_t min_request, slop; /* @@ -274,20 +276,26 @@ * and so that algorithms can straddle outside the proper area up * to the next alignment. */ + if (sizeofobject > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 7); + } sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); /* Check for unsatisfiable request (do now to ensure no overflow below) */ - if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) - out_of_memory(cinfo, 1); /* request exceeds malloc's ability */ + if ((sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > + MAX_ALLOC_CHUNK) + out_of_memory(cinfo, 1); /* request exceeds malloc's ability */ /* See if space is available in any existing pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ prev_hdr_ptr = NULL; hdr_ptr = mem->small_list[pool_id]; while (hdr_ptr != NULL) { if (hdr_ptr->bytes_left >= sizeofobject) - break; /* found pool with enough space */ + break; /* found pool with enough space */ prev_hdr_ptr = hdr_ptr; hdr_ptr = hdr_ptr->next; } @@ -295,8 +303,8 @@ /* Time to make a new pool? */ if (hdr_ptr == NULL) { /* min_request is what we need now, slop is what will be leftover */ - min_request = SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1; - if (prev_hdr_ptr == NULL) /* first pool in class? */ + min_request = sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1; + if (prev_hdr_ptr == NULL) /* first pool in class? */ slop = first_pool_slop[pool_id]; else slop = extra_pool_slop[pool_id]; @@ -307,17 +315,17 @@ for (;;) { hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop); if (hdr_ptr != NULL) - break; + break; slop /= 2; - if (slop < MIN_SLOP) /* give up when it gets real small */ - out_of_memory(cinfo, 2); /* jpeg_get_small failed */ + if (slop < MIN_SLOP) /* give up when it gets real small */ + out_of_memory(cinfo, 2); /* jpeg_get_small failed */ } mem->total_space_allocated += min_request + slop; /* Success, initialize the new pool header and add to end of list */ hdr_ptr->next = NULL; hdr_ptr->bytes_used = 0; hdr_ptr->bytes_left = sizeofobject + slop; - if (prev_hdr_ptr == NULL) /* first pool in class? */ + if (prev_hdr_ptr == NULL) /* first pool in class? */ mem->small_list[pool_id] = hdr_ptr; else prev_hdr_ptr->next = hdr_ptr; @@ -325,7 +333,7 @@ /* OK, allocate the object from the current pool */ data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ - data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ + data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; data_ptr += hdr_ptr->bytes_used; /* point to place for object */ @@ -339,9 +347,8 @@ /* * Allocation of "large" objects. * - * The external semantics of these are the same as "small" objects, - * except that FAR pointers are used on 80x86. However the pool - * management heuristics are quite different. We assume that each + * The external semantics of these are the same as "small" objects. However, + * the pool management heuristics are quite different. We assume that each * request is large enough that it may as well be passed directly to * jpeg_get_large; the pool management just links everything together * so that we can free it all on demand. @@ -350,35 +357,42 @@ * deliberately bunch rows together to ensure a large request size. */ -METHODDEF(void FAR *) +METHODDEF(void *) alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) /* Allocate a "large" object */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; large_pool_ptr hdr_ptr; - char FAR * data_ptr; + char *data_ptr; /* * Round up the requested size to a multiple of ALIGN_SIZE so that * algorithms can straddle outside the proper area up to the next * alignment. */ + if (sizeofobject > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 8); + } sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); /* Check for unsatisfiable request (do now to ensure no overflow below) */ - if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) - out_of_memory(cinfo, 3); /* request exceeds malloc's ability */ + if ((sizeof(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > + MAX_ALLOC_CHUNK) + out_of_memory(cinfo, 3); /* request exceeds malloc's ability */ /* Always make a new pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject + - SIZEOF(large_pool_hdr) + - ALIGN_SIZE - 1); + sizeof(large_pool_hdr) + + ALIGN_SIZE - 1); if (hdr_ptr == NULL) - out_of_memory(cinfo, 4); /* jpeg_get_large failed */ - mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr) + ALIGN_SIZE - 1; + out_of_memory(cinfo, 4); /* jpeg_get_large failed */ + mem->total_space_allocated += sizeofobject + sizeof(large_pool_hdr) + + ALIGN_SIZE - 1; /* Success, initialize the new pool header and add to list */ hdr_ptr->next = mem->large_list[pool_id]; @@ -390,17 +404,16 @@ mem->large_list[pool_id] = hdr_ptr; data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ - data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ + data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; - return (void FAR *) data_ptr; + return (void *) data_ptr; } /* * Creation of 2-D sample arrays. - * The pointers are in near heap, the samples themselves in FAR heap. * * To minimize allocation overhead and to allow I/O of large contiguous * blocks, we allocate the sample rows in groups of as many rows as possible @@ -417,7 +430,7 @@ METHODDEF(JSAMPARRAY) alloc_sarray (j_common_ptr cinfo, int pool_id, - JDIMENSION samplesperrow, JDIMENSION numrows) + JDIMENSION samplesperrow, JDIMENSION numrows) /* Allocate a 2-D sample array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -427,13 +440,20 @@ long ltemp; /* Make sure each row is properly aligned */ - if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0) - out_of_memory(cinfo, 5); /* safety check */ - samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE)); + if ((ALIGN_SIZE % sizeof(JSAMPLE)) != 0) + out_of_memory(cinfo, 5); /* safety check */ + + if (samplesperrow > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 9); + } + samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / + sizeof(JSAMPLE)); /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / - ((long) samplesperrow * SIZEOF(JSAMPLE)); + ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / + ((long) samplesperrow * sizeof(JSAMPLE)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); if (ltemp < (long) numrows) @@ -444,15 +464,15 @@ /* Get space for row pointers (small object) */ result = (JSAMPARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * SIZEOF(JSAMPROW))); + (size_t) (numrows * sizeof(JSAMPROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); workspace = (JSAMPROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow - * SIZEOF(JSAMPLE))); + (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow + * sizeof(JSAMPLE))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace += samplesperrow; @@ -470,7 +490,7 @@ METHODDEF(JBLOCKARRAY) alloc_barray (j_common_ptr cinfo, int pool_id, - JDIMENSION blocksperrow, JDIMENSION numrows) + JDIMENSION blocksperrow, JDIMENSION numrows) /* Allocate a 2-D coefficient-block array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -480,12 +500,12 @@ long ltemp; /* Make sure each row is properly aligned */ - if ((SIZEOF(JBLOCK) % ALIGN_SIZE) != 0) - out_of_memory(cinfo, 6); /* safety check */ + if ((sizeof(JBLOCK) % ALIGN_SIZE) != 0) + out_of_memory(cinfo, 6); /* safety check */ /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / - ((long) blocksperrow * SIZEOF(JBLOCK)); + ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / + ((long) blocksperrow * sizeof(JBLOCK)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); if (ltemp < (long) numrows) @@ -496,15 +516,15 @@ /* Get space for row pointers (small object) */ result = (JBLOCKARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * SIZEOF(JBLOCKROW))); + (size_t) (numrows * sizeof(JBLOCKROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); workspace = (JBLOCKROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow - * SIZEOF(JBLOCK))); + (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow + * sizeof(JBLOCK))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace += blocksperrow; @@ -554,8 +574,8 @@ METHODDEF(jvirt_sarray_ptr) request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero, - JDIMENSION samplesperrow, JDIMENSION numrows, - JDIMENSION maxaccess) + JDIMENSION samplesperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D sample array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -563,18 +583,18 @@ /* Only IMAGE-lifetime virtual arrays are currently supported */ if (pool_id != JPOOL_IMAGE) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id, - SIZEOF(struct jvirt_sarray_control)); + sizeof(struct jvirt_sarray_control)); - result->mem_buffer = NULL; /* marks array not yet realized */ + result->mem_buffer = NULL; /* marks array not yet realized */ result->rows_in_array = numrows; result->samplesperrow = samplesperrow; result->maxaccess = maxaccess; result->pre_zero = pre_zero; - result->b_s_open = FALSE; /* no associated backing-store object */ + result->b_s_open = FALSE; /* no associated backing-store object */ result->next = mem->virt_sarray_list; /* add to list of virtual arrays */ mem->virt_sarray_list = result; @@ -584,8 +604,8 @@ METHODDEF(jvirt_barray_ptr) request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero, - JDIMENSION blocksperrow, JDIMENSION numrows, - JDIMENSION maxaccess) + JDIMENSION blocksperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D coefficient-block array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -593,18 +613,18 @@ /* Only IMAGE-lifetime virtual arrays are currently supported */ if (pool_id != JPOOL_IMAGE) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id, - SIZEOF(struct jvirt_barray_control)); + sizeof(struct jvirt_barray_control)); - result->mem_buffer = NULL; /* marks array not yet realized */ + result->mem_buffer = NULL; /* marks array not yet realized */ result->rows_in_array = numrows; result->blocksperrow = blocksperrow; result->maxaccess = maxaccess; result->pre_zero = pre_zero; - result->b_s_open = FALSE; /* no associated backing-store object */ + result->b_s_open = FALSE; /* no associated backing-store object */ result->next = mem->virt_barray_list; /* add to list of virtual arrays */ mem->virt_barray_list = result; @@ -631,26 +651,26 @@ for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { if (sptr->mem_buffer == NULL) { /* if not realized yet */ space_per_minheight += (long) sptr->maxaccess * - (long) sptr->samplesperrow * SIZEOF(JSAMPLE); + (long) sptr->samplesperrow * sizeof(JSAMPLE); maximum_space += (long) sptr->rows_in_array * - (long) sptr->samplesperrow * SIZEOF(JSAMPLE); + (long) sptr->samplesperrow * sizeof(JSAMPLE); } } for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->mem_buffer == NULL) { /* if not realized yet */ space_per_minheight += (long) bptr->maxaccess * - (long) bptr->blocksperrow * SIZEOF(JBLOCK); + (long) bptr->blocksperrow * sizeof(JBLOCK); maximum_space += (long) bptr->rows_in_array * - (long) bptr->blocksperrow * SIZEOF(JBLOCK); + (long) bptr->blocksperrow * sizeof(JBLOCK); } } if (space_per_minheight <= 0) - return; /* no unrealized arrays, no work */ + return; /* no unrealized arrays, no work */ /* Determine amount of memory to actually use; this is system-dependent. */ avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space, - mem->total_space_allocated); + mem->total_space_allocated); /* If the maximum space needed is available, make all the buffers full * height; otherwise parcel it out with the same number of minheights @@ -673,19 +693,19 @@ if (sptr->mem_buffer == NULL) { /* if not realized yet */ minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L; if (minheights <= max_minheights) { - /* This buffer fits in memory */ - sptr->rows_in_mem = sptr->rows_in_array; + /* This buffer fits in memory */ + sptr->rows_in_mem = sptr->rows_in_array; } else { - /* It doesn't fit in memory, create backing store. */ - sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess); - jpeg_open_backing_store(cinfo, & sptr->b_s_info, - (long) sptr->rows_in_array * - (long) sptr->samplesperrow * - (long) SIZEOF(JSAMPLE)); - sptr->b_s_open = TRUE; + /* It doesn't fit in memory, create backing store. */ + sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess); + jpeg_open_backing_store(cinfo, & sptr->b_s_info, + (long) sptr->rows_in_array * + (long) sptr->samplesperrow * + (long) sizeof(JSAMPLE)); + sptr->b_s_open = TRUE; } sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE, - sptr->samplesperrow, sptr->rows_in_mem); + sptr->samplesperrow, sptr->rows_in_mem); sptr->rowsperchunk = mem->last_rowsperchunk; sptr->cur_start_row = 0; sptr->first_undef_row = 0; @@ -697,19 +717,19 @@ if (bptr->mem_buffer == NULL) { /* if not realized yet */ minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L; if (minheights <= max_minheights) { - /* This buffer fits in memory */ - bptr->rows_in_mem = bptr->rows_in_array; + /* This buffer fits in memory */ + bptr->rows_in_mem = bptr->rows_in_array; } else { - /* It doesn't fit in memory, create backing store. */ - bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess); - jpeg_open_backing_store(cinfo, & bptr->b_s_info, - (long) bptr->rows_in_array * - (long) bptr->blocksperrow * - (long) SIZEOF(JBLOCK)); - bptr->b_s_open = TRUE; + /* It doesn't fit in memory, create backing store. */ + bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess); + jpeg_open_backing_store(cinfo, & bptr->b_s_info, + (long) bptr->rows_in_array * + (long) bptr->blocksperrow * + (long) sizeof(JBLOCK)); + bptr->b_s_open = TRUE; } bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE, - bptr->blocksperrow, bptr->rows_in_mem); + bptr->blocksperrow, bptr->rows_in_mem); bptr->rowsperchunk = mem->last_rowsperchunk; bptr->cur_start_row = 0; bptr->first_undef_row = 0; @@ -725,7 +745,7 @@ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE); + bytesperrow = (long) ptr->samplesperrow * sizeof(JSAMPLE); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { @@ -736,17 +756,17 @@ rows = MIN(rows, (long) ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ rows = MIN(rows, (long) ptr->rows_in_array - thisrow); - if (rows <= 0) /* this chunk might be past end of file! */ + if (rows <= 0) /* this chunk might be past end of file! */ break; byte_count = rows * bytesperrow; if (writing) (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); else (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); file_offset += byte_count; } } @@ -758,7 +778,7 @@ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK); + bytesperrow = (long) ptr->blocksperrow * sizeof(JBLOCK); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { @@ -769,17 +789,17 @@ rows = MIN(rows, (long) ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ rows = MIN(rows, (long) ptr->rows_in_array - thisrow); - if (rows <= 0) /* this chunk might be past end of file! */ + if (rows <= 0) /* this chunk might be past end of file! */ break; byte_count = rows * bytesperrow; if (writing) (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); else (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); file_offset += byte_count; } } @@ -787,8 +807,8 @@ METHODDEF(JSAMPARRAY) access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable) /* Access the part of a virtual sample array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -826,7 +846,7 @@ ltemp = (long) end_row - (long) ptr->rows_in_mem; if (ltemp < 0) - ltemp = 0; /* don't fall off front end of file */ + ltemp = 0; /* don't fall off front end of file */ ptr->cur_start_row = (JDIMENSION) ltemp; } /* Read in the selected part of the array. @@ -841,25 +861,25 @@ */ if (ptr->first_undef_row < end_row) { if (ptr->first_undef_row < start_row) { - if (writable) /* writer skipped over a section of array */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); - undef_row = start_row; /* but reader is allowed to read ahead */ + if (writable) /* writer skipped over a section of array */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + undef_row = start_row; /* but reader is allowed to read ahead */ } else { undef_row = ptr->first_undef_row; } if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE); + size_t bytesperrow = (size_t) ptr->samplesperrow * sizeof(JSAMPLE); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow); - undef_row++; + jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + undef_row++; } } else { - if (! writable) /* reader looking at undefined data */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + if (! writable) /* reader looking at undefined data */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } /* Flag the buffer dirty if caller will write in it */ @@ -872,8 +892,8 @@ METHODDEF(JBLOCKARRAY) access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable) /* Access the part of a virtual block array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -911,7 +931,7 @@ ltemp = (long) end_row - (long) ptr->rows_in_mem; if (ltemp < 0) - ltemp = 0; /* don't fall off front end of file */ + ltemp = 0; /* don't fall off front end of file */ ptr->cur_start_row = (JDIMENSION) ltemp; } /* Read in the selected part of the array. @@ -926,25 +946,25 @@ */ if (ptr->first_undef_row < end_row) { if (ptr->first_undef_row < start_row) { - if (writable) /* writer skipped over a section of array */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); - undef_row = start_row; /* but reader is allowed to read ahead */ + if (writable) /* writer skipped over a section of array */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + undef_row = start_row; /* but reader is allowed to read ahead */ } else { undef_row = ptr->first_undef_row; } if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK); + size_t bytesperrow = (size_t) ptr->blocksperrow * sizeof(JBLOCK); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow); - undef_row++; + jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + undef_row++; } } else { - if (! writable) /* reader looking at undefined data */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + if (! writable) /* reader looking at undefined data */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } /* Flag the buffer dirty if caller will write in it */ @@ -968,7 +988,7 @@ size_t space_freed; if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ #ifdef MEM_STATS if (cinfo->err->trace_level > 1) @@ -981,16 +1001,16 @@ jvirt_barray_ptr bptr; for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { - if (sptr->b_s_open) { /* there may be no backing store */ - sptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info); + if (sptr->b_s_open) { /* there may be no backing store */ + sptr->b_s_open = FALSE; /* prevent recursive close if error */ + (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info); } } mem->virt_sarray_list = NULL; for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { - if (bptr->b_s_open) { /* there may be no backing store */ - bptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info); + if (bptr->b_s_open) { /* there may be no backing store */ + bptr->b_s_open = FALSE; /* prevent recursive close if error */ + (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info); } } mem->virt_barray_list = NULL; @@ -1003,9 +1023,9 @@ while (lhdr_ptr != NULL) { large_pool_ptr next_lhdr_ptr = lhdr_ptr->next; space_freed = lhdr_ptr->bytes_used + - lhdr_ptr->bytes_left + - SIZEOF(large_pool_hdr); - jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed); + lhdr_ptr->bytes_left + + sizeof(large_pool_hdr); + jpeg_free_large(cinfo, (void *) lhdr_ptr, space_freed); mem->total_space_allocated -= space_freed; lhdr_ptr = next_lhdr_ptr; } @@ -1017,8 +1037,8 @@ while (shdr_ptr != NULL) { small_pool_ptr next_shdr_ptr = shdr_ptr->next; space_freed = shdr_ptr->bytes_used + - shdr_ptr->bytes_left + - SIZEOF(small_pool_hdr); + shdr_ptr->bytes_left + + sizeof(small_pool_hdr); jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed); mem->total_space_allocated -= space_freed; shdr_ptr = next_shdr_ptr; @@ -1045,10 +1065,10 @@ } /* Release the memory manager control block too. */ - jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr)); - cinfo->mem = NULL; /* ensures I will be called only once */ + jpeg_free_small(cinfo, (void *) cinfo->mem, sizeof(my_memory_mgr)); + cinfo->mem = NULL; /* ensures I will be called only once */ - jpeg_mem_term(cinfo); /* system-dependent cleanup */ + jpeg_mem_term(cinfo); /* system-dependent cleanup */ } @@ -1065,10 +1085,10 @@ int pool; size_t test_mac; - cinfo->mem = NULL; /* for safety if init fails */ + cinfo->mem = NULL; /* for safety if init fails */ /* Check for configuration errors. - * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably + * sizeof(ALIGN_TYPE) should be a power of 2; otherwise, it probably * doesn't reflect any real hardware alignment requirement. * The test is a little tricky: for X>0, X and X-1 have no one-bits * in common if and only if X is a power of 2, ie has only one one-bit. @@ -1089,10 +1109,10 @@ max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */ /* Attempt to allocate memory manager's control block */ - mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr)); + mem = (my_mem_ptr) jpeg_get_small(cinfo, sizeof(my_memory_mgr)); if (mem == NULL) { - jpeg_mem_term(cinfo); /* system-dependent cleanup */ + jpeg_mem_term(cinfo); /* system-dependent cleanup */ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0); } @@ -1122,7 +1142,7 @@ mem->virt_sarray_list = NULL; mem->virt_barray_list = NULL; - mem->total_space_allocated = SIZEOF(my_memory_mgr); + mem->total_space_allocated = sizeof(my_memory_mgr); /* Declare ourselves open for business */ cinfo->mem = & mem->pub; @@ -1134,15 +1154,15 @@ * this feature. */ #ifndef NO_GETENV - { char * memenv; + { char *memenv; if ((memenv = getenv("JPEGMEM")) != NULL) { char ch = 'x'; if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) { - if (ch == 'm' || ch == 'M') - max_to_use *= 1000L; - mem->pub.max_memory_to_use = max_to_use * 1000L; + if (ch == 'm' || ch == 'M') + max_to_use *= 1000L; + mem->pub.max_memory_to_use = max_to_use * 1000L; } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemnobs.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemnobs.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemnobs.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemnobs.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemnobs.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1992-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file provides a really simple implementation of the system- * dependent portion of the JPEG memory manager. This implementation @@ -18,11 +21,11 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ +#include "jmemsys.h" /* import the system-dependent declarations */ -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); #endif @@ -38,7 +41,7 @@ } GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) +jpeg_free_small (j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -46,19 +49,16 @@ /* * "Large" objects are treated the same as "small" ones. - * NB: although we include FAR keywords in the routine declarations, - * this file won't actually work in 80x86 small/medium model; at least, - * you probably won't be able to process useful-size images in only 64KB. */ -GLOBAL(void FAR *) +GLOBAL(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) { - return (void FAR *) malloc(sizeofobject); + return (void *) malloc(sizeofobject); } GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) +jpeg_free_large (j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -71,7 +71,7 @@ GLOBAL(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, - size_t max_bytes_needed, size_t already_allocated) + size_t max_bytes_needed, size_t already_allocated) { return max_bytes_needed; } @@ -85,7 +85,7 @@ GLOBAL(void) jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) + long total_bytes_needed) { ERREXIT(cinfo, JERR_NO_BACKING_STORE); } @@ -99,7 +99,7 @@ GLOBAL(long) jpeg_mem_init (j_common_ptr cinfo) { - return 0; /* just set max_memory_to_use to 0 */ + return 0; /* just set max_memory_to_use to 0 */ } GLOBAL(void) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemsys.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemsys.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmemsys.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jmemsys.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemsys.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1992-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This include file defines the interface between the system-independent * and system-dependent portions of the JPEG memory manager. No other @@ -14,25 +17,10 @@ * in the IJG distribution. You may need to modify it if you write a * custom memory manager. If system-dependent changes are needed in * this file, the best method is to #ifdef them based on a configuration - * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR - * and USE_MAC_MEMMGR. + * symbol supplied in jconfig.h. */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_get_small jGetSmall -#define jpeg_free_small jFreeSmall -#define jpeg_get_large jGetLarge -#define jpeg_free_large jFreeLarge -#define jpeg_mem_available jMemAvail -#define jpeg_open_backing_store jOpenBackStore -#define jpeg_mem_init jMemInit -#define jpeg_mem_term jMemTerm -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - - /* * These two functions are used to allocate and release small chunks of * memory. (Typically the total amount requested through jpeg_get_small is @@ -41,40 +29,36 @@ * and free; in particular, jpeg_get_small must return NULL on failure. * On most systems, these ARE malloc and free. jpeg_free_small is passed the * size of the object being freed, just in case it's needed. - * On an 80x86 machine using small-data memory model, these manage near heap. */ -EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject)); -EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object, - size_t sizeofobject)); +EXTERN(void *) jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_small (j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * These two functions are used to allocate and release large chunks of * memory (up to the total free space designated by jpeg_mem_available). - * The interface is the same as above, except that on an 80x86 machine, - * far pointers are used. On most other machines these are identical to - * the jpeg_get/free_small routines; but we keep them separate anyway, - * in case a different allocation strategy is desirable for large chunks. + * These are identical to the jpeg_get/free_small routines; but we keep them + * separate anyway, in case a different allocation strategy is desirable for + * large chunks. */ -EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo, - size_t sizeofobject)); -EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object, - size_t sizeofobject)); +EXTERN(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_large (j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may * be requested in a single call to jpeg_get_large (and jpeg_get_small for that - * matter, but that case should never come into play). This macro is needed + * matter, but that case should never come into play). This macro was needed * to model the 64Kb-segment-size limit of far addressing on 80x86 machines. - * On those machines, we expect that jconfig.h will provide a proper value. - * On machines with 32-bit flat address spaces, any large constant may be used. + * On machines with flat address spaces, any large constant may be used. * * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type * size_t and will be a multiple of sizeof(align_type). */ -#ifndef MAX_ALLOC_CHUNK /* may be overridden in jconfig.h */ +#ifndef MAX_ALLOC_CHUNK /* may be overridden in jconfig.h */ #define MAX_ALLOC_CHUNK 1000000000L #endif @@ -100,10 +84,9 @@ * Conversely, zero may be returned to always use the minimum amount of memory. */ -EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo, - size_t min_bytes_needed, - size_t max_bytes_needed, - size_t already_allocated)); +EXTERN(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, + size_t max_bytes_needed, + size_t already_allocated); /* @@ -113,56 +96,53 @@ * are private to the system-dependent backing store routines. */ -#define TEMP_NAME_LENGTH 64 /* max length of a temporary file's name */ +#define TEMP_NAME_LENGTH 64 /* max length of a temporary file's name */ -#ifdef USE_MSDOS_MEMMGR /* DOS-specific junk */ +#ifdef USE_MSDOS_MEMMGR /* DOS-specific junk */ -typedef unsigned short XMSH; /* type of extended-memory handles */ -typedef unsigned short EMSH; /* type of expanded-memory handles */ +typedef unsigned short XMSH; /* type of extended-memory handles */ +typedef unsigned short EMSH; /* type of expanded-memory handles */ typedef union { - short file_handle; /* DOS file handle if it's a temp file */ - XMSH xms_handle; /* handle if it's a chunk of XMS */ - EMSH ems_handle; /* handle if it's a chunk of EMS */ + short file_handle; /* DOS file handle if it's a temp file */ + XMSH xms_handle; /* handle if it's a chunk of XMS */ + EMSH ems_handle; /* handle if it's a chunk of EMS */ } handle_union; #endif /* USE_MSDOS_MEMMGR */ -#ifdef USE_MAC_MEMMGR /* Mac-specific junk */ +#ifdef USE_MAC_MEMMGR /* Mac-specific junk */ #include #endif /* USE_MAC_MEMMGR */ -typedef struct backing_store_struct * backing_store_ptr; +typedef struct backing_store_struct *backing_store_ptr; typedef struct backing_store_struct { /* Methods for reading/writing/closing this backing-store object */ - JMETHOD(void, read_backing_store, (j_common_ptr cinfo, - backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count)); - JMETHOD(void, write_backing_store, (j_common_ptr cinfo, - backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count)); - JMETHOD(void, close_backing_store, (j_common_ptr cinfo, - backing_store_ptr info)); + void (*read_backing_store) (j_common_ptr cinfo, backing_store_ptr info, + void *buffer_address, long file_offset, + long byte_count); + void (*write_backing_store) (j_common_ptr cinfo, backing_store_ptr info, + void *buffer_address, long file_offset, + long byte_count); + void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info); /* Private fields for system-dependent backing-store management */ #ifdef USE_MSDOS_MEMMGR /* For the MS-DOS manager (jmemdos.c), we need: */ - handle_union handle; /* reference to backing-store storage object */ + handle_union handle; /* reference to backing-store storage object */ char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */ #else #ifdef USE_MAC_MEMMGR /* For the Mac manager (jmemmac.c), we need: */ - short temp_file; /* file reference number to temp file */ - FSSpec tempSpec; /* the FSSpec for the temp file */ + short temp_file; /* file reference number to temp file */ + FSSpec tempSpec; /* the FSSpec for the temp file */ char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */ #else /* For a typical implementation with temp files, we need: */ - FILE * temp_file; /* stdio reference to temp file */ + FILE *temp_file; /* stdio reference to temp file */ char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */ #endif #endif @@ -177,9 +157,9 @@ * just take an error exit.) */ -EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo, - backing_store_ptr info, - long total_bytes_needed)); +EXTERN(void) jpeg_open_backing_store (j_common_ptr cinfo, + backing_store_ptr info, + long total_bytes_needed); /* @@ -194,5 +174,5 @@ * all opened backing-store objects have been closed. */ -EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo)); -EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo)); +EXTERN(long) jpeg_mem_init (j_common_ptr cinfo); +EXTERN(void) jpeg_mem_term (j_common_ptr cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmorecfg.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jmorecfg.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jmorecfg.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jmorecfg.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jmorecfg.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 1997-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains additional configuration options that customize the * JPEG software for special applications or support machine-dependent @@ -13,18 +16,6 @@ /* - * Define BITS_IN_JSAMPLE as either - * 8 for 8-bit sample values (the usual setting) - * 12 for 12-bit sample values - * Only 8 and 12 are legal data precisions for lossy JPEG according to the - * JPEG standard, and the IJG code does not support anything else! - * We do not support run-time selection of data precision, sorry. - */ - -#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ - - -/* * Maximum number of components (color channels) allowed in JPEG image. * To meet the letter of the JPEG spec, set this to 255. However, darn * few applications need more than 4 channels (maybe 5 for CMYK + alpha @@ -33,7 +24,7 @@ * bytes of storage, whether actually used in an image or not.) */ -#define MAX_COMPONENTS 10 /* maximum number of image components */ +#define MAX_COMPONENTS 10 /* maximum number of image components */ /* @@ -71,8 +62,8 @@ #endif /* HAVE_UNSIGNED_CHAR */ -#define MAXJSAMPLE 255 -#define CENTERJSAMPLE 128 +#define MAXJSAMPLE 255 +#define CENTERJSAMPLE 128 #endif /* BITS_IN_JSAMPLE == 8 */ @@ -85,8 +76,8 @@ typedef short JSAMPLE; #define GETJSAMPLE(value) ((int) (value)) -#define MAXJSAMPLE 4095 -#define CENTERJSAMPLE 2048 +#define MAXJSAMPLE 4095 +#define CENTERJSAMPLE 2048 #endif /* BITS_IN_JSAMPLE == 12 */ @@ -152,21 +143,52 @@ /* INT16 must hold at least the values -32768..32767. */ -#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ +#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ typedef short INT16; #endif -/* INT32 must hold at least signed 32-bit values. */ +/* INT32 must hold at least signed 32-bit values. + * + * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.) Integers were + * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to + * long. It also wasn't common (or at least as common) in 1994 for INT32 to be + * defined by platform headers. Since then, however, INT32 is defined in + * several other common places: + * + * Xmd.h (X11 header) typedefs INT32 to int on 64-bit platforms and long on + * 32-bit platforms (i.e always a 32-bit signed type.) + * + * basetsd.h (Win32 header) typedefs INT32 to int (always a 32-bit signed type + * on modern platforms.) + * + * qglobal.h (Qt header) typedefs INT32 to int (always a 32-bit signed type on + * modern platforms.) + * + * This is a recipe for conflict, since "long" and "int" aren't always + * compatible types. Since the definition of INT32 has technically been part + * of the libjpeg API for more than 20 years, we can't remove it, but we do not + * use it internally any longer. We instead define a separate type (JLONG) + * for internal use, which ensures that internal behavior will always be the + * same regardless of any external headers that may be included. + */ -#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ +#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ +#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */ +#ifndef _BASETSD_H /* MinGW is slightly different */ +#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */ typedef long INT32; #endif +#endif +#endif +#endif /* Datatype used for image dimensions. The JPEG standard only supports * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore * "unsigned int" is sufficient on all machines. However, if you need to * handle larger images and you don't mind deviating from the spec, you - * can change this datatype. + * can change this datatype. (Note that changing this datatype will + * potentially require modifying the SIMD code. The x86-64 SIMD extensions, + * in particular, assume a 32-bit JDIMENSION.) */ typedef unsigned int JDIMENSION; @@ -182,39 +204,31 @@ */ /* a function called through method pointers: */ -#define METHODDEF(type) static type +#define METHODDEF(type) static type /* a function used only in its module: */ -#define LOCAL(type) static type +#define LOCAL(type) static type /* a function referenced thru EXTERNs: */ -#define GLOBAL(type) type +#define GLOBAL(type) type /* a reference to a GLOBAL function: */ -#define EXTERN(type) extern type +#define EXTERN(type) extern type -/* This macro is used to declare a "method", that is, a function pointer. - * We want to supply prototype parameters if the compiler can cope. - * Note that the arglist parameter must be parenthesized! - * Again, you can customize this if you need special linkage keywords. +/* Originally, this macro was used as a way of defining function prototypes + * for both modern compilers as well as older compilers that did not support + * prototype parameters. libjpeg-turbo has never supported these older, + * non-ANSI compilers, but the macro is still included because there is some + * software out there that uses it. */ -#ifdef HAVE_PROTOTYPES #define JMETHOD(type,methodname,arglist) type (*methodname) arglist -#else -#define JMETHOD(type,methodname,arglist) type (*methodname) () -#endif -/* Here is the pseudo-keyword for declaring pointers that must be "far" - * on 80x86 machines. Most of the specialized coding for 80x86 is handled - * by just saying "FAR *" where such a pointer is needed. In a few places - * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. +/* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS), + * but again, some software relies on this macro. */ -#ifdef NEED_FAR_POINTERS -#define FAR far -#else +#undef FAR #define FAR -#endif /* @@ -227,11 +241,11 @@ #ifndef HAVE_BOOLEAN typedef int boolean; #endif -#ifndef FALSE /* in case these macros already exist */ -#define FALSE 0 /* values of boolean */ +#ifndef FALSE /* in case these macros already exist */ +#define FALSE 0 /* values of boolean */ #endif #ifndef TRUE -#define TRUE 1 +#define TRUE 1 #endif @@ -259,15 +273,15 @@ /* Capability options common to encoder and decoder: */ -#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ -#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ -#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ +#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ +#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ +#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ /* Encoder capability options: */ #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -#define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -#define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ +#define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ /* Note: if you selected 12-bit data precision, it is dangerous to turn off * ENTROPY_OPT_SUPPORTED. The standard Huffman tables are only good for 8-bit * precision, so jchuff.c normally uses entropy optimization to compute @@ -281,39 +295,45 @@ /* Decoder capability options: */ #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ +#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ #define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ -#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ +#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */ #define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ -#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ -#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ +#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ +#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ /* more capability options later, no doubt */ /* - * Ordering of RGB data in scanlines passed to or from the application. - * If your application wants to deal with data in the order B,G,R, just - * change these macros. You can also deal with formats such as R,G,B,X - * (one extra byte per pixel) by changing RGB_PIXELSIZE. Note that changing - * the offsets will also change the order in which colormap data is organized. - * RESTRICTIONS: - * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats. - * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not - * useful if you are using JPEG color spaces other than YCbCr or grayscale. - * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE - * is not 3 (they don't understand about dummy color components!). So you - * can't use color quantization if you change that value. - */ - -#define RGB_RED 0 /* Offset of Red in an RGB scanline element */ -#define RGB_GREEN 1 /* Offset of Green */ -#define RGB_BLUE 2 /* Offset of Blue */ -#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ + * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial + * feature of libjpeg. The idea was that, if an application developer needed + * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could + * change these macros, rebuild libjpeg, and link their application statically + * with it. In reality, few people ever did this, because there were some + * severe restrictions involved (cjpeg and djpeg no longer worked properly, + * compressing/decompressing RGB JPEGs no longer worked properly, and the color + * quantizer wouldn't work with pixel sizes other than 3.) Further, since all + * of the O/S-supplied versions of libjpeg were built with the default values + * of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications have + * come to regard these values as immutable. + * + * The libjpeg-turbo colorspace extensions provide a much cleaner way of + * compressing from/decompressing to buffers with arbitrary component orders + * and pixel sizes. Thus, we do not support changing the values of RGB_RED, + * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE. In addition to the restrictions + * listed above, changing these values will also break the SIMD extensions and + * the regression tests. + */ + +#define RGB_RED 0 /* Offset of Red in an RGB scanline element */ +#define RGB_GREEN 1 /* Offset of Green */ +#define RGB_BLUE 2 /* Offset of Blue */ +#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ -#define JPEG_NUMCS 16 +#define JPEG_NUMCS 17 #define EXT_RGB_RED 0 #define EXT_RGB_GREEN 1 @@ -348,25 +368,29 @@ static const int rgb_red[JPEG_NUMCS] = { -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED, EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED, - EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED + EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED, + -1 }; static const int rgb_green[JPEG_NUMCS] = { -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN, EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN, - EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN + EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN, + -1 }; static const int rgb_blue[JPEG_NUMCS] = { -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE, EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE, - EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE + EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE, + -1 }; static const int rgb_pixelsize[JPEG_NUMCS] = { -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE, EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE, - EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE + EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE, + -1 }; /* Definitions for speed-related optimizations. */ @@ -378,7 +402,7 @@ #ifndef MULTIPLIER #ifndef WITH_SIMD -#define MULTIPLIER int /* type for fastest integer multiply */ +#define MULTIPLIER int /* type for fastest integer multiply */ #else #define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */ #endif @@ -388,17 +412,10 @@ /* FAST_FLOAT should be either float or double, whichever is done faster * by your compiler. (Note that this type is only used in the floating point * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.) - * Typically, float is faster in ANSI C compilers, while double is faster in - * pre-ANSI compilers (because they insist on converting to double anyway). - * The code below therefore chooses float if we have ANSI-style prototypes. */ #ifndef FAST_FLOAT -#ifdef HAVE_PROTOTYPES #define FAST_FLOAT float -#else -#define FAST_FLOAT double -#endif #endif #endif /* JPEG_INTERNAL_OPTIONS */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegcomp.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegcomp.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegcomp.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegcomp.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,8 @@ * jpegcomp.h * * Copyright (C) 2010, D. R. Commander - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * JPEG compatibility macros * These declarations are considered internal to the JPEG library; most @@ -11,6 +12,8 @@ #if JPEG_LIB_VERSION >= 70 #define _DCT_scaled_size DCT_h_scaled_size +#define _DCT_h_scaled_size DCT_h_scaled_size +#define _DCT_v_scaled_size DCT_v_scaled_size #define _min_DCT_scaled_size min_DCT_h_scaled_size #define _min_DCT_h_scaled_size min_DCT_h_scaled_size #define _min_DCT_v_scaled_size min_DCT_v_scaled_size @@ -18,6 +21,8 @@ #define _jpeg_height jpeg_height #else #define _DCT_scaled_size DCT_scaled_size +#define _DCT_h_scaled_size DCT_scaled_size +#define _DCT_v_scaled_size DCT_scaled_size #define _min_DCT_scaled_size min_DCT_scaled_size #define _min_DCT_h_scaled_size min_DCT_scaled_size #define _min_DCT_v_scaled_size min_DCT_scaled_size diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegint.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegint.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegint.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegint.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jpegint.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015-2016, D. R. Commander + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file provides common declarations for the various JPEG modules. * These declarations are considered internal to the JPEG library; most @@ -14,121 +18,127 @@ /* Declarations for both compression & decompression */ -typedef enum { /* Operating modes for buffer controllers */ - JBUF_PASS_THRU, /* Plain stripwise operation */ - /* Remaining modes require a full-image buffer to have been created */ - JBUF_SAVE_SOURCE, /* Run source subobject only, save output */ - JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */ - JBUF_SAVE_AND_PASS /* Run both subobjects, save output */ +typedef enum { /* Operating modes for buffer controllers */ + JBUF_PASS_THRU, /* Plain stripwise operation */ + /* Remaining modes require a full-image buffer to have been created */ + JBUF_SAVE_SOURCE, /* Run source subobject only, save output */ + JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */ + JBUF_SAVE_AND_PASS /* Run both subobjects, save output */ } J_BUF_MODE; /* Values of global_state field (jdapi.c has some dependencies on ordering!) */ -#define CSTATE_START 100 /* after create_compress */ -#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ -#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ -#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ -#define DSTATE_START 200 /* after create_decompress */ -#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ -#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ -#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ -#define DSTATE_PRESCAN 204 /* performing dummy pass for 2-pass quant */ -#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ -#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ -#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ -#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ -#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ -#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ +#define CSTATE_START 100 /* after create_compress */ +#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ +#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ +#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ +#define DSTATE_START 200 /* after create_decompress */ +#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ +#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ +#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ +#define DSTATE_PRESCAN 204 /* performing dummy pass for 2-pass quant */ +#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ +#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ +#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ +#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ +#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ +#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ + + +/* JLONG must hold at least signed 32-bit values. */ +typedef long JLONG; + + +/* + * Left shift macro that handles a negative operand without causing any + * sanitizer warnings + */ + +#define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b))) /* Declarations for compression modules */ /* Master control module */ struct jpeg_comp_master { - JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo)); - JMETHOD(void, pass_startup, (j_compress_ptr cinfo)); - JMETHOD(void, finish_pass, (j_compress_ptr cinfo)); + void (*prepare_for_pass) (j_compress_ptr cinfo); + void (*pass_startup) (j_compress_ptr cinfo); + void (*finish_pass) (j_compress_ptr cinfo); /* State variables made visible to other modules */ - boolean call_pass_startup; /* True if pass_startup must be called */ - boolean is_last_pass; /* True during last pass */ + boolean call_pass_startup; /* True if pass_startup must be called */ + boolean is_last_pass; /* True during last pass */ }; /* Main buffer control (downsampled-data buffer) */ struct jpeg_c_main_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, process_data, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail); }; /* Compression preprocessing (downsampling input buffer control) */ struct jpeg_c_prep_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, pre_process_data, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, - JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + void (*pre_process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, + JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail); }; /* Coefficient buffer control */ struct jpeg_c_coef_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(boolean, compress_data, (j_compress_ptr cinfo, - JSAMPIMAGE input_buf)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf); }; /* Colorspace conversion */ struct jpeg_color_converter { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); - JMETHOD(void, color_convert, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + void (*start_pass) (j_compress_ptr cinfo); + void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows); }; /* Downsampling */ struct jpeg_downsampler { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); - JMETHOD(void, downsample, (j_compress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_index, - JSAMPIMAGE output_buf, - JDIMENSION out_row_group_index)); + void (*start_pass) (j_compress_ptr cinfo); + void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_index, JSAMPIMAGE output_buf, + JDIMENSION out_row_group_index); - boolean need_context_rows; /* TRUE if need rows above & below */ + boolean need_context_rows; /* TRUE if need rows above & below */ }; /* Forward DCT (also controls coefficient quantization) */ struct jpeg_forward_dct { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); + void (*start_pass) (j_compress_ptr cinfo); /* perhaps this should be an array??? */ - JMETHOD(void, forward_DCT, (j_compress_ptr cinfo, - jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks)); + void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks); }; /* Entropy encoding */ struct jpeg_entropy_encoder { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics)); - JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data)); - JMETHOD(void, finish_pass, (j_compress_ptr cinfo)); + void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics); + boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data); + void (*finish_pass) (j_compress_ptr cinfo); }; /* Marker writing */ struct jpeg_marker_writer { - JMETHOD(void, write_file_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_frame_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_scan_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo)); - JMETHOD(void, write_tables_only, (j_compress_ptr cinfo)); + void (*write_file_header) (j_compress_ptr cinfo); + void (*write_frame_header) (j_compress_ptr cinfo); + void (*write_scan_header) (j_compress_ptr cinfo); + void (*write_file_trailer) (j_compress_ptr cinfo); + void (*write_tables_only) (j_compress_ptr cinfo); /* These routines are exported to allow insertion of extra markers */ /* Probably only COM and APPn markers should be written this way */ - JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker, - unsigned int datalen)); - JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val)); + void (*write_marker_header) (j_compress_ptr cinfo, int marker, + unsigned int datalen); + void (*write_marker_byte) (j_compress_ptr cinfo, int val); }; @@ -136,138 +146,137 @@ /* Master control module */ struct jpeg_decomp_master { - JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo)); + void (*prepare_for_output_pass) (j_decompress_ptr cinfo); + void (*finish_output_pass) (j_decompress_ptr cinfo); /* State variables made visible to other modules */ - boolean is_dummy_pass; /* True during 1st pass for 2-pass quant */ + boolean is_dummy_pass; /* True during 1st pass for 2-pass quant */ + + /* Partial decompression variables */ + JDIMENSION first_iMCU_col; + JDIMENSION last_iMCU_col; + JDIMENSION first_MCU_col[MAX_COMPS_IN_SCAN]; + JDIMENSION last_MCU_col[MAX_COMPS_IN_SCAN]; + boolean jinit_upsampler_no_alloc; }; /* Input control module */ struct jpeg_input_controller { - JMETHOD(int, consume_input, (j_decompress_ptr cinfo)); - JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo)); - JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo)); + int (*consume_input) (j_decompress_ptr cinfo); + void (*reset_input_controller) (j_decompress_ptr cinfo); + void (*start_input_pass) (j_decompress_ptr cinfo); + void (*finish_input_pass) (j_decompress_ptr cinfo); /* State variables made visible to other modules */ - boolean has_multiple_scans; /* True if file has multiple scans */ - boolean eoi_reached; /* True when EOI has been consumed */ + boolean has_multiple_scans; /* True if file has multiple scans */ + boolean eoi_reached; /* True when EOI has been consumed */ }; /* Main buffer control (downsampled-data buffer) */ struct jpeg_d_main_controller { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, process_data, (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode); + void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); }; /* Coefficient buffer control */ struct jpeg_d_coef_controller { - JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo)); - JMETHOD(int, consume_data, (j_decompress_ptr cinfo)); - JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo)); - JMETHOD(int, decompress_data, (j_decompress_ptr cinfo, - JSAMPIMAGE output_buf)); + void (*start_input_pass) (j_decompress_ptr cinfo); + int (*consume_data) (j_decompress_ptr cinfo); + void (*start_output_pass) (j_decompress_ptr cinfo); + int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); /* Pointer to array of coefficient virtual arrays, or NULL if none */ jvirt_barray_ptr *coef_arrays; }; /* Decompression postprocessing (color quantization buffer control) */ struct jpeg_d_post_controller { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, post_process_data, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode); + void (*post_process_data) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); }; /* Marker reading & parsing */ struct jpeg_marker_reader { - JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo)); + void (*reset_marker_reader) (j_decompress_ptr cinfo); /* Read markers until SOS or EOI. * Returns same codes as are defined for jpeg_consume_input: * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI. */ - JMETHOD(int, read_markers, (j_decompress_ptr cinfo)); + int (*read_markers) (j_decompress_ptr cinfo); /* Read a restart marker --- exported for use by entropy decoder only */ jpeg_marker_parser_method read_restart_marker; /* State of marker reader --- nominally internal, but applications * supplying COM or APPn handlers might like to know the state. */ - boolean saw_SOI; /* found SOI? */ - boolean saw_SOF; /* found SOF? */ - int next_restart_num; /* next restart number expected (0-7) */ - unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */ + boolean saw_SOI; /* found SOI? */ + boolean saw_SOF; /* found SOF? */ + int next_restart_num; /* next restart number expected (0-7) */ + unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */ }; /* Entropy decoding */ struct jpeg_entropy_decoder { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); + void (*start_pass) (j_decompress_ptr cinfo); + boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data); /* This is here to share code between baseline and progressive decoders; */ /* other modules probably should not use it */ - boolean insufficient_data; /* set TRUE after emitting warning */ + boolean insufficient_data; /* set TRUE after emitting warning */ }; /* Inverse DCT (also performs dequantization) */ -typedef JMETHOD(void, inverse_DCT_method_ptr, - (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col)); +typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, + JDIMENSION output_col); struct jpeg_inverse_dct { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); + void (*start_pass) (j_decompress_ptr cinfo); /* It is useful to allow each component to have a separate IDCT method. */ inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS]; }; /* Upsampling (note that upsampler must also call color converter) */ struct jpeg_upsampler { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, upsample, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo); + void (*upsample) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); - boolean need_context_rows; /* TRUE if need rows above & below */ + boolean need_context_rows; /* TRUE if need rows above & below */ }; /* Colorspace conversion */ struct jpeg_color_deconverter { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, color_convert, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + void (*start_pass) (j_decompress_ptr cinfo); + void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows); }; /* Color quantization or color precision reduction */ struct jpeg_color_quantizer { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan)); - JMETHOD(void, color_quantize, (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, - int num_rows)); - JMETHOD(void, finish_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, new_color_map, (j_decompress_ptr cinfo)); + void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan); + void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows); + void (*finish_pass) (j_decompress_ptr cinfo); + void (*new_color_map) (j_decompress_ptr cinfo); }; /* Miscellaneous useful macros */ #undef MAX -#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) #undef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MIN(a,b) ((a) < (b) ? (a) : (b)) /* We assume that right shift corresponds to signed division by 2 with @@ -275,126 +284,84 @@ * shift" instructions that shift in copies of the sign bit. But some * C compilers implement >> with an unsigned shift. For these machines you * must define RIGHT_SHIFT_IS_UNSIGNED. - * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity. + * RIGHT_SHIFT provides a proper signed right shift of a JLONG quantity. * It is only applied with constant shift counts. SHIFT_TEMPS must be * included in the variables of any routine using RIGHT_SHIFT. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define SHIFT_TEMPS INT32 shift_temp; +#define SHIFT_TEMPS JLONG shift_temp; #define RIGHT_SHIFT(x,shft) \ - ((shift_temp = (x)) < 0 ? \ - (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \ - (shift_temp >> (shft))) + ((shift_temp = (x)) < 0 ? \ + (shift_temp >> (shft)) | ((~((JLONG) 0)) << (32-(shft))) : \ + (shift_temp >> (shft))) #else #define SHIFT_TEMPS -#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jinit_compress_master jICompress -#define jinit_c_master_control jICMaster -#define jinit_c_main_controller jICMainC -#define jinit_c_prep_controller jICPrepC -#define jinit_c_coef_controller jICCoefC -#define jinit_color_converter jICColor -#define jinit_downsampler jIDownsampler -#define jinit_forward_dct jIFDCT -#define jinit_huff_encoder jIHEncoder -#define jinit_phuff_encoder jIPHEncoder -#define jinit_arith_encoder jIAEncoder -#define jinit_marker_writer jIMWriter -#define jinit_master_decompress jIDMaster -#define jinit_d_main_controller jIDMainC -#define jinit_d_coef_controller jIDCoefC -#define jinit_d_post_controller jIDPostC -#define jinit_input_controller jIInCtlr -#define jinit_marker_reader jIMReader -#define jinit_huff_decoder jIHDecoder -#define jinit_phuff_decoder jIPHDecoder -#define jinit_arith_decoder jIADecoder -#define jinit_inverse_dct jIIDCT -#define jinit_upsampler jIUpsampler -#define jinit_color_deconverter jIDColor -#define jinit_1pass_quantizer jI1Quant -#define jinit_2pass_quantizer jI2Quant -#define jinit_merged_upsampler jIMUpsampler -#define jinit_memory_mgr jIMemMgr -#define jdiv_round_up jDivRound -#define jround_up jRound -#define jcopy_sample_rows jCopySamples -#define jcopy_block_row jCopyBlocks -#define jzero_far jZeroFar -#define jpeg_zigzag_order jZIGTable -#define jpeg_natural_order jZAGTable -#define jpeg_aritab jAriTab -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - - /* Compression module initialization routines */ -EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo, - boolean transcode_only)); -EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo)); +EXTERN(void) jinit_compress_master (j_compress_ptr cinfo); +EXTERN(void) jinit_c_master_control (j_compress_ptr cinfo, + boolean transcode_only); +EXTERN(void) jinit_c_main_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_prep_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_coef_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_color_converter (j_compress_ptr cinfo); +EXTERN(void) jinit_downsampler (j_compress_ptr cinfo); +EXTERN(void) jinit_forward_dct (j_compress_ptr cinfo); +EXTERN(void) jinit_huff_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_phuff_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_arith_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_marker_writer (j_compress_ptr cinfo); /* Decompression module initialization routines */ -EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo)); +EXTERN(void) jinit_master_decompress (j_decompress_ptr cinfo); +EXTERN(void) jinit_d_main_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_coef_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_post_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_input_controller (j_decompress_ptr cinfo); +EXTERN(void) jinit_marker_reader (j_decompress_ptr cinfo); +EXTERN(void) jinit_huff_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_phuff_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_arith_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_inverse_dct (j_decompress_ptr cinfo); +EXTERN(void) jinit_upsampler (j_decompress_ptr cinfo); +EXTERN(void) jinit_color_deconverter (j_decompress_ptr cinfo); +EXTERN(void) jinit_1pass_quantizer (j_decompress_ptr cinfo); +EXTERN(void) jinit_2pass_quantizer (j_decompress_ptr cinfo); +EXTERN(void) jinit_merged_upsampler (j_decompress_ptr cinfo); /* Memory manager initialization */ -EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo)); +EXTERN(void) jinit_memory_mgr (j_common_ptr cinfo); /* Utility routines in jutils.c */ -EXTERN(long) jdiv_round_up JPP((long a, long b)); -EXTERN(long) jround_up JPP((long a, long b)); -EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols)); -EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks)); -EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero)); +EXTERN(long) jdiv_round_up (long a, long b); +EXTERN(long) jround_up (long a, long b); +EXTERN(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row, + JSAMPARRAY output_array, int dest_row, + int num_rows, JDIMENSION num_cols); +EXTERN(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, + JDIMENSION num_blocks); +EXTERN(void) jzero_far (void *target, size_t bytestozero); /* Constant tables in jutils.c */ -#if 0 /* This table is not actually needed in v6a */ +#if 0 /* This table is not actually needed in v6a */ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */ #endif extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */ /* Arithmetic coding probability estimation tables in jaricom.c */ -extern const INT32 jpeg_aritab[]; +extern const JLONG jpeg_aritab[]; /* Suppress undefined-structure complaints if necessary. */ #ifdef INCOMPLETE_TYPES_BROKEN -#ifndef AM_MEMORY_MANAGER /* only jmemmgr.c defines these */ +#ifndef AM_MEMORY_MANAGER /* only jmemmgr.c defines these */ struct jvirt_sarray_control { long dummy; }; struct jvirt_barray_control { long dummy; }; #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeglib.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeglib.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeglib.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeglib.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jpeglib.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file defines the application interface for the JPEG library. * Most applications using the library need only include this file, @@ -15,6 +18,10 @@ #ifndef JPEGLIB_H #define JPEGLIB_H +/* Begin chromium edits */ +#include "jpeglibmangler.h" +/* End chromium edits */ + /* * First we include the configuration files that record how this * installation of the JPEG library is set up. jconfig.h can be @@ -22,10 +29,10 @@ * manual configuration options that most people need not worry about. */ -#ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ -#include "jconfig.h" /* widely used configuration options */ +#ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ +#include "jconfig.h" /* widely used configuration options */ #endif -#include "jmorecfg.h" /* seldom changed options */ +#include "jmorecfg.h" /* seldom changed options */ #ifdef __cplusplus @@ -40,13 +47,13 @@ * if you want to be compatible. */ -#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */ -#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */ -#define NUM_QUANT_TBLS 4 /* Quantization tables are numbered 0..3 */ -#define NUM_HUFF_TBLS 4 /* Huffman tables are numbered 0..3 */ -#define NUM_ARITH_TBLS 16 /* Arith-coding tables are numbered 0..15 */ -#define MAX_COMPS_IN_SCAN 4 /* JPEG limit on # of components in one scan */ -#define MAX_SAMP_FACTOR 4 /* JPEG limit on sampling factors */ +#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */ +#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */ +#define NUM_QUANT_TBLS 4 /* Quantization tables are numbered 0..3 */ +#define NUM_HUFF_TBLS 4 /* Huffman tables are numbered 0..3 */ +#define NUM_ARITH_TBLS 16 /* Arith-coding tables are numbered 0..15 */ +#define MAX_COMPS_IN_SCAN 4 /* JPEG limit on # of components in one scan */ +#define MAX_SAMP_FACTOR 4 /* JPEG limit on sampling factors */ /* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard; * the PostScript DCT filter can emit files with many more than 10 blocks/MCU. * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU @@ -61,20 +68,18 @@ /* Data structures for images (arrays of samples and of DCT coefficients). - * On 80x86 machines, the image arrays are too big for near pointers, - * but the pointer arrays can fit in near memory. */ -typedef JSAMPLE FAR *JSAMPROW; /* ptr to one image row of pixel samples. */ -typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */ -typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */ - -typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */ -typedef JBLOCK FAR *JBLOCKROW; /* pointer to one row of coefficient blocks */ -typedef JBLOCKROW *JBLOCKARRAY; /* a 2-D array of coefficient blocks */ -typedef JBLOCKARRAY *JBLOCKIMAGE; /* a 3-D array of coefficient blocks */ +typedef JSAMPLE *JSAMPROW; /* ptr to one image row of pixel samples. */ +typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */ +typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */ + +typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */ +typedef JBLOCK *JBLOCKROW; /* pointer to one row of coefficient blocks */ +typedef JBLOCKROW *JBLOCKARRAY; /* a 2-D array of coefficient blocks */ +typedef JBLOCKARRAY *JBLOCKIMAGE; /* a 3-D array of coefficient blocks */ -typedef JCOEF FAR *JCOEFPTR; /* useful in a couple of places */ +typedef JCOEF *JCOEFPTR; /* useful in a couple of places */ /* Types for JPEG compression parameters and working tables. */ @@ -87,13 +92,13 @@ * (not the zigzag order in which they are stored in a JPEG DQT marker). * CAUTION: IJG versions prior to v6a kept this array in zigzag order. */ - UINT16 quantval[DCTSIZE2]; /* quantization step for each coefficient */ + UINT16 quantval[DCTSIZE2]; /* quantization step for each coefficient */ /* This field is used only during compression. It's initialized FALSE when * the table is created, and set TRUE when it's been output to the file. * You could suppress output of a table by setting this to TRUE. * (See jpeg_suppress_tables for an example.) */ - boolean sent_table; /* TRUE when table has been output */ + boolean sent_table; /* TRUE when table has been output */ } JQUANT_TBL; @@ -101,15 +106,15 @@ typedef struct { /* These two fields directly represent the contents of a JPEG DHT marker */ - UINT8 bits[17]; /* bits[k] = # of symbols with codes of */ - /* length k bits; bits[0] is unused */ - UINT8 huffval[256]; /* The symbols, in order of incr code length */ + UINT8 bits[17]; /* bits[k] = # of symbols with codes of */ + /* length k bits; bits[0] is unused */ + UINT8 huffval[256]; /* The symbols, in order of incr code length */ /* This field is used only during compression. It's initialized FALSE when * the table is created, and set TRUE when it's been output to the file. * You could suppress output of a table by setting this to TRUE. * (See jpeg_suppress_tables for an example.) */ - boolean sent_table; /* TRUE when table has been output */ + boolean sent_table; /* TRUE when table has been output */ } JHUFF_TBL; @@ -119,20 +124,20 @@ /* These values are fixed over the whole image. */ /* For compression, they must be supplied by parameter setup; */ /* for decompression, they are read from the SOF marker. */ - int component_id; /* identifier for this component (0..255) */ - int component_index; /* its index in SOF or cinfo->comp_info[] */ - int h_samp_factor; /* horizontal sampling factor (1..4) */ - int v_samp_factor; /* vertical sampling factor (1..4) */ - int quant_tbl_no; /* quantization table selector (0..3) */ + int component_id; /* identifier for this component (0..255) */ + int component_index; /* its index in SOF or cinfo->comp_info[] */ + int h_samp_factor; /* horizontal sampling factor (1..4) */ + int v_samp_factor; /* vertical sampling factor (1..4) */ + int quant_tbl_no; /* quantization table selector (0..3) */ /* These values may vary between scans. */ /* For compression, they must be supplied by parameter setup; */ /* for decompression, they are read from the SOS marker. */ /* The decompressor output side may not use these variables. */ - int dc_tbl_no; /* DC entropy table selector (0..3) */ - int ac_tbl_no; /* AC entropy table selector (0..3) */ - + int dc_tbl_no; /* DC entropy table selector (0..3) */ + int ac_tbl_no; /* AC entropy table selector (0..3) */ + /* Remaining fields should be treated as private by applications. */ - + /* These values are computed during compression or decompression startup: */ /* Component's size in DCT blocks. * Any dummy blocks added to complete an MCU are not counted; therefore @@ -143,8 +148,8 @@ /* Size of a DCT block in samples. Always DCTSIZE for compression. * For decompression this is the size of the output from one DCT block, * reflecting any scaling we choose to apply during the IDCT step. - * Values of 1,2,4,8 are likely to be supported. Note that different - * components may receive different IDCT scalings. + * Values from 1 to 16 are supported. + * Note that different components may receive different IDCT scalings. */ #if JPEG_LIB_VERSION >= 70 int DCT_h_scaled_size; @@ -158,53 +163,53 @@ * and similarly for height. For decompression, IDCT scaling is included, so * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE) */ - JDIMENSION downsampled_width; /* actual width in samples */ + JDIMENSION downsampled_width; /* actual width in samples */ JDIMENSION downsampled_height; /* actual height in samples */ /* This flag is used only for decompression. In cases where some of the * components will be ignored (eg grayscale output from YCbCr image), * we can skip most computations for the unused components. */ - boolean component_needed; /* do we need the value of this component? */ + boolean component_needed; /* do we need the value of this component? */ /* These values are computed before starting a scan of the component. */ /* The decompressor output side may not use these variables. */ - int MCU_width; /* number of blocks per MCU, horizontally */ - int MCU_height; /* number of blocks per MCU, vertically */ - int MCU_blocks; /* MCU_width * MCU_height */ - int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */ - int last_col_width; /* # of non-dummy blocks across in last MCU */ - int last_row_height; /* # of non-dummy blocks down in last MCU */ + int MCU_width; /* number of blocks per MCU, horizontally */ + int MCU_height; /* number of blocks per MCU, vertically */ + int MCU_blocks; /* MCU_width * MCU_height */ + int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */ + int last_col_width; /* # of non-dummy blocks across in last MCU */ + int last_row_height; /* # of non-dummy blocks down in last MCU */ /* Saved quantization table for component; NULL if none yet saved. * See jdinput.c comments about the need for this information. * This field is currently used only for decompression. */ - JQUANT_TBL * quant_table; + JQUANT_TBL *quant_table; /* Private per-component storage for DCT or IDCT subsystem. */ - void * dct_table; + void *dct_table; } jpeg_component_info; /* The script for encoding a multiple-scan file is an array of these: */ typedef struct { - int comps_in_scan; /* number of components encoded in this scan */ + int comps_in_scan; /* number of components encoded in this scan */ int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */ - int Ss, Se; /* progressive JPEG spectral selection parms */ - int Ah, Al; /* progressive JPEG successive approx. parms */ + int Ss, Se; /* progressive JPEG spectral selection parms */ + int Ah, Al; /* progressive JPEG successive approx. parms */ } jpeg_scan_info; /* The decompressor can save APPn and COM markers in a list of these: */ -typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr; +typedef struct jpeg_marker_struct *jpeg_saved_marker_ptr; struct jpeg_marker_struct { - jpeg_saved_marker_ptr next; /* next in list, or NULL */ - UINT8 marker; /* marker code: JPEG_COM, or JPEG_APP0+n */ - unsigned int original_length; /* # bytes of data in the file */ - unsigned int data_length; /* # bytes of data saved at data[] */ - JOCTET FAR * data; /* the data contained in the marker */ + jpeg_saved_marker_ptr next; /* next in list, or NULL */ + UINT8 marker; /* marker code: JPEG_COM, or JPEG_APP0+n */ + unsigned int original_length; /* # bytes of data in the file */ + unsigned int data_length; /* # bytes of data saved at data[] */ + JOCTET *data; /* the data contained in the marker */ /* the marker length word is not counted in data_length or original_length */ }; @@ -214,102 +219,102 @@ #define JCS_ALPHA_EXTENSIONS 1 typedef enum { - JCS_UNKNOWN, /* error/unspecified */ - JCS_GRAYSCALE, /* monochrome */ - JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN, - RGB_BLUE, and RGB_PIXELSIZE macros */ - JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */ - JCS_CMYK, /* C/M/Y/K */ - JCS_YCCK, /* Y/Cb/Cr/K */ - JCS_EXT_RGB, /* red/green/blue */ - JCS_EXT_RGBX, /* red/green/blue/x */ - JCS_EXT_BGR, /* blue/green/red */ - JCS_EXT_BGRX, /* blue/green/red/x */ - JCS_EXT_XBGR, /* x/blue/green/red */ - JCS_EXT_XRGB, /* x/red/green/blue */ - /* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX, - JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is - undefined, and in order to ensure the best performance, - libjpeg-turbo can set that byte to whatever value it wishes. Use - the following colorspace constants to ensure that the X byte is set - to 0xFF, so that it can be interpreted as an opaque alpha - channel. */ - JCS_EXT_RGBA, /* red/green/blue/alpha */ - JCS_EXT_BGRA, /* blue/green/red/alpha */ - JCS_EXT_ABGR, /* alpha/blue/green/red */ - JCS_EXT_ARGB /* alpha/red/green/blue */ + JCS_UNKNOWN, /* error/unspecified */ + JCS_GRAYSCALE, /* monochrome */ + JCS_RGB, /* red/green/blue as specified by the RGB_RED, + RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros */ + JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */ + JCS_CMYK, /* C/M/Y/K */ + JCS_YCCK, /* Y/Cb/Cr/K */ + JCS_EXT_RGB, /* red/green/blue */ + JCS_EXT_RGBX, /* red/green/blue/x */ + JCS_EXT_BGR, /* blue/green/red */ + JCS_EXT_BGRX, /* blue/green/red/x */ + JCS_EXT_XBGR, /* x/blue/green/red */ + JCS_EXT_XRGB, /* x/red/green/blue */ + /* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, + or JCS_EXT_XRGB during decompression, the X byte is undefined, and in + order to ensure the best performance, libjpeg-turbo can set that byte to + whatever value it wishes. Use the following colorspace constants to + ensure that the X byte is set to 0xFF, so that it can be interpreted as an + opaque alpha channel. */ + JCS_EXT_RGBA, /* red/green/blue/alpha */ + JCS_EXT_BGRA, /* blue/green/red/alpha */ + JCS_EXT_ABGR, /* alpha/blue/green/red */ + JCS_EXT_ARGB, /* alpha/red/green/blue */ + JCS_RGB565 /* 5-bit red/6-bit green/5-bit blue */ } J_COLOR_SPACE; /* DCT/IDCT algorithm options. */ typedef enum { - JDCT_ISLOW, /* slow but accurate integer algorithm */ - JDCT_IFAST, /* faster, less accurate integer method */ - JDCT_FLOAT /* floating-point: accurate, fast on fast HW */ + JDCT_ISLOW, /* slow but accurate integer algorithm */ + JDCT_IFAST, /* faster, less accurate integer method */ + JDCT_FLOAT /* floating-point: accurate, fast on fast HW */ } J_DCT_METHOD; -#ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */ +#ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */ #define JDCT_DEFAULT JDCT_ISLOW #endif -#ifndef JDCT_FASTEST /* may be overridden in jconfig.h */ +#ifndef JDCT_FASTEST /* may be overridden in jconfig.h */ #define JDCT_FASTEST JDCT_IFAST #endif /* Dithering options for decompression. */ typedef enum { - JDITHER_NONE, /* no dithering */ - JDITHER_ORDERED, /* simple ordered dither */ - JDITHER_FS /* Floyd-Steinberg error diffusion dither */ + JDITHER_NONE, /* no dithering */ + JDITHER_ORDERED, /* simple ordered dither */ + JDITHER_FS /* Floyd-Steinberg error diffusion dither */ } J_DITHER_MODE; /* Common fields between JPEG compression and decompression master structs. */ #define jpeg_common_fields \ - struct jpeg_error_mgr * err; /* Error handler module */\ - struct jpeg_memory_mgr * mem; /* Memory manager module */\ - struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\ - void * client_data; /* Available for use by application */\ - boolean is_decompressor; /* So common code can tell which is which */\ - int global_state /* For checking call sequence validity */ + struct jpeg_error_mgr *err; /* Error handler module */\ + struct jpeg_memory_mgr *mem; /* Memory manager module */\ + struct jpeg_progress_mgr *progress; /* Progress monitor, or NULL if none */\ + void *client_data; /* Available for use by application */\ + boolean is_decompressor; /* So common code can tell which is which */\ + int global_state /* For checking call sequence validity */ /* Routines that are to be used by both halves of the library are declared * to receive a pointer to this structure. There are no actual instances of * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct. */ struct jpeg_common_struct { - jpeg_common_fields; /* Fields common to both master struct types */ + jpeg_common_fields; /* Fields common to both master struct types */ /* Additional fields follow in an actual jpeg_compress_struct or * jpeg_decompress_struct. All three structs must agree on these * initial fields! (This would be a lot cleaner in C++.) */ }; -typedef struct jpeg_common_struct * j_common_ptr; -typedef struct jpeg_compress_struct * j_compress_ptr; -typedef struct jpeg_decompress_struct * j_decompress_ptr; +typedef struct jpeg_common_struct *j_common_ptr; +typedef struct jpeg_compress_struct *j_compress_ptr; +typedef struct jpeg_decompress_struct *j_decompress_ptr; /* Master record for a compression instance */ struct jpeg_compress_struct { - jpeg_common_fields; /* Fields shared with jpeg_decompress_struct */ + jpeg_common_fields; /* Fields shared with jpeg_decompress_struct */ /* Destination for compressed data */ - struct jpeg_destination_mgr * dest; + struct jpeg_destination_mgr *dest; /* Description of source image --- these fields must be filled in by * outer application before starting compression. in_color_space must * be correct before you can even call jpeg_set_defaults(). */ - JDIMENSION image_width; /* input image width */ - JDIMENSION image_height; /* input image height */ - int input_components; /* # of color components in input image */ - J_COLOR_SPACE in_color_space; /* colorspace of input image */ + JDIMENSION image_width; /* input image width */ + JDIMENSION image_height; /* input image height */ + int input_components; /* # of color components in input image */ + J_COLOR_SPACE in_color_space; /* colorspace of input image */ - double input_gamma; /* image gamma of input image */ + double input_gamma; /* image gamma of input image */ /* Compression parameters --- these fields must be set before calling * jpeg_start_compress(). We recommend calling jpeg_set_defaults() to @@ -322,8 +327,8 @@ #if JPEG_LIB_VERSION >= 70 unsigned int scale_num, scale_denom; /* fraction by which to scale image */ - JDIMENSION jpeg_width; /* scaled JPEG image width */ - JDIMENSION jpeg_height; /* scaled JPEG image height */ + JDIMENSION jpeg_width; /* scaled JPEG image width */ + JDIMENSION jpeg_height; /* scaled JPEG image height */ /* Dimensions of actual JPEG image that will be written to file, * derived from input dimensions by scaling factors above. * These fields are computed by jpeg_start_compress(). @@ -332,15 +337,15 @@ */ #endif - int data_precision; /* bits of precision in image data */ + int data_precision; /* bits of precision in image data */ - int num_components; /* # of color components in JPEG image */ + int num_components; /* # of color components in JPEG image */ J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */ - jpeg_component_info * comp_info; + jpeg_component_info *comp_info; /* comp_info[i] describes component that appears i'th in SOF */ - JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; + JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS]; #if JPEG_LIB_VERSION >= 70 int q_scale_factor[NUM_QUANT_TBLS]; #endif @@ -348,30 +353,30 @@ * and corresponding scale factors (percentage, initialized 100). */ - JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; - JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; /* ptrs to Huffman coding tables, or NULL if not defined */ UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */ - int num_scans; /* # of entries in scan_info array */ - const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */ + int num_scans; /* # of entries in scan_info array */ + const jpeg_scan_info *scan_info; /* script for multi-scan file, or NULL */ /* The default value of scan_info is NULL, which causes a single-scan * sequential JPEG file to be emitted. To create a multi-scan file, * set num_scans and scan_info to point to an array of scan definitions. */ - boolean raw_data_in; /* TRUE=caller supplies downsampled data */ - boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ - boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ - boolean CCIR601_sampling; /* TRUE=first samples are cosited */ + boolean raw_data_in; /* TRUE=caller supplies downsampled data */ + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ + boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ + boolean CCIR601_sampling; /* TRUE=first samples are cosited */ #if JPEG_LIB_VERSION >= 70 boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */ #endif - int smoothing_factor; /* 1..100, or 0 for no input smoothing */ - J_DCT_METHOD dct_method; /* DCT algorithm selector */ + int smoothing_factor; /* 1..100, or 0 for no input smoothing */ + J_DCT_METHOD dct_method; /* DCT algorithm selector */ /* The restart interval can be specified in absolute MCUs by setting * restart_interval, or in MCU rows by setting restart_in_rows @@ -379,28 +384,28 @@ * for each scan). */ unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */ - int restart_in_rows; /* if > 0, MCU rows per restart interval */ + int restart_in_rows; /* if > 0, MCU rows per restart interval */ /* Parameters controlling emission of special markers. */ - boolean write_JFIF_header; /* should a JFIF marker be written? */ - UINT8 JFIF_major_version; /* What to write for the JFIF version number */ + boolean write_JFIF_header; /* should a JFIF marker be written? */ + UINT8 JFIF_major_version; /* What to write for the JFIF version number */ UINT8 JFIF_minor_version; /* These three values are not used by the JPEG code, merely copied */ /* into the JFIF APP0 marker. density_unit can be 0 for unknown, */ /* 1 for dots/inch, or 2 for dots/cm. Note that the pixel aspect */ /* ratio is defined by X_density/Y_density even when density_unit=0. */ - UINT8 density_unit; /* JFIF code for pixel size units */ - UINT16 X_density; /* Horizontal pixel density */ - UINT16 Y_density; /* Vertical pixel density */ - boolean write_Adobe_marker; /* should an Adobe marker be written? */ - + UINT8 density_unit; /* JFIF code for pixel size units */ + UINT16 X_density; /* Horizontal pixel density */ + UINT16 Y_density; /* Vertical pixel density */ + boolean write_Adobe_marker; /* should an Adobe marker be written? */ + /* State variable: index of next scanline to be written to * jpeg_write_scanlines(). Application may use this to control its * processing loop, e.g., "while (next_scanline < image_height)". */ - JDIMENSION next_scanline; /* 0 .. image_height-1 */ + JDIMENSION next_scanline; /* 0 .. image_height-1 */ /* Remaining fields are known throughout compressor, but generally * should not be touched by a surrounding application. @@ -409,59 +414,59 @@ /* * These fields are computed during compression startup */ - boolean progressive_mode; /* TRUE if scan script uses progressive mode */ - int max_h_samp_factor; /* largest h_samp_factor */ - int max_v_samp_factor; /* largest v_samp_factor */ + boolean progressive_mode; /* TRUE if scan script uses progressive mode */ + int max_h_samp_factor; /* largest h_samp_factor */ + int max_v_samp_factor; /* largest v_samp_factor */ #if JPEG_LIB_VERSION >= 70 - int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ - int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ #endif - JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ + JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ /* The coefficient controller receives data in units of MCU rows as defined * for fully interleaved scans (whether the JPEG file is interleaved or not). * There are v_samp_factor * DCTSIZE sample rows of each component in an * "iMCU" (interleaved MCU) row. */ - + /* * These fields are valid during any one scan. * They describe the components and MCUs actually appearing in the scan. */ - int comps_in_scan; /* # of JPEG components in this scan */ - jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN]; + int comps_in_scan; /* # of JPEG components in this scan */ + jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN]; /* *cur_comp_info[i] describes component that appears i'th in SOS */ - - JDIMENSION MCUs_per_row; /* # of MCUs across the image */ - JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ - - int blocks_in_MCU; /* # of DCT blocks per MCU */ + + JDIMENSION MCUs_per_row; /* # of MCUs across the image */ + JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ + + int blocks_in_MCU; /* # of DCT blocks per MCU */ int MCU_membership[C_MAX_BLOCKS_IN_MCU]; /* MCU_membership[i] is index in cur_comp_info of component owning */ /* i'th block in an MCU */ - int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ #if JPEG_LIB_VERSION >= 80 - int block_size; /* the basic DCT block size: 1..16 */ - const int * natural_order; /* natural-order position array */ - int lim_Se; /* min( Se, DCTSIZE2-1 ) */ + int block_size; /* the basic DCT block size: 1..16 */ + const int *natural_order; /* natural-order position array */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) */ #endif /* * Links to compression subobjects (methods and private variables of modules) */ - struct jpeg_comp_master * master; - struct jpeg_c_main_controller * main; - struct jpeg_c_prep_controller * prep; - struct jpeg_c_coef_controller * coef; - struct jpeg_marker_writer * marker; - struct jpeg_color_converter * cconvert; - struct jpeg_downsampler * downsample; - struct jpeg_forward_dct * fdct; - struct jpeg_entropy_encoder * entropy; - jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */ + struct jpeg_comp_master *master; + struct jpeg_c_main_controller *main; + struct jpeg_c_prep_controller *prep; + struct jpeg_c_coef_controller *coef; + struct jpeg_marker_writer *marker; + struct jpeg_color_converter *cconvert; + struct jpeg_downsampler *downsample; + struct jpeg_forward_dct *fdct; + struct jpeg_entropy_encoder *entropy; + jpeg_scan_info *script_space; /* workspace for jpeg_simple_progression */ int script_space_size; }; @@ -469,17 +474,17 @@ /* Master record for a decompression instance */ struct jpeg_decompress_struct { - jpeg_common_fields; /* Fields shared with jpeg_compress_struct */ + jpeg_common_fields; /* Fields shared with jpeg_compress_struct */ /* Source of compressed data */ - struct jpeg_source_mgr * src; + struct jpeg_source_mgr *src; /* Basic description of image --- filled in by jpeg_read_header(). */ /* Application may inspect these values to decide how to process image. */ - JDIMENSION image_width; /* nominal image width (from SOF marker) */ - JDIMENSION image_height; /* nominal image height */ - int num_components; /* # of color components in JPEG image */ + JDIMENSION image_width; /* nominal image width (from SOF marker) */ + JDIMENSION image_height; /* nominal image height */ + int num_components; /* # of color components in JPEG image */ J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */ /* Decompression processing parameters --- these fields must be set before @@ -491,24 +496,24 @@ unsigned int scale_num, scale_denom; /* fraction by which to scale image */ - double output_gamma; /* image gamma wanted in output */ + double output_gamma; /* image gamma wanted in output */ - boolean buffered_image; /* TRUE=multiple output passes */ - boolean raw_data_out; /* TRUE=downsampled data wanted */ + boolean buffered_image; /* TRUE=multiple output passes */ + boolean raw_data_out; /* TRUE=downsampled data wanted */ - J_DCT_METHOD dct_method; /* IDCT algorithm selector */ - boolean do_fancy_upsampling; /* TRUE=apply fancy upsampling */ - boolean do_block_smoothing; /* TRUE=apply interblock smoothing */ + J_DCT_METHOD dct_method; /* IDCT algorithm selector */ + boolean do_fancy_upsampling; /* TRUE=apply fancy upsampling */ + boolean do_block_smoothing; /* TRUE=apply interblock smoothing */ - boolean quantize_colors; /* TRUE=colormapped output wanted */ + boolean quantize_colors; /* TRUE=colormapped output wanted */ /* the following are ignored if not quantize_colors: */ - J_DITHER_MODE dither_mode; /* type of color dithering to use */ - boolean two_pass_quantize; /* TRUE=use two-pass color quantization */ - int desired_number_of_colors; /* max # colors to use in created colormap */ + J_DITHER_MODE dither_mode; /* type of color dithering to use */ + boolean two_pass_quantize; /* TRUE=use two-pass color quantization */ + int desired_number_of_colors; /* max # colors to use in created colormap */ /* these are significant only in buffered-image mode: */ - boolean enable_1pass_quant; /* enable future use of 1-pass quantizer */ + boolean enable_1pass_quant; /* enable future use of 1-pass quantizer */ boolean enable_external_quant;/* enable future use of external colormap */ - boolean enable_2pass_quant; /* enable future use of 2-pass quantizer */ + boolean enable_2pass_quant; /* enable future use of 2-pass quantizer */ /* Description of actual output image that will be returned to application. * These fields are computed by jpeg_start_decompress(). @@ -516,14 +521,14 @@ * in advance of calling jpeg_start_decompress(). */ - JDIMENSION output_width; /* scaled image width */ - JDIMENSION output_height; /* scaled image height */ - int out_color_components; /* # of color components in out_color_space */ - int output_components; /* # of color components returned */ + JDIMENSION output_width; /* scaled image width */ + JDIMENSION output_height; /* scaled image height */ + int out_color_components; /* # of color components in out_color_space */ + int output_components; /* # of color components returned */ /* output_components is 1 (a colormap index) when quantizing colors; * otherwise it equals out_color_components. */ - int rec_outbuf_height; /* min recommended height of scanline buffer */ + int rec_outbuf_height; /* min recommended height of scanline buffer */ /* If the buffer passed to jpeg_read_scanlines() is less than this many rows * high, space and time will be wasted due to unnecessary data copying. * Usually rec_outbuf_height will be 1 or 2, at most 4. @@ -535,8 +540,8 @@ * jpeg_start_decompress or jpeg_start_output. * The map has out_color_components rows and actual_number_of_colors columns. */ - int actual_number_of_colors; /* number of entries in use */ - JSAMPARRAY colormap; /* The color map as a 2-D pixel array */ + int actual_number_of_colors; /* number of entries in use */ + JSAMPARRAY colormap; /* The color map as a 2-D pixel array */ /* State variables: these variables indicate the progress of decompression. * The application may examine these but must not modify them. @@ -546,20 +551,20 @@ * Application may use this to control its processing loop, e.g., * "while (output_scanline < output_height)". */ - JDIMENSION output_scanline; /* 0 .. output_height-1 */ + JDIMENSION output_scanline; /* 0 .. output_height-1 */ /* Current input scan number and number of iMCU rows completed in scan. * These indicate the progress of the decompressor input side. */ - int input_scan_number; /* Number of SOS markers seen so far */ - JDIMENSION input_iMCU_row; /* Number of iMCU rows completed */ + int input_scan_number; /* Number of SOS markers seen so far */ + JDIMENSION input_iMCU_row; /* Number of iMCU rows completed */ /* The "output scan number" is the notional scan being displayed by the * output side. The decompressor will not allow output scan/row number * to get ahead of input scan/row, but it can fall arbitrarily far behind. */ - int output_scan_number; /* Nominal scan number being displayed */ - JDIMENSION output_iMCU_row; /* Number of iMCU rows read */ + int output_scan_number; /* Nominal scan number being displayed */ + JDIMENSION output_iMCU_row; /* Number of iMCU rows read */ /* Current progression status. coef_bits[c][i] indicates the precision * with which component c's DCT coefficient i (in zigzag order) is known. @@ -568,7 +573,7 @@ * (thus, 0 at completion of the progression). * This pointer is NULL when reading a non-progressive file. */ - int (*coef_bits)[DCTSIZE2]; /* -1 or current Al value for each coef */ + int (*coef_bits)[DCTSIZE2]; /* -1 or current Al value for each coef */ /* Internal JPEG parameters --- the application usually need not look at * these fields. Note that the decompressor output side may not use @@ -579,27 +584,27 @@ * datastreams when processing abbreviated JPEG datastreams. */ - JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; + JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS]; /* ptrs to coefficient quantization tables, or NULL if not defined */ - JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; - JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; /* ptrs to Huffman coding tables, or NULL if not defined */ /* These parameters are never carried across datastreams, since they * are given in SOF/SOS markers or defined to be reset by SOI. */ - int data_precision; /* bits of precision in image data */ + int data_precision; /* bits of precision in image data */ - jpeg_component_info * comp_info; + jpeg_component_info *comp_info; /* comp_info[i] describes component that appears i'th in SOF */ #if JPEG_LIB_VERSION >= 80 - boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ + boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ #endif - boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ - boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ + boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ @@ -610,17 +615,17 @@ /* These fields record data obtained from optional markers recognized by * the JPEG library. */ - boolean saw_JFIF_marker; /* TRUE iff a JFIF APP0 marker was found */ + boolean saw_JFIF_marker; /* TRUE iff a JFIF APP0 marker was found */ /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */ - UINT8 JFIF_major_version; /* JFIF version number */ + UINT8 JFIF_major_version; /* JFIF version number */ UINT8 JFIF_minor_version; - UINT8 density_unit; /* JFIF code for pixel size units */ - UINT16 X_density; /* Horizontal pixel density */ - UINT16 Y_density; /* Vertical pixel density */ - boolean saw_Adobe_marker; /* TRUE iff an Adobe APP14 marker was found */ - UINT8 Adobe_transform; /* Color transform code from Adobe marker */ + UINT8 density_unit; /* JFIF code for pixel size units */ + UINT16 X_density; /* Horizontal pixel density */ + UINT16 Y_density; /* Vertical pixel density */ + boolean saw_Adobe_marker; /* TRUE iff an Adobe APP14 marker was found */ + UINT8 Adobe_transform; /* Color transform code from Adobe marker */ - boolean CCIR601_sampling; /* TRUE=first samples are cosited */ + boolean CCIR601_sampling; /* TRUE=first samples are cosited */ /* Aside from the specific data retained from APPn markers known to the * library, the uninterpreted contents of any or all APPn and COM markers @@ -635,17 +640,17 @@ /* * These fields are computed during decompression startup */ - int max_h_samp_factor; /* largest h_samp_factor */ - int max_v_samp_factor; /* largest v_samp_factor */ + int max_h_samp_factor; /* largest h_samp_factor */ + int max_v_samp_factor; /* largest v_samp_factor */ #if JPEG_LIB_VERSION >= 70 - int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ - int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ #else - int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ + int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ #endif - JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ + JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ /* The coefficient controller's input and output progress is measured in * units of "iMCU" (interleaved MCU) rows. These are the same as MCU rows * in fully interleaved JPEG scans, but are used whether the scan is @@ -654,33 +659,33 @@ * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row. */ - JSAMPLE * sample_range_limit; /* table for fast range-limiting */ + JSAMPLE *sample_range_limit; /* table for fast range-limiting */ /* * These fields are valid during any one scan. * They describe the components and MCUs actually appearing in the scan. * Note that the decompressor output side must not use these fields. */ - int comps_in_scan; /* # of JPEG components in this scan */ - jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN]; + int comps_in_scan; /* # of JPEG components in this scan */ + jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN]; /* *cur_comp_info[i] describes component that appears i'th in SOS */ - JDIMENSION MCUs_per_row; /* # of MCUs across the image */ - JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ + JDIMENSION MCUs_per_row; /* # of MCUs across the image */ + JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ - int blocks_in_MCU; /* # of DCT blocks per MCU */ + int blocks_in_MCU; /* # of DCT blocks per MCU */ int MCU_membership[D_MAX_BLOCKS_IN_MCU]; /* MCU_membership[i] is index in cur_comp_info of component owning */ /* i'th block in an MCU */ - int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ #if JPEG_LIB_VERSION >= 80 /* These fields are derived from Se of first SOS marker. */ - int block_size; /* the basic DCT block size: 1..16 */ - const int * natural_order; /* natural-order position array for entropy decode */ - int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ + int block_size; /* the basic DCT block size: 1..16 */ + const int *natural_order; /* natural-order position array for entropy decode */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ #endif /* This field is shared between entropy decoder and marker parser. @@ -692,17 +697,17 @@ /* * Links to decompression subobjects (methods, private variables of modules) */ - struct jpeg_decomp_master * master; - struct jpeg_d_main_controller * main; - struct jpeg_d_coef_controller * coef; - struct jpeg_d_post_controller * post; - struct jpeg_input_controller * inputctl; - struct jpeg_marker_reader * marker; - struct jpeg_entropy_decoder * entropy; - struct jpeg_inverse_dct * idct; - struct jpeg_upsampler * upsample; - struct jpeg_color_deconverter * cconvert; - struct jpeg_color_quantizer * cquantize; + struct jpeg_decomp_master *master; + struct jpeg_d_main_controller *main; + struct jpeg_d_coef_controller *coef; + struct jpeg_d_post_controller *post; + struct jpeg_input_controller *inputctl; + struct jpeg_marker_reader *marker; + struct jpeg_entropy_decoder *entropy; + struct jpeg_inverse_dct *idct; + struct jpeg_upsampler *upsample; + struct jpeg_color_deconverter *cconvert; + struct jpeg_color_quantizer *cquantize; }; @@ -718,17 +723,17 @@ struct jpeg_error_mgr { /* Error exit handler: does not return to caller */ - JMETHOD(void, error_exit, (j_common_ptr cinfo)); + void (*error_exit) (j_common_ptr cinfo); /* Conditionally emit a trace or warning message */ - JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level)); + void (*emit_message) (j_common_ptr cinfo, int msg_level); /* Routine that actually outputs a trace or error message */ - JMETHOD(void, output_message, (j_common_ptr cinfo)); + void (*output_message) (j_common_ptr cinfo); /* Format a message string for the most recent JPEG error or message */ - JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer)); -#define JMSG_LENGTH_MAX 200 /* recommended size of format_message buffer */ + void (*format_message) (j_common_ptr cinfo, char *buffer); +#define JMSG_LENGTH_MAX 200 /* recommended size of format_message buffer */ /* Reset error state variables at start of a new image */ - JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo)); - + void (*reset_error_mgr) (j_common_ptr cinfo); + /* The message ID code and any parameters are saved here. * A message can have one string parameter or up to 8 int parameters. */ @@ -738,18 +743,18 @@ int i[8]; char s[JMSG_STR_PARM_MAX]; } msg_parm; - + /* Standard state variables for error facility */ - - int trace_level; /* max msg_level that will be displayed */ - + + int trace_level; /* max msg_level that will be displayed */ + /* For recoverable corrupt-data errors, we emit a warning message, * but keep going unless emit_message chooses to abort. emit_message * should count warnings in num_warnings. The surrounding application * can check for bad data by seeing if num_warnings is nonzero at the * end of processing. */ - long num_warnings; /* number of corrupt-data warnings */ + long num_warnings; /* number of corrupt-data warnings */ /* These fields point to the table(s) of error message strings. * An application can change the table pointer to switch to a different @@ -761,52 +766,52 @@ * First table includes all errors generated by JPEG library itself. * Error code 0 is reserved for a "no such error string" message. */ - const char * const * jpeg_message_table; /* Library errors */ + const char * const *jpeg_message_table; /* Library errors */ int last_jpeg_message; /* Table contains strings 0..last_jpeg_message */ /* Second table can be added by application (see cjpeg/djpeg for example). * It contains strings numbered first_addon_message..last_addon_message. */ - const char * const * addon_message_table; /* Non-library errors */ - int first_addon_message; /* code for first string in addon table */ - int last_addon_message; /* code for last string in addon table */ + const char * const *addon_message_table; /* Non-library errors */ + int first_addon_message; /* code for first string in addon table */ + int last_addon_message; /* code for last string in addon table */ }; /* Progress monitor object */ struct jpeg_progress_mgr { - JMETHOD(void, progress_monitor, (j_common_ptr cinfo)); + void (*progress_monitor) (j_common_ptr cinfo); - long pass_counter; /* work units completed in this pass */ - long pass_limit; /* total number of work units in this pass */ - int completed_passes; /* passes completed so far */ - int total_passes; /* total number of passes expected */ + long pass_counter; /* work units completed in this pass */ + long pass_limit; /* total number of work units in this pass */ + int completed_passes; /* passes completed so far */ + int total_passes; /* total number of passes expected */ }; /* Data destination object for compression */ struct jpeg_destination_mgr { - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - JMETHOD(void, init_destination, (j_compress_ptr cinfo)); - JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo)); - JMETHOD(void, term_destination, (j_compress_ptr cinfo)); + void (*init_destination) (j_compress_ptr cinfo); + boolean (*empty_output_buffer) (j_compress_ptr cinfo); + void (*term_destination) (j_compress_ptr cinfo); }; /* Data source object for decompression */ struct jpeg_source_mgr { - const JOCTET * next_input_byte; /* => next byte to read from buffer */ - size_t bytes_in_buffer; /* # of bytes remaining in buffer */ + const JOCTET *next_input_byte; /* => next byte to read from buffer */ + size_t bytes_in_buffer; /* # of bytes remaining in buffer */ - JMETHOD(void, init_source, (j_decompress_ptr cinfo)); - JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo)); - JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes)); - JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired)); - JMETHOD(void, term_source, (j_decompress_ptr cinfo)); + void (*init_source) (j_decompress_ptr cinfo); + boolean (*fill_input_buffer) (j_decompress_ptr cinfo); + void (*skip_input_data) (j_decompress_ptr cinfo, long num_bytes); + boolean (*resync_to_restart) (j_decompress_ptr cinfo, int desired); + void (*term_source) (j_decompress_ptr cinfo); }; @@ -821,51 +826,42 @@ * successful. */ -#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ -#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ -#define JPOOL_NUMPOOLS 2 +#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ +#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ +#define JPOOL_NUMPOOLS 2 -typedef struct jvirt_sarray_control * jvirt_sarray_ptr; -typedef struct jvirt_barray_control * jvirt_barray_ptr; +typedef struct jvirt_sarray_control *jvirt_sarray_ptr; +typedef struct jvirt_barray_control *jvirt_barray_ptr; struct jpeg_memory_mgr { /* Method pointers */ - JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id, - size_t sizeofobject)); - JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id, - size_t sizeofobject)); - JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id, - JDIMENSION samplesperrow, - JDIMENSION numrows)); - JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id, - JDIMENSION blocksperrow, - JDIMENSION numrows)); - JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo, - int pool_id, - boolean pre_zero, - JDIMENSION samplesperrow, - JDIMENSION numrows, - JDIMENSION maxaccess)); - JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo, - int pool_id, - boolean pre_zero, - JDIMENSION blocksperrow, - JDIMENSION numrows, - JDIMENSION maxaccess)); - JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo)); - JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo, - jvirt_sarray_ptr ptr, - JDIMENSION start_row, - JDIMENSION num_rows, - boolean writable)); - JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo, - jvirt_barray_ptr ptr, - JDIMENSION start_row, - JDIMENSION num_rows, - boolean writable)); - JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id)); - JMETHOD(void, self_destruct, (j_common_ptr cinfo)); + void *(*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject); + void *(*alloc_large) (j_common_ptr cinfo, int pool_id, + size_t sizeofobject); + JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id, + JDIMENSION samplesperrow, JDIMENSION numrows); + JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id, + JDIMENSION blocksperrow, JDIMENSION numrows); + jvirt_sarray_ptr (*request_virt_sarray) (j_common_ptr cinfo, int pool_id, + boolean pre_zero, + JDIMENSION samplesperrow, + JDIMENSION numrows, + JDIMENSION maxaccess); + jvirt_barray_ptr (*request_virt_barray) (j_common_ptr cinfo, int pool_id, + boolean pre_zero, + JDIMENSION blocksperrow, + JDIMENSION numrows, + JDIMENSION maxaccess); + void (*realize_virt_arrays) (j_common_ptr cinfo); + JSAMPARRAY (*access_virt_sarray) (j_common_ptr cinfo, jvirt_sarray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable); + JBLOCKARRAY (*access_virt_barray) (j_common_ptr cinfo, jvirt_barray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable); + void (*free_pool) (j_common_ptr cinfo, int pool_id); + void (*self_destruct) (j_common_ptr cinfo); /* Limit on memory allocation for this JPEG object. (Note that this is * merely advisory, not a guaranteed maximum; it only affects the space @@ -882,96 +878,21 @@ /* Routine signature for application-supplied marker processing methods. * Need not pass marker code since it is stored in cinfo->unread_marker. */ -typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); - - -/* Declarations for routines called by application. - * The JPP macro hides prototype parameters from compilers that can't cope. - * Note JPP requires double parentheses. - */ - -#ifdef HAVE_PROTOTYPES -#define JPP(arglist) arglist -#else -#define JPP(arglist) () -#endif +typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo); -/* Short forms of external names for systems with brain-damaged linkers. - * We shorten external names to be unique in the first six letters, which - * is good enough for all known systems. - * (If your compiler itself needs names to be unique in less than 15 - * characters, you are out of luck. Get a better compiler.) +/* Originally, this macro was used as a way of defining function prototypes + * for both modern compilers as well as older compilers that did not support + * prototype parameters. libjpeg-turbo has never supported these older, + * non-ANSI compilers, but the macro is still included because there is some + * software out there that uses it. */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_std_error jStdError -#define jpeg_CreateCompress jCreaCompress -#define jpeg_CreateDecompress jCreaDecompress -#define jpeg_destroy_compress jDestCompress -#define jpeg_destroy_decompress jDestDecompress -#define jpeg_stdio_dest jStdDest -#define jpeg_stdio_src jStdSrc -#if JPEG_LIB_VERSION >= 80 -#define jpeg_mem_dest jMemDest -#define jpeg_mem_src jMemSrc -#endif -#define jpeg_set_defaults jSetDefaults -#define jpeg_set_colorspace jSetColorspace -#define jpeg_default_colorspace jDefColorspace -#define jpeg_set_quality jSetQuality -#define jpeg_set_linear_quality jSetLQuality -#if JPEG_LIB_VERSION >= 70 -#define jpeg_default_qtables jDefQTables -#endif -#define jpeg_add_quant_table jAddQuantTable -#define jpeg_quality_scaling jQualityScaling -#define jpeg_simple_progression jSimProgress -#define jpeg_suppress_tables jSuppressTables -#define jpeg_alloc_quant_table jAlcQTable -#define jpeg_alloc_huff_table jAlcHTable -#define jpeg_start_compress jStrtCompress -#define jpeg_write_scanlines jWrtScanlines -#define jpeg_finish_compress jFinCompress -#if JPEG_LIB_VERSION >= 70 -#define jpeg_calc_jpeg_dimensions jCjpegDimensions -#endif -#define jpeg_write_raw_data jWrtRawData -#define jpeg_write_marker jWrtMarker -#define jpeg_write_m_header jWrtMHeader -#define jpeg_write_m_byte jWrtMByte -#define jpeg_write_tables jWrtTables -#define jpeg_read_header jReadHeader -#define jpeg_start_decompress jStrtDecompress -#define jpeg_read_scanlines jReadScanlines -#define jpeg_finish_decompress jFinDecompress -#define jpeg_read_raw_data jReadRawData -#define jpeg_has_multiple_scans jHasMultScn -#define jpeg_start_output jStrtOutput -#define jpeg_finish_output jFinOutput -#define jpeg_input_complete jInComplete -#define jpeg_new_colormap jNewCMap -#define jpeg_consume_input jConsumeInput -#if JPEG_LIB_VERSION >= 80 -#define jpeg_core_output_dimensions jCoreDimensions -#endif -#define jpeg_calc_output_dimensions jCalcDimensions -#define jpeg_save_markers jSaveMarkers -#define jpeg_set_marker_processor jSetMarker -#define jpeg_read_coefficients jReadCoefs -#define jpeg_write_coefficients jWrtCoefs -#define jpeg_copy_critical_parameters jCopyCrit -#define jpeg_abort_compress jAbrtCompress -#define jpeg_abort_decompress jAbrtDecompress -#define jpeg_abort jAbort -#define jpeg_destroy jDestroy -#define jpeg_resync_to_restart jResyncRestart -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#define JPP(arglist) arglist /* Default error-management setup */ -EXTERN(struct jpeg_error_mgr *) jpeg_std_error - JPP((struct jpeg_error_mgr * err)); +EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr *err); /* Initialization of JPEG compression objects. * jpeg_create_compress() and jpeg_create_decompress() are the exported @@ -982,97 +903,89 @@ */ #define jpeg_create_compress(cinfo) \ jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_compress_struct)) + (size_t) sizeof(struct jpeg_compress_struct)) #define jpeg_create_decompress(cinfo) \ jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_decompress_struct)) -EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo, - int version, size_t structsize)); -EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo, - int version, size_t structsize)); + (size_t) sizeof(struct jpeg_decompress_struct)) +EXTERN(void) jpeg_CreateCompress (j_compress_ptr cinfo, int version, + size_t structsize); +EXTERN(void) jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, + size_t structsize); /* Destruction of JPEG compression objects */ -EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_destroy_compress (j_compress_ptr cinfo); +EXTERN(void) jpeg_destroy_decompress (j_decompress_ptr cinfo); /* Standard data source and destination managers: stdio streams. */ /* Caller is responsible for opening the file before and closing after. */ -EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile)); -EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile)); +EXTERN(void) jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile); +EXTERN(void) jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile); -#if JPEG_LIB_VERSION >= 80 +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) /* Data source and destination managers: memory buffers. */ -EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo, - unsigned char ** outbuffer, - unsigned long * outsize)); -EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, - unsigned char * inbuffer, - unsigned long insize)); +EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize); +EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, + const unsigned char *inbuffer, + unsigned long insize); #endif /* Default parameter setup for compression */ -EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_set_defaults (j_compress_ptr cinfo); /* Compression parameter setup aids */ -EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo, - J_COLOR_SPACE colorspace)); -EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality, - boolean force_baseline)); -EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo, - int scale_factor, - boolean force_baseline)); +EXTERN(void) jpeg_set_colorspace (j_compress_ptr cinfo, + J_COLOR_SPACE colorspace); +EXTERN(void) jpeg_default_colorspace (j_compress_ptr cinfo); +EXTERN(void) jpeg_set_quality (j_compress_ptr cinfo, int quality, + boolean force_baseline); +EXTERN(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, + boolean force_baseline); #if JPEG_LIB_VERSION >= 70 -EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo, - boolean force_baseline)); +EXTERN(void) jpeg_default_qtables (j_compress_ptr cinfo, + boolean force_baseline); #endif -EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, - boolean force_baseline)); -EXTERN(int) jpeg_quality_scaling JPP((int quality)); -EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo, - boolean suppress)); -EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo)); -EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo)); +EXTERN(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, + int scale_factor, boolean force_baseline); +EXTERN(int) jpeg_quality_scaling (int quality); +EXTERN(void) jpeg_simple_progression (j_compress_ptr cinfo); +EXTERN(void) jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress); +EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table (j_common_ptr cinfo); +EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table (j_common_ptr cinfo); /* Main entry points for compression */ -EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo, - boolean write_all_tables)); -EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION num_lines)); -EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_start_compress (j_compress_ptr cinfo, + boolean write_all_tables); +EXTERN(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION num_lines); +EXTERN(void) jpeg_finish_compress (j_compress_ptr cinfo); #if JPEG_LIB_VERSION >= 70 /* Precalculate JPEG dimensions for current compression parameters. */ -EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo); #endif /* Replaces jpeg_write_scanlines when writing raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo, - JSAMPIMAGE data, - JDIMENSION num_lines)); +EXTERN(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines); /* Write a special marker. See libjpeg.txt concerning safe usage. */ -EXTERN(void) jpeg_write_marker - JPP((j_compress_ptr cinfo, int marker, - const JOCTET * dataptr, unsigned int datalen)); +EXTERN(void) jpeg_write_marker (j_compress_ptr cinfo, int marker, + const JOCTET *dataptr, unsigned int datalen); /* Same, but piecemeal. */ -EXTERN(void) jpeg_write_m_header - JPP((j_compress_ptr cinfo, int marker, unsigned int datalen)); -EXTERN(void) jpeg_write_m_byte - JPP((j_compress_ptr cinfo, int val)); +EXTERN(void) jpeg_write_m_header (j_compress_ptr cinfo, int marker, + unsigned int datalen); +EXTERN(void) jpeg_write_m_byte (j_compress_ptr cinfo, int val); /* Alternate compression function: just write an abbreviated table file */ -EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_write_tables (j_compress_ptr cinfo); /* Decompression startup: read start of JPEG datastream to see what's there */ -EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo, - boolean require_image)); +EXTERN(int) jpeg_read_header (j_decompress_ptr cinfo, boolean require_image); /* Return value is one of: */ -#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ -#define JPEG_HEADER_OK 1 /* Found valid image datastream */ -#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ +#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ +#define JPEG_HEADER_OK 1 /* Found valid image datastream */ +#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ /* If you pass require_image = TRUE (normal case), you need not check for * a TABLES_ONLY return code; an abbreviated file will cause an error exit. * JPEG_SUSPENDED is only possible if you use a data source module that can @@ -1080,54 +993,55 @@ */ /* Main entry points for decompression */ -EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo)); -EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION max_lines)); -EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(boolean) jpeg_start_decompress (j_decompress_ptr cinfo); +EXTERN(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION max_lines); +EXTERN(JDIMENSION) jpeg_skip_scanlines (j_decompress_ptr cinfo, + JDIMENSION num_lines); +EXTERN(void) jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width); +EXTERN(boolean) jpeg_finish_decompress (j_decompress_ptr cinfo); /* Replaces jpeg_read_scanlines when reading raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo, - JSAMPIMAGE data, - JDIMENSION max_lines)); +EXTERN(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines); /* Additional entry points for buffered-image mode. */ -EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo)); -EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo, - int scan_number)); -EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo)); -EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo)); -EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo)); -EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo)); +EXTERN(boolean) jpeg_has_multiple_scans (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number); +EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_input_complete (j_decompress_ptr cinfo); +EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo); +EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo); /* Return value is one of: */ -/* #define JPEG_SUSPENDED 0 Suspended due to lack of input data */ -#define JPEG_REACHED_SOS 1 /* Reached start of new scan */ -#define JPEG_REACHED_EOI 2 /* Reached end of image */ -#define JPEG_ROW_COMPLETED 3 /* Completed one iMCU row */ -#define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ +/* #define JPEG_SUSPENDED 0 Suspended due to lack of input data */ +#define JPEG_REACHED_SOS 1 /* Reached start of new scan */ +#define JPEG_REACHED_EOI 2 /* Reached end of image */ +#define JPEG_ROW_COMPLETED 3 /* Completed one iMCU row */ +#define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ /* Precalculate output dimensions for current decompression parameters. */ #if JPEG_LIB_VERSION >= 80 -EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_core_output_dimensions (j_decompress_ptr cinfo); #endif -EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_calc_output_dimensions (j_decompress_ptr cinfo); /* Control saving of COM and APPn markers into marker_list. */ -EXTERN(void) jpeg_save_markers - JPP((j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit)); +EXTERN(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit); /* Install a special processing method for COM or APPn markers. */ -EXTERN(void) jpeg_set_marker_processor - JPP((j_decompress_ptr cinfo, int marker_code, - jpeg_marker_parser_method routine)); +EXTERN(void) jpeg_set_marker_processor (j_decompress_ptr cinfo, + int marker_code, + jpeg_marker_parser_method routine); /* Read or write raw DCT coefficients --- useful for lossless transcoding. */ -EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo)); -EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays)); -EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo, - j_compress_ptr dstinfo)); +EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo); +EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays); +EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo); /* If you choose to abort compression or decompression before completing * jpeg_finish_(de)compress, then you need to clean up to release memory, @@ -1135,28 +1049,27 @@ * if you're done with the JPEG object, but if you want to clean it up and * reuse it, call this: */ -EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_abort_compress (j_compress_ptr cinfo); +EXTERN(void) jpeg_abort_decompress (j_decompress_ptr cinfo); /* Generic versions of jpeg_abort and jpeg_destroy that work on either * flavor of JPEG object. These may be more convenient in some places. */ -EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo)); -EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo)); +EXTERN(void) jpeg_abort (j_common_ptr cinfo); +EXTERN(void) jpeg_destroy (j_common_ptr cinfo); /* Default restart-marker-resync procedure for use by data source modules */ -EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo, - int desired)); +EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired); /* These marker codes are exported since applications and data source modules * are likely to want to use them. */ -#define JPEG_RST0 0xD0 /* RST0 marker code */ -#define JPEG_EOI 0xD9 /* EOI marker code */ -#define JPEG_APP0 0xE0 /* APP0 marker code */ -#define JPEG_COM 0xFE /* COM marker code */ +#define JPEG_RST0 0xD0 /* RST0 marker code */ +#define JPEG_EOI 0xD9 /* EOI marker code */ +#define JPEG_APP0 0xE0 /* APP0 marker code */ +#define JPEG_COM 0xFE /* COM marker code */ /* If we have a brain-damaged compiler that emits warnings (or worse, errors) @@ -1165,7 +1078,7 @@ */ #ifdef INCOMPLETE_TYPES_BROKEN -#ifndef JPEG_INTERNALS /* will be defined in jpegint.h */ +#ifndef JPEG_INTERNALS /* will be defined in jpegint.h */ struct jvirt_sarray_control { long dummy; }; struct jvirt_barray_control { long dummy; }; struct jpeg_comp_master { long dummy; }; @@ -1200,8 +1113,8 @@ */ #ifdef JPEG_INTERNALS -#include "jpegint.h" /* fetch private declarations */ -#include "jerror.h" /* fetch error codes too */ +#include "jpegint.h" /* fetch private declarations */ +#include "jerror.h" /* fetch error codes too */ #endif #ifdef __cplusplus diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeglibmangler.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeglibmangler.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeglibmangler.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeglibmangler.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,117 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ +#define THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ + +// Mangle all externally visible function names so we can build our own libjpeg +// without system libraries trying to use it. + +#define jpeg_make_c_derived_tbl chromium_jpeg_make_c_derived_tbl +#define jpeg_gen_optimal_table chromium_jpeg_gen_optimal_table +#define jpeg_make_d_derived_tbl chromium_jpeg_make_d_derived_tbl +#define jpeg_fill_bit_buffer chromium_jpeg_fill_bit_buffer +#define jpeg_huff_decode chromium_jpeg_huff_decode +#define jpeg_fdct_islow chromium_jpeg_fdct_islow +#define jpeg_fdct_ifast chromium_jpeg_fdct_ifast +#define jpeg_fdct_float chromium_jpeg_fdct_float +#define jpeg_idct_islow chromium_jpeg_idct_islow +#define jpeg_idct_ifast chromium_jpeg_idct_ifast +#define jpeg_idct_float chromium_jpeg_idct_float +#define jpeg_idct_4x4 chromium_jpeg_idct_4x4 +#define jpeg_idct_2x2 chromium_jpeg_idct_2x2 +#define jpeg_idct_1x1 chromium_jpeg_idct_1x1 +#define jinit_compress_master chromium_jinit_compress_master +#define jinit_c_master_control chromium_jinit_c_master_control +#define jinit_c_main_controller chromium_jinit_c_main_controller +#define jinit_c_prep_controller chromium_jinit_c_prep_controller +#define jinit_c_coef_controller chromium_jinit_c_coef_controller +#define jinit_color_converter chromium_jinit_color_converter +#define jinit_downsampler chromium_jinit_downsampler +#define jinit_forward_dct chromium_jinit_forward_dct +#define jinit_huff_encoder chromium_jinit_huff_encoder +#define jinit_phuff_encoder chromium_jinit_phuff_encoder +#define jinit_marker_writer chromium_jinit_marker_writer +#define jinit_master_decompress chromium_jinit_master_decompress +#define jinit_d_main_controller chromium_jinit_d_main_controller +#define jinit_d_coef_controller chromium_jinit_d_coef_controller +#define jinit_d_post_controller chromium_jinit_d_post_controller +#define jinit_input_controller chromium_jinit_input_controller +#define jinit_marker_reader chromium_jinit_marker_reader +#define jinit_huff_decoder chromium_jinit_huff_decoder +#define jinit_phuff_decoder chromium_jinit_phuff_decoder +#define jinit_inverse_dct chromium_jinit_inverse_dct +#define jinit_upsampler chromium_jinit_upsampler +#define jinit_color_deconverter chromium_jinit_color_deconverter +#define jinit_1pass_quantizer chromium_jinit_1pass_quantizer +#define jinit_2pass_quantizer chromium_jinit_2pass_quantizer +#define jinit_merged_upsampler chromium_jinit_merged_upsampler +#define jinit_memory_mgr chromium_jinit_memory_mgr +#define jdiv_round_up chromium_jdiv_round_up +#define jround_up chromium_jround_up +#define jcopy_sample_rows chromium_jcopy_sample_rows +#define jcopy_block_row chromium_jcopy_block_row +#define jzero_far chromium_jzero_far +#define jpeg_std_error chromium_jpeg_std_error +#define jpeg_CreateCompress chromium_jpeg_CreateCompress +#define jpeg_CreateDecompress chromium_jpeg_CreateDecompress +#define jpeg_destroy_compress chromium_jpeg_destroy_compress +#define jpeg_destroy_decompress chromium_jpeg_destroy_decompress +#define jpeg_stdio_dest chromium_jpeg_stdio_dest +#define jpeg_stdio_src chromium_jpeg_stdio_src +#define jpeg_set_defaults chromium_jpeg_set_defaults +#define jpeg_set_colorspace chromium_jpeg_set_colorspace +#define jpeg_default_colorspace chromium_jpeg_default_colorspace +#define jpeg_set_quality chromium_jpeg_set_quality +#define jpeg_set_linear_quality chromium_jpeg_set_linear_quality +#define jpeg_add_quant_table chromium_jpeg_add_quant_table +#define jpeg_quality_scaling chromium_jpeg_quality_scaling +#define jpeg_simple_progression chromium_jpeg_simple_progression +#define jpeg_suppress_tables chromium_jpeg_suppress_tables +#define jpeg_alloc_quant_table chromium_jpeg_alloc_quant_table +#define jpeg_alloc_huff_table chromium_jpeg_alloc_huff_table +#define jpeg_start_compress chromium_jpeg_start_compress +#define jpeg_write_scanlines chromium_jpeg_write_scanlines +#define jpeg_finish_compress chromium_jpeg_finish_compress +#define jpeg_write_raw_data chromium_jpeg_write_raw_data +#define jpeg_write_marker chromium_jpeg_write_marker +#define jpeg_write_m_header chromium_jpeg_write_m_header +#define jpeg_write_m_byte chromium_jpeg_write_m_byte +#define jpeg_write_tables chromium_jpeg_write_tables +#define jpeg_read_header chromium_jpeg_read_header +#define jpeg_start_decompress chromium_jpeg_start_decompress +#define jpeg_read_scanlines chromium_jpeg_read_scanlines +#define jpeg_skip_scanlines chromium_jpeg_skip_scanlines +#define jpeg_crop_scanline chromium_jpeg_crop_scanline +#define jpeg_finish_decompress chromium_jpeg_finish_decompress +#define jpeg_read_raw_data chromium_jpeg_read_raw_data +#define jpeg_has_multiple_scans chromium_jpeg_has_multiple_scans +#define jpeg_start_output chromium_jpeg_start_output +#define jpeg_finish_output chromium_jpeg_finish_output +#define jpeg_input_complete chromium_jpeg_input_complete +#define jpeg_new_colormap chromium_jpeg_new_colormap +#define jpeg_consume_input chromium_jpeg_consume_input +#define jpeg_calc_output_dimensions chromium_jpeg_calc_output_dimensions +#define jpeg_save_markers chromium_jpeg_save_markers +#define jpeg_set_marker_processor chromium_jpeg_set_marker_processor +#define jpeg_read_coefficients chromium_jpeg_read_coefficients +#define jpeg_write_coefficients chromium_jpeg_write_coefficients +#define jpeg_copy_critical_parameters chromium_jpeg_copy_critical_parameters +#define jpeg_abort_compress chromium_jpeg_abort_compress +#define jpeg_abort_decompress chromium_jpeg_abort_decompress +#define jpeg_abort chromium_jpeg_abort +#define jpeg_destroy chromium_jpeg_destroy +#define jpeg_resync_to_restart chromium_jpeg_resync_to_restart +#define jpeg_get_small chromium_jpeg_get_small +#define jpeg_free_small chromium_jpeg_free_small +#define jpeg_get_large chromium_jpeg_get_large +#define jpeg_free_large chromium_jpeg_free_large +#define jpeg_mem_available chromium_jpeg_mem_available +#define jpeg_open_backing_store chromium_jpeg_open_backing_store +#define jpeg_mem_init chromium_jpeg_mem_init +#define jpeg_mem_term chromium_jpeg_mem_term +#define jpeg_std_message_table chromium_jpeg_std_message_table +#define jpeg_natural_order chromium_jpeg_natural_order + +#endif // THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeg_nbits_table.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeg_nbits_table.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpeg_nbits_table.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpeg_nbits_table.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4098 @@ +static const unsigned char jpeg_nbits_table[65536] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegtran.1 glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegtran.1 --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegtran.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegtran.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,290 @@ +.TH JPEGTRAN 1 "18 February 2016" +.SH NAME +jpegtran \- lossless transformation of JPEG files +.SH SYNOPSIS +.B jpegtran +[ +.I options +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B jpegtran +performs various useful transformations of JPEG files. +It can translate the coded representation from one variant of JPEG to another, +for example from baseline JPEG to progressive JPEG or vice versa. It can also +perform some rearrangements of the image data, for example turning an image +from landscape to portrait format by rotation. +.PP +For EXIF files and JPEG files containing Exif data, you may prefer to use +.B exiftran +instead. +.PP +.B jpegtran +works by rearranging the compressed data (DCT coefficients), without +ever fully decoding the image. Therefore, its transformations are lossless: +there is no image degradation at all, which would not be true if you used +.B djpeg +followed by +.B cjpeg +to accomplish the same conversion. But by the same token, +.B jpegtran +cannot perform lossy operations such as changing the image quality. However, +while the image data is losslessly transformed, metadata can be removed. See +the +.B \-copy +option for specifics. +.PP +.B jpegtran +reads the named JPEG/JFIF file, or the standard input if no file is +named, and produces a JPEG/JFIF file on the standard output. +.SH OPTIONS +All switch names may be abbreviated; for example, +.B \-optimize +may be written +.B \-opt +or +.BR \-o . +Upper and lower case are equivalent. +British spellings are also accepted (e.g., +.BR \-optimise ), +though for brevity these are not mentioned below. +.PP +To specify the coded JPEG representation used in the output file, +.B jpegtran +accepts a subset of the switches recognized by +.BR cjpeg : +.TP +.B \-optimize +Perform optimization of entropy encoding parameters. +.TP +.B \-progressive +Create progressive JPEG file. +.TP +.BI \-restart " N" +Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is +attached to the number. +.TP +.B \-arithmetic +Use arithmetic coding. +.TP +.BI \-scans " file" +Use the scan script given in the specified text file. +.PP +See +.BR cjpeg (1) +for more details about these switches. +If you specify none of these switches, you get a plain baseline-JPEG output +file. The quality setting and so forth are determined by the input file. +.PP +The image can be losslessly transformed by giving one of these switches: +.TP +.B \-flip horizontal +Mirror image horizontally (left-right). +.TP +.B \-flip vertical +Mirror image vertically (top-bottom). +.TP +.B \-rotate 90 +Rotate image 90 degrees clockwise. +.TP +.B \-rotate 180 +Rotate image 180 degrees. +.TP +.B \-rotate 270 +Rotate image 270 degrees clockwise (or 90 ccw). +.TP +.B \-transpose +Transpose image (across UL-to-LR axis). +.TP +.B \-transverse +Transverse transpose (across UR-to-LL axis). +.PP +The transpose transformation has no restrictions regarding image dimensions. +The other transformations operate rather oddly if the image dimensions are not +a multiple of the iMCU size (usually 8 or 16 pixels), because they can only +transform complete blocks of DCT coefficient data in the desired way. +.PP +.BR jpegtran 's +default behavior when transforming an odd-size image is designed +to preserve exact reversibility and mathematical consistency of the +transformation set. As stated, transpose is able to flip the entire image +area. Horizontal mirroring leaves any partial iMCU column at the right edge +untouched, but is able to flip all rows of the image. Similarly, vertical +mirroring leaves any partial iMCU row at the bottom edge untouched, but is +able to flip all columns. The other transforms can be built up as sequences +of transpose and flip operations; for consistency, their actions on edge +pixels are defined to be the same as the end result of the corresponding +transpose-and-flip sequence. +.PP +For practical use, you may prefer to discard any untransformable edge pixels +rather than having a strange-looking strip along the right and/or bottom edges +of a transformed image. To do this, add the +.B \-trim +switch: +.TP +.B \-trim +Drop non-transformable edge blocks. +.IP +Obviously, a transformation with +.B \-trim +is not reversible, so strictly speaking +.B jpegtran +with this switch is not lossless. Also, the expected mathematical +equivalences between the transformations no longer hold. For example, +.B \-rot 270 -trim +trims only the bottom edge, but +.B \-rot 90 -trim +followed by +.B \-rot 180 -trim +trims both edges. +.TP +.B \-perfect +If you are only interested in perfect transformations, add the +.B \-perfect +switch. This causes +.B jpegtran +to fail with an error if the transformation is not perfect. +.IP +For example, you may want to do +.IP +.B (jpegtran \-rot 90 -perfect +.I foo.jpg +.B || djpeg +.I foo.jpg +.B | pnmflip \-r90 | cjpeg) +.IP +to do a perfect rotation, if available, or an approximated one if not. +.PP +This version of \fBjpegtran\fR also offers a lossless crop option, which +discards data outside of a given image region but losslessly preserves what is +inside. Like the rotate and flip transforms, lossless crop is restricted by the +current JPEG format; the upper left corner of the selected region must fall on +an iMCU boundary. If it doesn't, then it is silently moved up and/or left to +the nearest iMCU boundary (the lower right corner is unchanged.) Thus, the +output image covers at least the requested region, but it may cover more. The +adjustment of the region dimensions may be optionally disabled by attaching +an 'f' character ("force") to the width or height number. + +The image can be losslessly cropped by giving the switch: +.TP +.B \-crop WxH+X+Y +Crop the image to a rectangular region of width W and height H, starting at +point X,Y. The lossless crop feature discards data outside of a given image +region but losslessly preserves what is inside. Like the rotate and flip +transforms, lossless crop is restricted by the current JPEG format; the upper +left corner of the selected region must fall on an iMCU boundary. If it +doesn't, then it is silently moved up and/or left to the nearest iMCU boundary +(the lower right corner is unchanged.) +.PP +Other not-strictly-lossless transformation switches are: +.TP +.B \-grayscale +Force grayscale output. +.IP +This option discards the chrominance channels if the input image is YCbCr +(ie, a standard color JPEG), resulting in a grayscale JPEG file. The +luminance channel is preserved exactly, so this is a better method of reducing +to grayscale than decompression, conversion, and recompression. This switch +is particularly handy for fixing a monochrome picture that was mistakenly +encoded as a color JPEG. (In such a case, the space savings from getting rid +of the near-empty chroma channels won't be large; but the decoding time for +a grayscale JPEG is substantially less than that for a color JPEG.) +.PP +.B jpegtran +also recognizes these switches that control what to do with "extra" markers, +such as comment blocks: +.TP +.B \-copy none +Copy no extra markers from source file. This setting suppresses all +comments and other metadata in the source file. +.TP +.B \-copy comments +Copy only comment markers. This setting copies comments from the source file +but discards any other metadata. +.TP +.B \-copy all +Copy all extra markers. This setting preserves miscellaneous markers +found in the source file, such as JFIF thumbnails, Exif data, and Photoshop +settings. In some files, these extra markers can be sizable. Note that this +option will copy thumbnails as-is; they will not be transformed. +.PP +The default behavior is \fB-copy comments\fR. (Note: in IJG releases v6 and +v6a, \fBjpegtran\fR always did the equivalent of \fB-copy none\fR.) +.PP +Additional switches recognized by jpegtran are: +.TP +.BI \-maxmemory " N" +Set limit for amount of memory to use in processing large images. Value is +in thousands of bytes, or millions of bytes if "M" is attached to the +number. For example, +.B \-max 4m +selects 4000000 bytes. If more space is needed, temporary files will be used. +.TP +.BI \-outfile " name" +Send output image to the named file, not to standard output. +.TP +.B \-verbose +Enable debug printout. More +.BR \-v 's +give more output. Also, version information is printed at startup. +.TP +.B \-debug +Same as +.BR \-verbose . +.TP +.B \-version +Print version information and exit. +.SH EXAMPLES +.LP +This example converts a baseline JPEG file to progressive form: +.IP +.B jpegtran \-progressive +.I foo.jpg +.B > +.I fooprog.jpg +.PP +This example rotates an image 90 degrees clockwise, discarding any +unrotatable edge pixels: +.IP +.B jpegtran \-rot 90 -trim +.I foo.jpg +.B > +.I foo90.jpg +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo and to wordsmith certain sections. +.SH BUGS +The transform options can't transform odd-size images perfectly. Use +.B \-trim +or +.B \-perfect +if you don't like the results. +.PP +The entire image is read into memory and then written out again, even in +cases where this isn't really necessary. Expect swapping on large images, +especially when using the more complex transform options. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegtran.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegtran.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jpegtran.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jpegtran.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,551 @@ +/* + * jpegtran.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1995-2010, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for JPEG transcoding. + * It is very similar to cjpeg.c, and partly to djpeg.c, but provides + * lossless transcoding between different JPEG file formats. It also + * provides some lossless and sort-of-lossless transformations of JPEG data. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "transupp.h" /* Support routines for jpegtran */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +static JCOPY_OPTION copyoption; /* -copy switch */ +static jpeg_transform_info transformoption; /* image transformation options */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -copy none Copy no extra markers from source file\n"); + fprintf(stderr, " -copy comments Copy only comment markers (default)\n"); + fprintf(stderr, " -copy all Copy all extra markers\n"); +#ifdef ENTROPY_OPT_SUPPORTED + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); +#endif +#ifdef C_PROGRESSIVE_SUPPORTED + fprintf(stderr, " -progressive Create progressive JPEG file\n"); +#endif + fprintf(stderr, "Switches for modifying the image:\n"); +#if TRANSFORMS_SUPPORTED + fprintf(stderr, " -crop WxH+X+Y Crop to a rectangular subarea\n"); + fprintf(stderr, " -grayscale Reduce to grayscale (omit color data)\n"); + fprintf(stderr, " -flip [horizontal|vertical] Mirror image (left-right or top-bottom)\n"); + fprintf(stderr, " -perfect Fail if there is non-transformable edge blocks\n"); + fprintf(stderr, " -rotate [90|180|270] Rotate image (degrees clockwise)\n"); +#endif +#if TRANSFORMS_SUPPORTED + fprintf(stderr, " -transpose Transpose image\n"); + fprintf(stderr, " -transverse Transverse transpose image\n"); + fprintf(stderr, " -trim Drop non-transformable edge blocks\n"); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif + fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + fprintf(stderr, "Switches for wizards:\n"); +#ifdef C_MULTISCAN_FILES_SUPPORTED + fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); +#endif + exit(EXIT_FAILURE); +} + + +LOCAL(void) +select_transform (JXFORM_CODE transform) +/* Silly little routine to detect multiple transform options, + * which we can't handle. + */ +{ +#if TRANSFORMS_SUPPORTED + if (transformoption.transform == JXFORM_NONE || + transformoption.transform == transform) { + transformoption.transform = transform; + } else { + fprintf(stderr, "%s: can only do one image transformation at a time\n", + progname); + usage(); + } +#else + fprintf(stderr, "%s: sorry, image transformation was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif +} + + +LOCAL(int) +parse_switches (j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + boolean simple_progressive; + char *scansarg = NULL; /* saves -scans parm if any */ + + /* Set up default JPEG parameters. */ + simple_progressive = FALSE; + outfilename = NULL; + copyoption = JCOPYOPT_DEFAULT; + transformoption.transform = JXFORM_NONE; + transformoption.perfect = FALSE; + transformoption.trim = FALSE; + transformoption.force_grayscale = FALSE; + transformoption.crop = FALSE; + transformoption.slow_hflip = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "arithmetic", 1)) { + /* Use arithmetic coding. */ +#ifdef C_ARITH_CODING_SUPPORTED + cinfo->arith_code = TRUE; +#else + fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "copy", 2)) { + /* Select which extra markers to copy. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "none", 1)) { + copyoption = JCOPYOPT_NONE; + } else if (keymatch(argv[argn], "comments", 1)) { + copyoption = JCOPYOPT_COMMENTS; + } else if (keymatch(argv[argn], "all", 1)) { + copyoption = JCOPYOPT_ALL; + } else + usage(); + + } else if (keymatch(arg, "crop", 2)) { + /* Perform lossless cropping. */ +#if TRANSFORMS_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) { + fprintf(stderr, "%s: bogus -crop argument '%s'\n", + progname, argv[argn]); + exit(EXIT_FAILURE); + } +#else + select_transform(JXFORM_NONE); /* force an error */ +#endif + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "flip", 1)) { + /* Mirror left-right or top-bottom. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "horizontal", 1)) + select_transform(JXFORM_FLIP_H); + else if (keymatch(argv[argn], "vertical", 1)) + select_transform(JXFORM_FLIP_V); + else + usage(); + + } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) { + /* Force to grayscale. */ +#if TRANSFORMS_SUPPORTED + transformoption.force_grayscale = TRUE; +#else + select_transform(JXFORM_NONE); /* force an error */ +#endif + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { + /* Enable entropy parm optimization. */ +#ifdef ENTROPY_OPT_SUPPORTED + cinfo->optimize_coding = TRUE; +#else + fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "perfect", 2)) { + /* Fail if there is any partial edge MCUs that the transform can't + * handle. */ + transformoption.perfect = TRUE; + + } else if (keymatch(arg, "progressive", 2)) { + /* Select simple progressive mode. */ +#ifdef C_PROGRESSIVE_SUPPORTED + simple_progressive = TRUE; + /* We must postpone execution until num_components is known. */ +#else + fprintf(stderr, "%s: sorry, progressive output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "restart", 1)) { + /* Restart interval in MCU rows (or in MCUs with 'b'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (lval < 0 || lval > 65535L) + usage(); + if (ch == 'b' || ch == 'B') { + cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ + } else { + cinfo->restart_in_rows = (int) lval; + /* restart_interval will be computed during startup */ + } + + } else if (keymatch(arg, "rotate", 2)) { + /* Rotate 90, 180, or 270 degrees (measured clockwise). */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "90", 2)) + select_transform(JXFORM_ROT_90); + else if (keymatch(argv[argn], "180", 3)) + select_transform(JXFORM_ROT_180); + else if (keymatch(argv[argn], "270", 3)) + select_transform(JXFORM_ROT_270); + else + usage(); + + } else if (keymatch(arg, "scans", 1)) { + /* Set scan script. */ +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + scansarg = argv[argn]; + /* We must postpone reading the file in case -progressive appears. */ +#else + fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "transpose", 1)) { + /* Transpose (across UL-to-LR axis). */ + select_transform(JXFORM_TRANSPOSE); + + } else if (keymatch(arg, "transverse", 6)) { + /* Transverse transpose (across UR-to-LL axis). */ + select_transform(JXFORM_TRANSVERSE); + + } else if (keymatch(arg, "trim", 3)) { + /* Trim off any partial edge MCUs that the transform can't handle. */ + transformoption.trim = TRUE; + + } else { + usage(); /* bogus switch */ + } + } + + /* Post-switch-scanning cleanup */ + + if (for_real) { + +#ifdef C_PROGRESSIVE_SUPPORTED + if (simple_progressive) /* process -progressive; -scans can override */ + jpeg_simple_progression(cinfo); +#endif + +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (scansarg != NULL) /* process -scans if it was present */ + if (! read_scan_script(cinfo, scansarg)) + usage(); +#endif + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_decompress_struct srcinfo; + struct jpeg_compress_struct dstinfo; + struct jpeg_error_mgr jsrcerr, jdsterr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + jvirt_barray_ptr *src_coef_arrays; + jvirt_barray_ptr *dst_coef_arrays; + int file_index; + /* We assume all-in-memory processing and can therefore use only a + * single file pointer for sequential input and output operation. + */ + FILE *fp; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "jpegtran"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG decompression object with default error handling. */ + srcinfo.err = jpeg_std_error(&jsrcerr); + jpeg_create_decompress(&srcinfo); + /* Initialize the JPEG compression object with default error handling. */ + dstinfo.err = jpeg_std_error(&jdsterr); + jpeg_create_compress(&dstinfo); + + /* Scan command line to find file names. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are mostly ignored; we will rescan the switches after + * opening the input file. Also note that most of the switches affect the + * destination JPEG object, so we parse into that and then copy over what + * needs to affects the source too. + */ + + file_index = parse_switches(&dstinfo, argc, argv, 0, FALSE); + jsrcerr.trace_level = jdsterr.trace_level; + srcinfo.mem->max_memory_to_use = dstinfo.mem->max_memory_to_use; + +#ifdef TWO_FILE_COMMANDLINE + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((fp = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s for reading\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + fp = read_stdin(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &dstinfo, &progress); +#endif + + /* Specify data source for decompression */ + jpeg_stdio_src(&srcinfo, fp); + + /* Enable saving of extra markers that we want to copy */ + jcopy_markers_setup(&srcinfo, copyoption); + + /* Read file header */ + (void) jpeg_read_header(&srcinfo, TRUE); + + /* Any space needed by a transform option must be requested before + * jpeg_read_coefficients so that memory allocation will be done right. + */ +#if TRANSFORMS_SUPPORTED + /* Fail right away if -perfect is given and transformation is not perfect. + */ + if (!jtransform_request_workspace(&srcinfo, &transformoption)) { + fprintf(stderr, "%s: transformation is not perfect\n", progname); + exit(EXIT_FAILURE); + } +#endif + + /* Read source file as DCT coefficients */ + src_coef_arrays = jpeg_read_coefficients(&srcinfo); + + /* Initialize destination compression parameters from source values */ + jpeg_copy_critical_parameters(&srcinfo, &dstinfo); + + /* Adjust destination parameters if required by transform options; + * also find out which set of coefficient arrays will hold the output. + */ +#if TRANSFORMS_SUPPORTED + dst_coef_arrays = jtransform_adjust_parameters(&srcinfo, &dstinfo, + src_coef_arrays, + &transformoption); +#else + dst_coef_arrays = src_coef_arrays; +#endif + + /* Close input file, if we opened it. + * Note: we assume that jpeg_read_coefficients consumed all input + * until JPEG_REACHED_EOI, and that jpeg_finish_decompress will + * only consume more while (! cinfo->inputctl->eoi_reached). + * We cannot call jpeg_finish_decompress here since we still need the + * virtual arrays allocated from the source object for processing. + */ + if (fp != stdin) + fclose(fp); + + /* Open the output file. */ + if (outfilename != NULL) { + if ((fp = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s for writing\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else { + /* default output file is stdout */ + fp = write_stdout(); + } + + /* Adjust default compression parameters by re-parsing the options */ + file_index = parse_switches(&dstinfo, argc, argv, 0, TRUE); + + /* Specify data destination for compression */ + jpeg_stdio_dest(&dstinfo, fp); + + /* Start compressor (note no image data is actually written here) */ + jpeg_write_coefficients(&dstinfo, dst_coef_arrays); + + /* Copy to the output file any extra markers that we want to preserve */ + jcopy_markers_execute(&srcinfo, &dstinfo, copyoption); + + /* Execute image transformation, if any */ +#if TRANSFORMS_SUPPORTED + jtransform_execute_transformation(&srcinfo, &dstinfo, + src_coef_arrays, + &transformoption); +#endif + + /* Finish compression and release memory */ + jpeg_finish_compress(&dstinfo); + jpeg_destroy_compress(&dstinfo); + (void) jpeg_finish_decompress(&srcinfo); + jpeg_destroy_decompress(&srcinfo); + + /* Close output file, if we opened it */ + if (fp != stdout) + fclose(fp); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &dstinfo); +#endif + + /* All done. */ + exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jquant1.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jquant1.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jquant1.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jquant1.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jquant1.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009, D. R. Commander - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains 1-pass color quantization (color mapping) routines. * These routines provide mapping to a fixed color map using equally spaced @@ -69,9 +71,9 @@ * table in both directions. */ -#define ODITHER_SIZE 16 /* dimension of dither matrix */ +#define ODITHER_SIZE 16 /* dimension of dither matrix */ /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */ -#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE) /* # cells in matrix */ +#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE) /* # cells in matrix */ #define ODITHER_MASK (ODITHER_SIZE-1) /* mask for wrapping around counters */ typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE]; @@ -106,8 +108,8 @@ * Errors are accumulated into the array fserrors[], at a resolution of * 1/16th of a pixel count. The error at a given pixel is propagated * to its not-yet-processed neighbors using the standard F-S fractions, - * ... (here) 7/16 - * 3/16 5/16 1/16 + * ... (here) 7/16 + * 3/16 5/16 1/16 * We work left-to-right on even rows, right-to-left on odd rows. * * We can get away with a single array (holding one row's worth of errors) @@ -120,52 +122,49 @@ * The fserrors[] array is indexed [component#][position]. * We provide (#columns + 2) entries per component; the extra entry at each * end saves us from special-casing the first and last pixels. - * - * Note: on a wide image, we might not have enough room in a PC's near data - * segment to hold the error array; so it is allocated with alloc_large. */ #if BITS_IN_JSAMPLE == 8 -typedef INT16 FSERROR; /* 16 bits should be enough */ -typedef int LOCFSERROR; /* use 'int' for calculation temps */ +typedef INT16 FSERROR; /* 16 bits should be enough */ +typedef int LOCFSERROR; /* use 'int' for calculation temps */ #else -typedef INT32 FSERROR; /* may need more than 16 bits */ -typedef INT32 LOCFSERROR; /* be sure calculation temps are big enough */ +typedef JLONG FSERROR; /* may need more than 16 bits */ +typedef JLONG LOCFSERROR; /* be sure calculation temps are big enough */ #endif -typedef FSERROR FAR *FSERRPTR; /* pointer to error array (in FAR storage!) */ +typedef FSERROR *FSERRPTR; /* pointer to error array */ /* Private subobject */ -#define MAX_Q_COMPS 4 /* max components I can handle */ +#define MAX_Q_COMPS 4 /* max components I can handle */ typedef struct { struct jpeg_color_quantizer pub; /* public fields */ /* Initially allocated colormap is saved here */ - JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */ - int sv_actual; /* number of entries in use */ + JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */ + int sv_actual; /* number of entries in use */ - JSAMPARRAY colorindex; /* Precomputed mapping for speed */ + JSAMPARRAY colorindex; /* Precomputed mapping for speed */ /* colorindex[i][j] = index of color closest to pixel value j in component i, * premultiplied as described above. Since colormap indexes must fit into * JSAMPLEs, the entries of this array will too. */ - boolean is_padded; /* is the colorindex padded for odither? */ + boolean is_padded; /* is the colorindex padded for odither? */ - int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */ + int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */ /* Variables for ordered dithering */ - int row_index; /* cur row's vertical index in dither matrix */ + int row_index; /* cur row's vertical index in dither matrix */ ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */ /* Variables for Floyd-Steinberg dithering */ FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */ - boolean on_odd_row; /* flag to remember which row we are on */ + boolean on_odd_row; /* flag to remember which row we are on */ } my_cquantizer; -typedef my_cquantizer * my_cquantize_ptr; +typedef my_cquantizer *my_cquantize_ptr; /* @@ -204,11 +203,11 @@ iroot = 1; do { iroot++; - temp = iroot; /* set temp = iroot ** nc */ + temp = iroot; /* set temp = iroot ** nc */ for (i = 1; i < nc; i++) temp *= iroot; } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */ - iroot--; /* now iroot = floor(root) */ + iroot--; /* now iroot = floor(root) */ /* Must have at least 2 color values per component */ if (iroot < 2) @@ -232,10 +231,10 @@ j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i); /* calculate new total_colors if Ncolors[j] is incremented */ temp = total_colors / Ncolors[j]; - temp *= Ncolors[j]+1; /* done in long arith to avoid oflo */ + temp *= Ncolors[j]+1; /* done in long arith to avoid oflo */ if (temp > (long) max_colors) - break; /* won't fit, done with this pass */ - Ncolors[j]++; /* OK, apply the increment */ + break; /* won't fit, done with this pass */ + Ncolors[j]++; /* OK, apply the increment */ total_colors = (int) temp; changed = TRUE; } @@ -255,7 +254,7 @@ * (Forcing the upper and lower values to the limits ensures that * dithering can't produce a color outside the selected gamut.) */ - return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj); + return (int) (((JLONG) j * MAXJSAMPLE + maxj/2) / maxj); } @@ -265,7 +264,7 @@ /* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */ { /* Breakpoints are halfway between values returned by output_value */ - return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj)); + return (int) (((JLONG) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj)); } @@ -277,8 +276,8 @@ create_colormap (j_decompress_ptr cinfo) { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - JSAMPARRAY colormap; /* Created colormap */ - int total_colors; /* Number of distinct output colors */ + JSAMPARRAY colormap; /* Created colormap */ + int total_colors; /* Number of distinct output colors */ int i,j,k, nci, blksize, blkdist, ptr, val; /* Select number of colors for each component */ @@ -287,8 +286,8 @@ /* Report selected color counts */ if (cinfo->out_color_components == 3) TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS, - total_colors, cquantize->Ncolors[0], - cquantize->Ncolors[1], cquantize->Ncolors[2]); + total_colors, cquantize->Ncolors[0], + cquantize->Ncolors[1], cquantize->Ncolors[2]); else TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors); @@ -313,12 +312,12 @@ val = output_value(cinfo, i, j, nci-1); /* Fill in all colormap entries that have this value of this component */ for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) { - /* fill in blksize entries beginning at ptr */ - for (k = 0; k < blksize; k++) - colormap[i][ptr+k] = (JSAMPLE) val; + /* fill in blksize entries beginning at ptr */ + for (k = 0; k < blksize; k++) + colormap[i][ptr+k] = (JSAMPLE) val; } } - blkdist = blksize; /* blksize of this color is blkdist of next */ + blkdist = blksize; /* blksize of this color is blkdist of next */ } /* Save the colormap in private storage, @@ -376,16 +375,16 @@ val = 0; k = largest_input_value(cinfo, i, 0, nci-1); for (j = 0; j <= MAXJSAMPLE; j++) { - while (j > k) /* advance val if past boundary */ - k = largest_input_value(cinfo, i, ++val, nci-1); + while (j > k) /* advance val if past boundary */ + k = largest_input_value(cinfo, i, ++val, nci-1); /* premultiply so that no multiplication needed in main processing */ indexptr[j] = (JSAMPLE) (val * blksize); } /* Pad at both ends if necessary */ if (pad) for (j = 1; j <= MAXJSAMPLE; j++) { - indexptr[-j] = indexptr[0]; - indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE]; + indexptr[-j] = indexptr[0]; + indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE]; } } } @@ -401,21 +400,21 @@ { ODITHER_MATRIX_PTR odither; int j,k; - INT32 num,den; + JLONG num,den; odither = (ODITHER_MATRIX_PTR) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(ODITHER_MATRIX)); + sizeof(ODITHER_MATRIX)); /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1). * Hence the dither value for the matrix cell with fill order f * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1). * On 16-bit-int machine, be careful to avoid overflow. */ - den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1)); + den = 2 * ODITHER_CELLS * ((JLONG) (ncolors - 1)); for (j = 0; j < ODITHER_SIZE; j++) { for (k = 0; k < ODITHER_SIZE; k++) { - num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k]))) - * MAXJSAMPLE; + num = ((JLONG) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k]))) + * MAXJSAMPLE; /* Ensure round towards zero despite C's lack of consistency * about rounding negative values in integer division... */ @@ -428,7 +427,7 @@ /* * Create the ordered-dither tables. - * Components having the same number of representative colors may + * Components having the same number of representative colors may * share a dither table. */ @@ -441,14 +440,14 @@ for (i = 0; i < cinfo->out_color_components; i++) { nci = cquantize->Ncolors[i]; /* # of distinct values for this color */ - odither = NULL; /* search for matching prior component */ + odither = NULL; /* search for matching prior component */ for (j = 0; j < i; j++) { if (nci == cquantize->Ncolors[j]) { - odither = cquantize->odither[j]; - break; + odither = cquantize->odither[j]; + break; } } - if (odither == NULL) /* need a new table? */ + if (odither == NULL) /* need a new table? */ odither = make_odither_array(cinfo, nci); cquantize->odither[i] = odither; } @@ -461,7 +460,7 @@ METHODDEF(void) color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -479,7 +478,7 @@ for (col = width; col > 0; col--) { pixcode = 0; for (ci = 0; ci < nc; ci++) { - pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); + pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); } *ptrout++ = (JSAMPLE) pixcode; } @@ -489,7 +488,7 @@ METHODDEF(void) color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -517,15 +516,15 @@ METHODDEF(void) quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, with ordered dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; register JSAMPROW input_ptr; register JSAMPROW output_ptr; JSAMPROW colorindex_ci; - int * dither; /* points to active row of dither matrix */ - int row_index, col_index; /* current indexes into dither matrix */ + int *dither; /* points to active row of dither matrix */ + int row_index, col_index; /* current indexes into dither matrix */ int nc = cinfo->out_color_components; int ci; int row; @@ -534,8 +533,7 @@ for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void FAR *) output_buf[row], - (size_t) (width * SIZEOF(JSAMPLE))); + jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); row_index = cquantize->row_index; for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; @@ -545,17 +543,17 @@ col_index = 0; for (col = width; col > 0; col--) { - /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE, - * select output value, accumulate into output code for this pixel. - * Range-limiting need not be done explicitly, as we have extended - * the colorindex table to produce the right answers for out-of-range - * inputs. The maximum dither is +- MAXJSAMPLE; this sets the - * required amount of padding. - */ - *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]]; - input_ptr += nc; - output_ptr++; - col_index = (col_index + 1) & ODITHER_MASK; + /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE, + * select output value, accumulate into output code for this pixel. + * Range-limiting need not be done explicitly, as we have extended + * the colorindex table to produce the right answers for out-of-range + * inputs. The maximum dither is +- MAXJSAMPLE; this sets the + * required amount of padding. + */ + *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]]; + input_ptr += nc; + output_ptr++; + col_index = (col_index + 1) & ODITHER_MASK; } } /* Advance row index for next row */ @@ -567,7 +565,7 @@ METHODDEF(void) quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, with ordered dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -577,10 +575,10 @@ JSAMPROW colorindex0 = cquantize->colorindex[0]; JSAMPROW colorindex1 = cquantize->colorindex[1]; JSAMPROW colorindex2 = cquantize->colorindex[2]; - int * dither0; /* points to active row of dither matrix */ - int * dither1; - int * dither2; - int row_index, col_index; /* current indexes into dither matrix */ + int *dither0; /* points to active row of dither matrix */ + int *dither1; + int *dither2; + int row_index, col_index; /* current indexes into dither matrix */ int row; JDIMENSION col; JDIMENSION width = cinfo->output_width; @@ -596,11 +594,11 @@ for (col = width; col > 0; col--) { pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + - dither0[col_index]]); + dither0[col_index]]); pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + - dither1[col_index]]); + dither1[col_index]]); pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + - dither2[col_index]]); + dither2[col_index]]); *output_ptr++ = (JSAMPLE) pixcode; col_index = (col_index + 1) & ODITHER_MASK; } @@ -612,24 +610,24 @@ METHODDEF(void) quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, with Floyd-Steinberg dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - register LOCFSERROR cur; /* current error or pixel value */ - LOCFSERROR belowerr; /* error for pixel below cur */ - LOCFSERROR bpreverr; /* error for below/prev col */ - LOCFSERROR bnexterr; /* error for below/next col */ + register LOCFSERROR cur; /* current error or pixel value */ + LOCFSERROR belowerr; /* error for pixel below cur */ + LOCFSERROR bpreverr; /* error for below/prev col */ + LOCFSERROR bnexterr; /* error for below/next col */ LOCFSERROR delta; - register FSERRPTR errorptr; /* => fserrors[] at column before current */ + register FSERRPTR errorptr; /* => fserrors[] at column before current */ register JSAMPROW input_ptr; register JSAMPROW output_ptr; JSAMPROW colorindex_ci; JSAMPROW colormap_ci; int pixcode; int nc = cinfo->out_color_components; - int dir; /* 1 for left-to-right, -1 for right-to-left */ - int dirnc; /* dir * nc */ + int dir; /* 1 for left-to-right, -1 for right-to-left */ + int dirnc; /* dir * nc */ int ci; int row; JDIMENSION col; @@ -639,23 +637,22 @@ for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void FAR *) output_buf[row], - (size_t) (width * SIZEOF(JSAMPLE))); + jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; output_ptr = output_buf[row]; if (cquantize->on_odd_row) { - /* work right to left in this row */ - input_ptr += (width-1) * nc; /* so point to rightmost pixel */ - output_ptr += width-1; - dir = -1; - dirnc = -nc; - errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */ + /* work right to left in this row */ + input_ptr += (width-1) * nc; /* so point to rightmost pixel */ + output_ptr += width-1; + dir = -1; + dirnc = -nc; + errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */ } else { - /* work left to right in this row */ - dir = 1; - dirnc = nc; - errorptr = cquantize->fserrors[ci]; /* => entry before first column */ + /* work left to right in this row */ + dir = 1; + dirnc = nc; + errorptr = cquantize->fserrors[ci]; /* => entry before first column */ } colorindex_ci = cquantize->colorindex[ci]; colormap_ci = cquantize->sv_colormap[ci]; @@ -665,47 +662,47 @@ belowerr = bpreverr = 0; for (col = width; col > 0; col--) { - /* cur holds the error propagated from the previous pixel on the - * current line. Add the error propagated from the previous line - * to form the complete error correction term for this pixel, and - * round the error term (which is expressed * 16) to an integer. - * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct - * for either sign of the error value. - * Note: errorptr points to *previous* column's array entry. - */ - cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4); - /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE. - * The maximum error is +- MAXJSAMPLE; this sets the required size - * of the range_limit array. - */ - cur += GETJSAMPLE(*input_ptr); - cur = GETJSAMPLE(range_limit[cur]); - /* Select output value, accumulate into output code for this pixel */ - pixcode = GETJSAMPLE(colorindex_ci[cur]); - *output_ptr += (JSAMPLE) pixcode; - /* Compute actual representation error at this pixel */ - /* Note: we can do this even though we don't have the final */ - /* pixel code, because the colormap is orthogonal. */ - cur -= GETJSAMPLE(colormap_ci[pixcode]); - /* Compute error fractions to be propagated to adjacent pixels. - * Add these into the running sums, and simultaneously shift the - * next-line error sums left by 1 column. - */ - bnexterr = cur; - delta = cur * 2; - cur += delta; /* form error * 3 */ - errorptr[0] = (FSERROR) (bpreverr + cur); - cur += delta; /* form error * 5 */ - bpreverr = belowerr + cur; - belowerr = bnexterr; - cur += delta; /* form error * 7 */ - /* At this point cur contains the 7/16 error value to be propagated - * to the next pixel on the current line, and all the errors for the - * next line have been shifted over. We are therefore ready to move on. - */ - input_ptr += dirnc; /* advance input ptr to next column */ - output_ptr += dir; /* advance output ptr to next column */ - errorptr += dir; /* advance errorptr to current column */ + /* cur holds the error propagated from the previous pixel on the + * current line. Add the error propagated from the previous line + * to form the complete error correction term for this pixel, and + * round the error term (which is expressed * 16) to an integer. + * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct + * for either sign of the error value. + * Note: errorptr points to *previous* column's array entry. + */ + cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4); + /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE. + * The maximum error is +- MAXJSAMPLE; this sets the required size + * of the range_limit array. + */ + cur += GETJSAMPLE(*input_ptr); + cur = GETJSAMPLE(range_limit[cur]); + /* Select output value, accumulate into output code for this pixel */ + pixcode = GETJSAMPLE(colorindex_ci[cur]); + *output_ptr += (JSAMPLE) pixcode; + /* Compute actual representation error at this pixel */ + /* Note: we can do this even though we don't have the final */ + /* pixel code, because the colormap is orthogonal. */ + cur -= GETJSAMPLE(colormap_ci[pixcode]); + /* Compute error fractions to be propagated to adjacent pixels. + * Add these into the running sums, and simultaneously shift the + * next-line error sums left by 1 column. + */ + bnexterr = cur; + delta = cur * 2; + cur += delta; /* form error * 3 */ + errorptr[0] = (FSERROR) (bpreverr + cur); + cur += delta; /* form error * 5 */ + bpreverr = belowerr + cur; + belowerr = bnexterr; + cur += delta; /* form error * 7 */ + /* At this point cur contains the 7/16 error value to be propagated + * to the next pixel on the current line, and all the errors for the + * next line have been shifted over. We are therefore ready to move on. + */ + input_ptr += dirnc; /* advance input ptr to next column */ + output_ptr += dir; /* advance output ptr to next column */ + errorptr += dir; /* advance errorptr to current column */ } /* Post-loop cleanup: we must unload the final error value into the * final fserrors[] entry. Note we need not unload belowerr because @@ -729,7 +726,7 @@ size_t arraysize; int i; - arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR)); + arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) { cquantize->fserrors[i] = (FSERRPTR) (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); @@ -765,7 +762,7 @@ cquantize->pub.color_quantize = quantize3_ord_dither; else cquantize->pub.color_quantize = quantize_ord_dither; - cquantize->row_index = 0; /* initialize state for ordered dither */ + cquantize->row_index = 0; /* initialize state for ordered dither */ /* If user changed to ordered dither from another mode, * we must recreate the color index table with padding. * This will cost extra space, but probably isn't very likely. @@ -783,9 +780,9 @@ if (cquantize->fserrors[0] == NULL) alloc_fs_workspace(cinfo); /* Initialize the propagated errors to zero. */ - arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR)); + arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) - jzero_far((void FAR *) cquantize->fserrors[i], arraysize); + jzero_far((void *) cquantize->fserrors[i], arraysize); break; default: ERREXIT(cinfo, JERR_NOT_COMPILED); @@ -828,13 +825,13 @@ cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_cquantizer)); + sizeof(my_cquantizer)); cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; cquantize->pub.start_pass = start_pass_1_quant; cquantize->pub.finish_pass = finish_pass_1_quant; cquantize->pub.new_color_map = new_color_map_1_quant; cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */ - cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */ + cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */ /* Make sure my internal arrays won't overflow */ if (cinfo->out_color_components > MAX_Q_COMPS) @@ -848,10 +845,10 @@ create_colorindex(cinfo); /* Allocate Floyd-Steinberg workspace now if requested. - * We do this now since it is FAR storage and may affect the memory - * manager's space calculations. If the user changes to FS dither - * mode in a later pass, we will allocate the space then, and will - * possibly overrun the max_memory_to_use setting. + * We do this now since it may affect the memory manager's space + * calculations. If the user changes to FS dither mode in a later pass, we + * will allocate the space then, and will possibly overrun the + * max_memory_to_use setting. */ if (cinfo->dither_mode == JDITHER_FS) alloc_fs_workspace(cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jquant2.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jquant2.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jquant2.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jquant2.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jquant2.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains 2-pass color quantization (color mapping) routines. * These routines provide selection of a custom color map for an image, @@ -42,7 +44,7 @@ * color space, and repeatedly splits the "largest" remaining box until we * have as many boxes as desired colors. Then the mean color in each * remaining box becomes one of the possible output colors. - * + * * The second pass over the image maps each input pixel to the closest output * color (optionally after applying a Floyd-Steinberg dithering correction). * This mapping is logically trivial, but making it go fast enough requires @@ -71,9 +73,9 @@ * probably need to change these scale factors. */ -#define R_SCALE 2 /* scale R distances by this much */ -#define G_SCALE 3 /* scale G distances by this much */ -#define B_SCALE 1 /* and B by this much */ +#define R_SCALE 2 /* scale R distances by this much */ +#define G_SCALE 3 /* scale G distances by this much */ +#define B_SCALE 1 /* and B by this much */ static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; #define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]] @@ -101,9 +103,7 @@ * machines, we can't just allocate the histogram in one chunk. Instead * of a true 3-D array, we use a row of pointers to 2-D arrays. Each * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and - * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries. Note that - * on 80x86 machines, the pointer row is in near memory but the actual - * arrays are in far memory (same arrangement as we use for image arrays). + * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries. */ #define MAXNUMCOLORS (MAXJSAMPLE+1) /* maximum size of colormap */ @@ -111,9 +111,9 @@ /* These will do the right thing for either R,G,B or B,G,R color order, * but you may not like the results for other color orders. */ -#define HIST_C0_BITS 5 /* bits of precision in R/B histogram */ -#define HIST_C1_BITS 6 /* bits of precision in G histogram */ -#define HIST_C2_BITS 5 /* bits of precision in B/R histogram */ +#define HIST_C0_BITS 5 /* bits of precision in R/B histogram */ +#define HIST_C1_BITS 6 /* bits of precision in G histogram */ +#define HIST_C2_BITS 5 /* bits of precision in B/R histogram */ /* Number of elements along histogram axes. */ #define HIST_C0_ELEMS (1<cquantize; register JSAMPROW ptr; @@ -219,11 +216,11 @@ for (col = width; col > 0; col--) { /* get pixel value and index into the histogram */ histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] - [GETJSAMPLE(ptr[1]) >> C1_SHIFT] - [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; + [GETJSAMPLE(ptr[1]) >> C1_SHIFT] + [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; /* increment, check for overflow and undo increment if so. */ if (++(*histp) <= 0) - (*histp)--; + (*histp)--; ptr += 3; } } @@ -243,12 +240,12 @@ int c1min, c1max; int c2min, c2max; /* The volume (actually 2-norm) of the box */ - INT32 volume; + JLONG volume; /* The number of nonzero histogram cells within this box */ long colorcount; } box; -typedef box * boxptr; +typedef box *boxptr; LOCAL(boxptr) @@ -260,7 +257,7 @@ register int i; register long maxc = 0; boxptr which = NULL; - + for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) { if (boxp->colorcount > maxc && boxp->volume > 0) { which = boxp; @@ -278,9 +275,9 @@ { register boxptr boxp; register int i; - register INT32 maxv = 0; + register JLONG maxv = 0; boxptr which = NULL; - + for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) { if (boxp->volume > maxv) { which = boxp; @@ -301,77 +298,77 @@ histptr histp; int c0,c1,c2; int c0min,c0max,c1min,c1max,c2min,c2max; - INT32 dist0,dist1,dist2; + JLONG dist0,dist1,dist2; long ccount; - + c0min = boxp->c0min; c0max = boxp->c0max; c1min = boxp->c1min; c1max = boxp->c1max; c2min = boxp->c2min; c2max = boxp->c2max; - + if (c0max > c0min) for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c0min = c0min = c0; - goto have_c0min; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c0min = c0min = c0; + goto have_c0min; + } } have_c0min: if (c0max > c0min) for (c0 = c0max; c0 >= c0min; c0--) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c0max = c0max = c0; - goto have_c0max; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c0max = c0max = c0; + goto have_c0max; + } } have_c0max: if (c1max > c1min) for (c1 = c1min; c1 <= c1max; c1++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c1min = c1min = c1; - goto have_c1min; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c1min = c1min = c1; + goto have_c1min; + } } have_c1min: if (c1max > c1min) for (c1 = c1max; c1 >= c1min; c1--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c1max = c1max = c1; - goto have_c1max; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c1max = c1max = c1; + goto have_c1max; + } } have_c1max: if (c2max > c2min) for (c2 = c2min; c2 <= c2max; c2++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; - for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) - if (*histp != 0) { - boxp->c2min = c2min = c2; - goto have_c2min; - } + histp = & histogram[c0][c1min][c2]; + for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) + if (*histp != 0) { + boxp->c2min = c2min = c2; + goto have_c2min; + } } have_c2min: if (c2max > c2min) for (c2 = c2max; c2 >= c2min; c2--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; - for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) - if (*histp != 0) { - boxp->c2max = c2max = c2; - goto have_c2max; - } + histp = & histogram[c0][c1min][c2]; + for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) + if (*histp != 0) { + boxp->c2max = c2max = c2; + goto have_c2max; + } } have_c2max: @@ -387,16 +384,16 @@ dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE; dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE; boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2; - + /* Now scan remaining volume of box and compute population */ ccount = 0; for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { histp = & histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++, histp++) - if (*histp != 0) { - ccount++; - } + if (*histp != 0) { + ccount++; + } } boxp->colorcount = ccount; } @@ -404,7 +401,7 @@ LOCAL(int) median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, - int desired_colors) + int desired_colors) /* Repeatedly select and split the largest box until we have enough boxes */ { int n,lb; @@ -420,9 +417,9 @@ } else { b1 = find_biggest_volume(boxlist, numboxes); } - if (b1 == NULL) /* no splittable boxes left! */ + if (b1 == NULL) /* no splittable boxes left! */ break; - b2 = &boxlist[numboxes]; /* where new box will go */ + b2 = &boxlist[numboxes]; /* where new box will go */ /* Copy the color bounds to the new box. */ b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max; b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min; @@ -494,24 +491,24 @@ long c0total = 0; long c1total = 0; long c2total = 0; - + c0min = boxp->c0min; c0max = boxp->c0max; c1min = boxp->c1min; c1max = boxp->c1max; c2min = boxp->c2min; c2max = boxp->c2max; - + for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { histp = & histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) { - if ((count = *histp++) != 0) { - total += count; - c0total += ((c0 << C0_SHIFT) + ((1<>1)) * count; - c1total += ((c1 << C1_SHIFT) + ((1<>1)) * count; - c2total += ((c2 << C2_SHIFT) + ((1<>1)) * count; - } + if ((count = *histp++) != 0) { + total += count; + c0total += ((c0 << C0_SHIFT) + ((1<>1)) * count; + c1total += ((c1 << C1_SHIFT) + ((1<>1)) * count; + c2total += ((c2 << C2_SHIFT) + ((1<>1)) * count; + } } } - + cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total); cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total); cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total); @@ -528,7 +525,7 @@ /* Allocate workspace for box list */ boxlist = (boxptr) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * sizeof(box)); /* Initialize one box containing whole space */ numboxes = 1; boxlist[0].c0min = 0; @@ -575,7 +572,7 @@ * distance from every colormap entry to every histogram cell. Unfortunately, * it needs a work array to hold the best-distance-so-far for each histogram * cell (because the inner loop has to be over cells, not colormap entries). - * The work array elements have to be INT32s, so the work array would need + * The work array elements have to be JLONGs, so the work array would need * 256Kb at our recommended precision. This is not feasible in DOS machines. * * To get around these problems, we apply Thomas' method to compute the @@ -627,7 +624,7 @@ LOCAL(int) find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, - JSAMPLE colorlist[]) + JSAMPLE colorlist[]) /* Locate the colormap entries close enough to an update box to be candidates * for the nearest entry to some cell(s) in the update box. The update box * is specified by the center coordinates of its first cell. The number of @@ -641,8 +638,8 @@ int maxc0, maxc1, maxc2; int centerc0, centerc1, centerc2; int i, x, ncolors; - INT32 minmaxdist, min_dist, max_dist, tdist; - INT32 mindist[MAXNUMCOLORS]; /* min distance to colormap entry i */ + JLONG minmaxdist, min_dist, max_dist, tdist; + JLONG mindist[MAXNUMCOLORS]; /* min distance to colormap entry i */ /* Compute true coordinates of update box's upper corner and center. * Actually we compute the coordinates of the center of the upper-corner @@ -684,11 +681,11 @@ /* within cell range so no contribution to min_dist */ min_dist = 0; if (x <= centerc0) { - tdist = (x - maxc0) * C0_SCALE; - max_dist = tdist*tdist; + tdist = (x - maxc0) * C0_SCALE; + max_dist = tdist*tdist; } else { - tdist = (x - minc0) * C0_SCALE; - max_dist = tdist*tdist; + tdist = (x - minc0) * C0_SCALE; + max_dist = tdist*tdist; } } @@ -706,11 +703,11 @@ } else { /* within cell range so no contribution to min_dist */ if (x <= centerc1) { - tdist = (x - maxc1) * C1_SCALE; - max_dist += tdist*tdist; + tdist = (x - maxc1) * C1_SCALE; + max_dist += tdist*tdist; } else { - tdist = (x - minc1) * C1_SCALE; - max_dist += tdist*tdist; + tdist = (x - minc1) * C1_SCALE; + max_dist += tdist*tdist; } } @@ -728,15 +725,15 @@ } else { /* within cell range so no contribution to min_dist */ if (x <= centerc2) { - tdist = (x - maxc2) * C2_SCALE; - max_dist += tdist*tdist; + tdist = (x - maxc2) * C2_SCALE; + max_dist += tdist*tdist; } else { - tdist = (x - minc2) * C2_SCALE; - max_dist += tdist*tdist; + tdist = (x - minc2) * C2_SCALE; + max_dist += tdist*tdist; } } - mindist[i] = min_dist; /* save away the results */ + mindist[i] = min_dist; /* save away the results */ if (max_dist < minmaxdist) minmaxdist = max_dist; } @@ -756,7 +753,7 @@ LOCAL(void) find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, - int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) + int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) /* Find the closest colormap entry for each cell in the update box, * given the list of candidate colors prepared by find_nearby_colors. * Return the indexes of the closest entries in the bestcolor[] array. @@ -766,31 +763,31 @@ { int ic0, ic1, ic2; int i, icolor; - register INT32 * bptr; /* pointer into bestdist[] array */ - JSAMPLE * cptr; /* pointer into bestcolor[] array */ - INT32 dist0, dist1; /* initial distance values */ - register INT32 dist2; /* current distance in inner loop */ - INT32 xx0, xx1; /* distance increments */ - register INT32 xx2; - INT32 inc0, inc1, inc2; /* initial values for increments */ + register JLONG *bptr; /* pointer into bestdist[] array */ + JSAMPLE *cptr; /* pointer into bestcolor[] array */ + JLONG dist0, dist1; /* initial distance values */ + register JLONG dist2; /* current distance in inner loop */ + JLONG xx0, xx1; /* distance increments */ + register JLONG xx2; + JLONG inc0, inc1, inc2; /* initial values for increments */ /* This array holds the distance to the nearest-so-far color for each cell */ - INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; + JLONG bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; /* Initialize best-distance for each cell of the update box */ bptr = bestdist; for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--) *bptr++ = 0x7FFFFFFFL; - + /* For each color selected by find_nearby_colors, * compute its distance to the center of each cell in the box. * If that's less than best-so-far, update best distance and color number. */ - + /* Nominal steps between cell centers ("x" in Thomas article) */ #define STEP_C0 ((1 << C0_SHIFT) * C0_SCALE) #define STEP_C1 ((1 << C1_SHIFT) * C1_SCALE) #define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE) - + for (i = 0; i < numcolors; i++) { icolor = GETJSAMPLE(colorlist[i]); /* Compute (square of) distance from minc0/c1/c2 to this color */ @@ -812,20 +809,20 @@ dist1 = dist0; xx1 = inc1; for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) { - dist2 = dist1; - xx2 = inc2; - for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) { - if (dist2 < *bptr) { - *bptr = dist2; - *cptr = (JSAMPLE) icolor; - } - dist2 += xx2; - xx2 += 2 * STEP_C2 * STEP_C2; - bptr++; - cptr++; - } - dist1 += xx1; - xx1 += 2 * STEP_C1 * STEP_C1; + dist2 = dist1; + xx2 = inc2; + for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) { + if (dist2 < *bptr) { + *bptr = dist2; + *cptr = (JSAMPLE) icolor; + } + dist2 += xx2; + xx2 += 2 * STEP_C2 * STEP_C2; + bptr++; + cptr++; + } + dist1 += xx1; + xx1 += 2 * STEP_C1 * STEP_C1; } dist0 += xx0; xx0 += 2 * STEP_C0 * STEP_C0; @@ -842,13 +839,13 @@ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; hist3d histogram = cquantize->histogram; - int minc0, minc1, minc2; /* lower left corner of update box */ + int minc0, minc1, minc2; /* lower left corner of update box */ int ic0, ic1, ic2; - register JSAMPLE * cptr; /* pointer into bestcolor[] array */ - register histptr cachep; /* pointer into main cache array */ + register JSAMPLE *cptr; /* pointer into bestcolor[] array */ + register histptr cachep; /* pointer into main cache array */ /* This array lists the candidate colormap indexes. */ JSAMPLE colorlist[MAXNUMCOLORS]; - int numcolors; /* number of candidate colors */ + int numcolors; /* number of candidate colors */ /* This array holds the actually closest colormap index for each cell. */ JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; @@ -864,7 +861,7 @@ minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1); minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1); minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1); - + /* Determine which colormap entries are close enough to be candidates * for the nearest entry to some cell in the update box. */ @@ -872,10 +869,10 @@ /* Determine the actually nearest colors. */ find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist, - bestcolor); + bestcolor); /* Save the best color numbers (plus 1) in the main cache array */ - c0 <<= BOX_C0_LOG; /* convert ID back to base cell indexes */ + c0 <<= BOX_C0_LOG; /* convert ID back to base cell indexes */ c1 <<= BOX_C1_LOG; c2 <<= BOX_C2_LOG; cptr = bestcolor; @@ -883,7 +880,7 @@ for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) { cachep = & histogram[c0+ic0][c1+ic1][c2]; for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) { - *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1); + *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1); } } } @@ -896,7 +893,7 @@ METHODDEF(void) pass2_no_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) /* This version performs no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -920,7 +917,7 @@ /* If we have not seen this color before, find nearest colormap entry */ /* and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, c0,c1,c2); + fill_inverse_cmap(cinfo, c0,c1,c2); /* Now emit the colormap index for this cell */ *outptr++ = (JSAMPLE) (*cachep - 1); } @@ -930,20 +927,20 @@ METHODDEF(void) pass2_fs_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) /* This version performs Floyd-Steinberg dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; hist3d histogram = cquantize->histogram; - register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */ + register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */ LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */ LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */ - register FSERRPTR errorptr; /* => fserrors[] at column before current */ - JSAMPROW inptr; /* => current input pixel */ - JSAMPROW outptr; /* => current output pixel */ + register FSERRPTR errorptr; /* => fserrors[] at column before current */ + JSAMPROW inptr; /* => current input pixel */ + JSAMPROW outptr; /* => current output pixel */ histptr cachep; - int dir; /* +1 or -1 depending on direction */ - int dir3; /* 3*dir, for advancing inptr & errorptr */ + int dir; /* +1 or -1 depending on direction */ + int dir3; /* 3*dir, for advancing inptr & errorptr */ int row; JDIMENSION col; JDIMENSION width = cinfo->output_width; @@ -959,7 +956,7 @@ outptr = output_buf[row]; if (cquantize->on_odd_row) { /* work right to left in this row */ - inptr += (width-1) * 3; /* so point to rightmost pixel */ + inptr += (width-1) * 3; /* so point to rightmost pixel */ outptr += width-1; dir = -1; dir3 = -3; @@ -1011,53 +1008,44 @@ /* If we have not seen this color before, find nearest colormap */ /* entry and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT); + fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT); /* Now emit the colormap index for this cell */ { register int pixcode = *cachep - 1; - *outptr = (JSAMPLE) pixcode; - /* Compute representation error for this pixel */ - cur0 -= GETJSAMPLE(colormap0[pixcode]); - cur1 -= GETJSAMPLE(colormap1[pixcode]); - cur2 -= GETJSAMPLE(colormap2[pixcode]); + *outptr = (JSAMPLE) pixcode; + /* Compute representation error for this pixel */ + cur0 -= GETJSAMPLE(colormap0[pixcode]); + cur1 -= GETJSAMPLE(colormap1[pixcode]); + cur2 -= GETJSAMPLE(colormap2[pixcode]); } /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the * next-line error sums left by 1 column. */ - { register LOCFSERROR bnexterr, delta; + { register LOCFSERROR bnexterr; - bnexterr = cur0; /* Process component 0 */ - delta = cur0 * 2; - cur0 += delta; /* form error * 3 */ - errorptr[0] = (FSERROR) (bpreverr0 + cur0); - cur0 += delta; /* form error * 5 */ - bpreverr0 = belowerr0 + cur0; - belowerr0 = bnexterr; - cur0 += delta; /* form error * 7 */ - bnexterr = cur1; /* Process component 1 */ - delta = cur1 * 2; - cur1 += delta; /* form error * 3 */ - errorptr[1] = (FSERROR) (bpreverr1 + cur1); - cur1 += delta; /* form error * 5 */ - bpreverr1 = belowerr1 + cur1; - belowerr1 = bnexterr; - cur1 += delta; /* form error * 7 */ - bnexterr = cur2; /* Process component 2 */ - delta = cur2 * 2; - cur2 += delta; /* form error * 3 */ - errorptr[2] = (FSERROR) (bpreverr2 + cur2); - cur2 += delta; /* form error * 5 */ - bpreverr2 = belowerr2 + cur2; - belowerr2 = bnexterr; - cur2 += delta; /* form error * 7 */ + bnexterr = cur0; /* Process component 0 */ + errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3); + bpreverr0 = belowerr0 + cur0 * 5; + belowerr0 = bnexterr; + cur0 *= 7; + bnexterr = cur1; /* Process component 1 */ + errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3); + bpreverr1 = belowerr1 + cur1 * 5; + belowerr1 = bnexterr; + cur1 *= 7; + bnexterr = cur2; /* Process component 2 */ + errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3); + bpreverr2 = belowerr2 + cur2 * 5; + belowerr2 = bnexterr; + cur2 *= 7; } /* At this point curN contains the 7/16 error value to be propagated * to the next pixel on the current line, and all the errors for the * next line have been shifted over. We are therefore ready to move on. */ - inptr += dir3; /* Advance pixel pointers to next column */ + inptr += dir3; /* Advance pixel pointers to next column */ outptr += dir; - errorptr += dir3; /* advance errorptr to current column */ + errorptr += dir3; /* advance errorptr to current column */ } /* Post-loop cleanup: we must unload the final error values into the * final fserrors[] entry. Note we need not unload belowerrN because @@ -1092,12 +1080,12 @@ /* Allocate and fill in the error_limiter table */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - int * table; + int *table; int in, out; table = (int *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int)); - table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */ + ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * sizeof(int)); + table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */ cquantize->error_limiter = table; #define STEPSIZE ((MAXJSAMPLE+1)/16) @@ -1180,16 +1168,16 @@ if (cinfo->dither_mode == JDITHER_FS) { size_t arraysize = (size_t) ((cinfo->output_width + 2) * - (3 * SIZEOF(FSERROR))); + (3 * sizeof(FSERROR))); /* Allocate Floyd-Steinberg workspace if we didn't already. */ if (cquantize->fserrors == NULL) - cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) - ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); + cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) + ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); /* Initialize the propagated errors to zero. */ - jzero_far((void FAR *) cquantize->fserrors, arraysize); + jzero_far((void *) cquantize->fserrors, arraysize); /* Make the error-limit table if we didn't already. */ if (cquantize->error_limiter == NULL) - init_error_limit(cinfo); + init_error_limit(cinfo); cquantize->on_odd_row = FALSE; } @@ -1197,8 +1185,8 @@ /* Zero the histogram or inverse color map, if necessary */ if (cquantize->needs_zeroed) { for (i = 0; i < HIST_C0_ELEMS; i++) { - jzero_far((void FAR *) histogram[i], - HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell)); + jzero_far((void *) histogram[i], + HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = FALSE; } @@ -1231,11 +1219,11 @@ cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_cquantizer)); + sizeof(my_cquantizer)); cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; cquantize->pub.start_pass = start_pass_2_quant; cquantize->pub.new_color_map = new_color_map_2_quant; - cquantize->fserrors = NULL; /* flag optional arrays not allocated */ + cquantize->fserrors = NULL; /* flag optional arrays not allocated */ cquantize->error_limiter = NULL; /* Make sure jdmaster didn't give me a case I can't handle */ @@ -1244,17 +1232,17 @@ /* Allocate the histogram/inverse colormap storage */ cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d)); for (i = 0; i < HIST_C0_ELEMS; i++) { cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell)); + HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = TRUE; /* histogram is garbage now */ /* Allocate storage for the completed colormap, if required. - * We do this now since it is FAR storage and may affect - * the memory manager's space calculations. + * We do this now since it may affect the memory manager's space + * calculations. */ if (cinfo->enable_2pass_quant) { /* Make sure color count is acceptable */ @@ -1277,14 +1265,15 @@ cinfo->dither_mode = JDITHER_FS; /* Allocate Floyd-Steinberg workspace if necessary. - * This isn't really needed until pass 2, but again it is FAR storage. - * Although we will cope with a later change in dither_mode, - * we do not promise to honor max_memory_to_use if dither_mode changes. + * This isn't really needed until pass 2, but again it may affect the memory + * manager's space calculations. Although we will cope with a later change + * in dither_mode, we do not promise to honor max_memory_to_use if + * dither_mode changes. */ if (cinfo->dither_mode == JDITHER_FS) { cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR)))); + (size_t) ((cinfo->output_width + 2) * (3 * sizeof(FSERROR)))); /* Might as well create the error-limiting table too. */ init_error_limit(cinfo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimddct.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimddct.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimddct.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimddct.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,101 +2,73 @@ * jsimddct.h * * Copyright 2009 Pierre Ossman for Cendio AB - * + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jsimd_can_convsamp jSCanConv -#define jsimd_can_convsamp_float jSCanConvF -#define jsimd_convsamp jSConv -#define jsimd_convsamp_float jSConvF -#define jsimd_can_fdct_islow jSCanFDCTIS -#define jsimd_can_fdct_ifast jSCanFDCTIF -#define jsimd_can_fdct_float jSCanFDCTFl -#define jsimd_fdct_islow jSFDCTIS -#define jsimd_fdct_ifast jSFDCTIF -#define jsimd_fdct_float jSFDCTFl -#define jsimd_can_quantize jSCanQuant -#define jsimd_can_quantize_float jSCanQuantF -#define jsimd_quantize jSQuant -#define jsimd_quantize_float jSQuantF -#define jsimd_can_idct_2x2 jSCanIDCT22 -#define jsimd_can_idct_4x4 jSCanIDCT44 -#define jsimd_idct_2x2 jSIDCT22 -#define jsimd_idct_4x4 jSIDCT44 -#define jsimd_can_idct_islow jSCanIDCTIS -#define jsimd_can_idct_ifast jSCanIDCTIF -#define jsimd_can_idct_float jSCanIDCTFl -#define jsimd_idct_islow jSIDCTIS -#define jsimd_idct_ifast jSIDCTIF -#define jsimd_idct_float jSIDCTFl -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -EXTERN(int) jsimd_can_convsamp JPP((void)); -EXTERN(int) jsimd_can_convsamp_float JPP((void)); - -EXTERN(void) jsimd_convsamp JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); -EXTERN(void) jsimd_convsamp_float JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(int) jsimd_can_fdct_islow JPP((void)); -EXTERN(int) jsimd_can_fdct_ifast JPP((void)); -EXTERN(int) jsimd_can_fdct_float JPP((void)); - -EXTERN(void) jsimd_fdct_islow JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_ifast JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_float JPP((FAST_FLOAT * data)); - -EXTERN(int) jsimd_can_quantize JPP((void)); -EXTERN(int) jsimd_can_quantize_float JPP((void)); - -EXTERN(void) jsimd_quantize JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); -EXTERN(void) jsimd_quantize_float JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(int) jsimd_can_idct_2x2 JPP((void)); -EXTERN(int) jsimd_can_idct_4x4 JPP((void)); - -EXTERN(void) jsimd_idct_2x2 JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4 JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(int) jsimd_can_idct_islow JPP((void)); -EXTERN(int) jsimd_can_idct_ifast JPP((void)); -EXTERN(int) jsimd_can_idct_float JPP((void)); - -EXTERN(void) jsimd_idct_islow JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_float JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(int) jsimd_can_convsamp (void); +EXTERN(int) jsimd_can_convsamp_float (void); +EXTERN(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace); +EXTERN(void) jsimd_convsamp_float (JSAMPARRAY sample_data, + JDIMENSION start_col, + FAST_FLOAT *workspace); + +EXTERN(int) jsimd_can_fdct_islow (void); +EXTERN(int) jsimd_can_fdct_ifast (void); +EXTERN(int) jsimd_can_fdct_float (void); + +EXTERN(void) jsimd_fdct_islow (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast (DCTELEM *data); +EXTERN(void) jsimd_fdct_float (FAST_FLOAT *data); + +EXTERN(int) jsimd_can_quantize (void); +EXTERN(int) jsimd_can_quantize_float (void); + +EXTERN(void) jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace); +EXTERN(void) jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace); + +EXTERN(int) jsimd_can_idct_2x2 (void); +EXTERN(int) jsimd_can_idct_4x4 (void); +EXTERN(int) jsimd_can_idct_6x6 (void); +EXTERN(int) jsimd_can_idct_12x12 (void); + +EXTERN(void) jsimd_idct_2x2 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_6x6 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(int) jsimd_can_idct_islow (void); +EXTERN(int) jsimd_can_idct_ifast (void); +EXTERN(int) jsimd_can_idct_float (void); + +EXTERN(void) jsimd_idct_islow (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_ifast (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_float (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimd.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimd.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimd.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimd.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,97 +2,92 @@ * jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2011 D. R. Commander - * + * Copyright 2011, 2014 D. R. Commander + * Copyright 2015 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * */ -/* Short forms of external names for systems with brain-damaged linkers. */ +#include "jchuff.h" /* Declarations shared with jcphuff.c */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jsimd_can_rgb_ycc jSCanRgbYcc -#define jsimd_can_rgb_gray jSCanRgbGry -#define jsimd_can_ycc_rgb jSCanYccRgb -#define jsimd_rgb_ycc_convert jSRgbYccConv -#define jsimd_rgb_gray_convert jSRgbGryConv -#define jsimd_ycc_rgb_convert jSYccRgbConv -#define jsimd_can_h2v2_downsample jSCanH2V2Down -#define jsimd_can_h2v1_downsample jSCanH2V1Down -#define jsimd_h2v2_downsample jSH2V2Down -#define jsimd_h2v1_downsample jSH2V1Down -#define jsimd_can_h2v2_upsample jSCanH2V2Up -#define jsimd_can_h2v1_upsample jSCanH2V1Up -#define jsimd_h2v2_upsample jSH2V2Up -#define jsimd_h2v1_upsample jSH2V1Up -#define jsimd_can_h2v2_fancy_upsample jSCanH2V2FUp -#define jsimd_can_h2v1_fancy_upsample jSCanH2V1FUp -#define jsimd_h2v2_fancy_upsample jSH2V2FUp -#define jsimd_h2v1_fancy_upsample jSH2V1FUp -#define jsimd_can_h2v2_merged_upsample jSCanH2V2MUp -#define jsimd_can_h2v1_merged_upsample jSCanH2V1MUp -#define jsimd_h2v2_merged_upsample jSH2V2MUp -#define jsimd_h2v1_merged_upsample jSH2V1MUp -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -EXTERN(int) jsimd_can_rgb_ycc JPP((void)); -EXTERN(int) jsimd_can_rgb_gray JPP((void)); -EXTERN(int) jsimd_can_ycc_rgb JPP((void)); +EXTERN(int) jsimd_can_rgb_ycc (void); +EXTERN(int) jsimd_can_rgb_gray (void); +EXTERN(int) jsimd_can_ycc_rgb (void); +EXTERN(int) jsimd_can_ycc_rgb565 (void); +EXTERN(int) jsimd_c_can_null_convert (void); EXTERN(void) jsimd_rgb_ycc_convert - JPP((j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_rgb_gray_convert - JPP((j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_rgb565_convert + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_c_null_convert + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); -EXTERN(int) jsimd_can_h2v2_downsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_downsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_downsample (void); +EXTERN(int) jsimd_can_h2v1_downsample (void); EXTERN(void) jsimd_h2v2_downsample - JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(int) jsimd_can_h2v2_smooth_downsample (void); + +EXTERN(void) jsimd_h2v2_smooth_downsample + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); + EXTERN(void) jsimd_h2v1_downsample - JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(int) jsimd_can_h2v2_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_upsample (void); +EXTERN(int) jsimd_can_h2v1_upsample (void); +EXTERN(int) jsimd_can_int_upsample (void); EXTERN(void) jsimd_h2v2_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_int_upsample + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(int) jsimd_can_h2v2_fancy_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_fancy_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_fancy_upsample (void); +EXTERN(int) jsimd_can_h2v1_fancy_upsample (void); EXTERN(void) jsimd_h2v2_fancy_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_fancy_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(int) jsimd_can_h2v2_merged_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_merged_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_merged_upsample (void); +EXTERN(int) jsimd_can_h2v1_merged_upsample (void); EXTERN(void) jsimd_h2v2_merged_upsample - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_merged_upsample - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(int) jsimd_can_huff_encode_one_block (void); +EXTERN(JOCTET*) jsimd_huff_encode_one_block + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimd_none.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimd_none.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jsimd_none.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jsimd_none.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,404 @@ +/* + * jsimd_none.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains stubs for when there is no SIMD support available. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jsimd.h" +#include "jdct.h" +#include "jsimddct.h" + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jstdhuff.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jstdhuff.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jstdhuff.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jstdhuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,135 @@ +/* + * jstdhuff.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1998, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to set the default Huffman tables, if they are + * not already set. + */ + +/* + * Huffman table setup routines + */ + +LOCAL(void) +add_huff_table (j_common_ptr cinfo, + JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val) +/* Define a Huffman table */ +{ + int nsymbols, len; + + if (*htblptr == NULL) + *htblptr = jpeg_alloc_huff_table(cinfo); + else + return; + + /* Copy the number-of-symbols-of-each-code-length counts */ + MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits)); + + /* Validate the counts. We do this here mainly so we can copy the right + * number of symbols from the val[] array, without risking marching off + * the end of memory. jchuff.c will do a more thorough test later. + */ + nsymbols = 0; + for (len = 1; len <= 16; len++) + nsymbols += bits[len]; + if (nsymbols < 1 || nsymbols > 256) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + + MEMCOPY((*htblptr)->huffval, val, nsymbols * sizeof(UINT8)); + MEMZERO(&((*htblptr)->huffval[nsymbols]), (256 - nsymbols) * sizeof(UINT8)); + + /* Initialize sent_table FALSE so table will be written to JPEG file. */ + (*htblptr)->sent_table = FALSE; +} + + +LOCAL(void) +std_huff_tables (j_common_ptr cinfo) +/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ +/* IMPORTANT: these are only valid for 8-bit data precision! */ +{ + JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs; + + static const UINT8 bits_dc_luminance[17] = + { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; + static const UINT8 val_dc_luminance[] = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + + static const UINT8 bits_dc_chrominance[17] = + { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; + static const UINT8 val_dc_chrominance[] = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + + static const UINT8 bits_ac_luminance[17] = + { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; + static const UINT8 val_ac_luminance[] = + { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa }; + + static const UINT8 bits_ac_chrominance[17] = + { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; + static const UINT8 val_ac_chrominance[] = + { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa }; + + if (cinfo->is_decompressor) { + dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs; + ac_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->ac_huff_tbl_ptrs; + } else { + dc_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->dc_huff_tbl_ptrs; + ac_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->ac_huff_tbl_ptrs; + } + + add_huff_table(cinfo, &dc_huff_tbl_ptrs[0], bits_dc_luminance, + val_dc_luminance); + add_huff_table(cinfo, &ac_huff_tbl_ptrs[0], bits_ac_luminance, + val_ac_luminance); + add_huff_table(cinfo, &dc_huff_tbl_ptrs[1], bits_dc_chrominance, + val_dc_chrominance); + add_huff_table(cinfo, &ac_huff_tbl_ptrs[1], bits_ac_chrominance, + val_ac_chrominance); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jutils.c glmark2-2021.02/android/jni/src/libjpeg-turbo/jutils.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jutils.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jutils.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jutils.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code + * relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains tables and miscellaneous utility routines needed * for both compression and decompression. @@ -21,7 +24,7 @@ * of a DCT block read in natural order (left to right, top to bottom). */ -#if 0 /* This table is not actually needed in v6a */ +#if 0 /* This table is not actually needed in v6a */ const int jpeg_zigzag_order[DCTSIZE2] = { 0, 1, 5, 6, 14, 15, 27, 28, @@ -87,30 +90,10 @@ } -/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays - * and coefficient-block arrays. This won't work on 80x86 because the arrays - * are FAR and we're assuming a small-pointer memory model. However, some - * DOS compilers provide far-pointer versions of memcpy() and memset() even - * in the small-model libraries. These will be used if USE_FMEM is defined. - * Otherwise, the routines below do it the hard way. (The performance cost - * is not all that great, because these routines aren't very heavily used.) - */ - -#ifndef NEED_FAR_POINTERS /* normal case, same as regular macros */ -#define FMEMCOPY(dest,src,size) MEMCOPY(dest,src,size) -#define FMEMZERO(target,size) MEMZERO(target,size) -#else /* 80x86 case, define if we can */ -#ifdef USE_FMEM -#define FMEMCOPY(dest,src,size) _fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size)) -#define FMEMZERO(target,size) _fmemset((void FAR *)(target), 0, (size_t)(size)) -#endif -#endif - - GLOBAL(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols) + JSAMPARRAY output_array, int dest_row, + int num_rows, JDIMENSION num_cols) /* Copy some rows of samples from one place to another. * num_rows rows are copied from input_array[source_row++] * to output_array[dest_row++]; these areas may overlap for duplication. @@ -118,11 +101,7 @@ */ { register JSAMPROW inptr, outptr; -#ifdef FMEMCOPY - register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE)); -#else - register JDIMENSION count; -#endif + register size_t count = (size_t) (num_cols * sizeof(JSAMPLE)); register int row; input_array += source_row; @@ -131,49 +110,24 @@ for (row = num_rows; row > 0; row--) { inptr = *input_array++; outptr = *output_array++; -#ifdef FMEMCOPY - FMEMCOPY(outptr, inptr, count); -#else - for (count = num_cols; count > 0; count--) - *outptr++ = *inptr++; /* needn't bother with GETJSAMPLE() here */ -#endif + MEMCOPY(outptr, inptr, count); } } GLOBAL(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks) + JDIMENSION num_blocks) /* Copy a row of coefficient blocks from one place to another. */ { -#ifdef FMEMCOPY - FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF))); -#else - register JCOEFPTR inptr, outptr; - register long count; - - inptr = (JCOEFPTR) input_row; - outptr = (JCOEFPTR) output_row; - for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) { - *outptr++ = *inptr++; - } -#endif + MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF))); } GLOBAL(void) -jzero_far (void FAR * target, size_t bytestozero) -/* Zero out a chunk of FAR memory. */ +jzero_far (void *target, size_t bytestozero) +/* Zero out a chunk of memory. */ /* This might be sample-array data, block-array data, or alloc_large data. */ { -#ifdef FMEMZERO - FMEMZERO(target, bytestozero); -#else - register char FAR * ptr = (char FAR *) target; - register size_t count; - - for (count = bytestozero; count > 0; count--) { - *ptr++ = 0; - } -#endif + MEMZERO(target, bytestozero); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jversion.h glmark2-2021.02/android/jni/src/libjpeg-turbo/jversion.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/jversion.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/jversion.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jversion.h * - * Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding. - * Copyright (C) 2010, 2012, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2012-2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains software version identification. */ @@ -12,7 +14,7 @@ #if JPEG_LIB_VERSION >= 80 -#define JVERSION "8b 16-May-2010" +#define JVERSION "8d 15-Jan-2012" #elif JPEG_LIB_VERSION >= 70 @@ -20,12 +22,28 @@ #else -#define JVERSION "6b 27-Mar-1998" +#define JVERSION "6b 27-Mar-1998" #endif -#define JCOPYRIGHT "Copyright (C) 1991-2010 Thomas G. Lane, Guido Vollbeding\n" \ - "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ - "Copyright (C) 2009-2012 D. R. Commander\n" \ - "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)" +/* + * NOTE: It is our convention to place the authors in the following order: + * - libjpeg-turbo authors (2009-) in descending order of the date of their + * most recent contribution to the project, then in ascending order of the + * date of their first contribution to the project + * - Upstream authors in descending order of the date of the first inclusion of + * their code + */ + +#define JCOPYRIGHT "Copyright (C) 2009-2016 D. R. Commander\n" \ + "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ + "Copyright (C) 2015-2016 Matthieu Darbois\n" \ + "Copyright (C) 2015 Google, Inc.\n" \ + "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \ + "Copyright (C) 2013 Linaro Limited\n" \ + "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ + "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ + "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ + "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding" \ + +#define JCOPYRIGHT_SHORT "Copyright (C) 1991-2016 The libjpeg-turbo Project and many others" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/libjpeg.map.in glmark2-2021.02/android/jni/src/libjpeg-turbo/libjpeg.map.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/libjpeg.map.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/libjpeg.map.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +LIBJPEGTURBO_@JPEG_LIB_VERSION_DECIMAL@ { + @MEM_SRCDST_FUNCTIONS@ + local: + jsimd_*; + jconst_*; +}; + +LIBJPEG_@JPEG_LIB_VERSION_DECIMAL@ { + global: + *; +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/libjpeg.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/libjpeg.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/libjpeg.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/libjpeg.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3105 @@ +USING THE IJG JPEG LIBRARY + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding. +libjpeg-turbo Modifications: +Copyright (C) 2010, 2014-2016, D. R. Commander. +Copyright (C) 2015, Google, Inc. +For conditions of distribution and use, see the accompanying README.ijg file. + + +This file describes how to use the IJG JPEG library within an application +program. Read it if you want to write a program that uses the library. + +The file example.c provides heavily commented skeleton code for calling the +JPEG library. Also see jpeglib.h (the include file to be used by application +programs) for full details about data structures and function parameter lists. +The library source code, of course, is the ultimate reference. + +Note that there have been *major* changes from the application interface +presented by IJG version 4 and earlier versions. The old design had several +inherent limitations, and it had accumulated a lot of cruft as we added +features while trying to minimize application-interface changes. We have +sacrificed backward compatibility in the version 5 rewrite, but we think the +improvements justify this. + + +TABLE OF CONTENTS +----------------- + +Overview: + Functions provided by the library + Outline of typical usage +Basic library usage: + Data formats + Compression details + Decompression details + Mechanics of usage: include files, linking, etc +Advanced features: + Compression parameter selection + Decompression parameter selection + Special color spaces + Error handling + Compressed data handling (source and destination managers) + I/O suspension + Progressive JPEG support + Buffered-image mode + Abbreviated datastreams and multiple images + Special markers + Raw (downsampled) image data + Really raw data: DCT coefficients + Progress monitoring + Memory management + Memory usage + Library compile-time options + Portability considerations + +You should read at least the overview and basic usage sections before trying +to program with the library. The sections on advanced features can be read +if and when you need them. + + +OVERVIEW +======== + +Functions provided by the library +--------------------------------- + +The IJG JPEG library provides C code to read and write JPEG-compressed image +files. The surrounding application program receives or supplies image data a +scanline at a time, using a straightforward uncompressed image format. All +details of color conversion and other preprocessing/postprocessing can be +handled by the library. + +The library includes a substantial amount of code that is not covered by the +JPEG standard but is necessary for typical applications of JPEG. These +functions preprocess the image before JPEG compression or postprocess it after +decompression. They include colorspace conversion, downsampling/upsampling, +and color quantization. The application indirectly selects use of this code +by specifying the format in which it wishes to supply or receive image data. +For example, if colormapped output is requested, then the decompression +library automatically invokes color quantization. + +A wide range of quality vs. speed tradeoffs are possible in JPEG processing, +and even more so in decompression postprocessing. The decompression library +provides multiple implementations that cover most of the useful tradeoffs, +ranging from very-high-quality down to fast-preview operation. On the +compression side we have generally not provided low-quality choices, since +compression is normally less time-critical. It should be understood that the +low-quality modes may not meet the JPEG standard's accuracy requirements; +nonetheless, they are useful for viewers. + +A word about functions *not* provided by the library. We handle a subset of +the ISO JPEG standard; most baseline, extended-sequential, and progressive +JPEG processes are supported. (Our subset includes all features now in common +use.) Unsupported ISO options include: + * Hierarchical storage + * Lossless JPEG + * DNL marker + * Nonintegral subsampling ratios +We support both 8- and 12-bit data precision, but this is a compile-time +choice rather than a run-time choice; hence it is difficult to use both +precisions in a single application. + +By itself, the library handles only interchange JPEG datastreams --- in +particular the widely used JFIF file format. The library can be used by +surrounding code to process interchange or abbreviated JPEG datastreams that +are embedded in more complex file formats. (For example, this library is +used by the free LIBTIFF library to support JPEG compression in TIFF.) + + +Outline of typical usage +------------------------ + +The rough outline of a JPEG compression operation is: + + Allocate and initialize a JPEG compression object + Specify the destination for the compressed data (eg, a file) + Set parameters for compression, including image size & colorspace + jpeg_start_compress(...); + while (scan lines remain to be written) + jpeg_write_scanlines(...); + jpeg_finish_compress(...); + Release the JPEG compression object + +A JPEG compression object holds parameters and working state for the JPEG +library. We make creation/destruction of the object separate from starting +or finishing compression of an image; the same object can be re-used for a +series of image compression operations. This makes it easy to re-use the +same parameter settings for a sequence of images. Re-use of a JPEG object +also has important implications for processing abbreviated JPEG datastreams, +as discussed later. + +The image data to be compressed is supplied to jpeg_write_scanlines() from +in-memory buffers. If the application is doing file-to-file compression, +reading image data from the source file is the application's responsibility. +The library emits compressed data by calling a "data destination manager", +which typically will write the data into a file; but the application can +provide its own destination manager to do something else. + +Similarly, the rough outline of a JPEG decompression operation is: + + Allocate and initialize a JPEG decompression object + Specify the source of the compressed data (eg, a file) + Call jpeg_read_header() to obtain image info + Set parameters for decompression + jpeg_start_decompress(...); + while (scan lines remain to be read) + jpeg_read_scanlines(...); + jpeg_finish_decompress(...); + Release the JPEG decompression object + +This is comparable to the compression outline except that reading the +datastream header is a separate step. This is helpful because information +about the image's size, colorspace, etc is available when the application +selects decompression parameters. For example, the application can choose an +output scaling ratio that will fit the image into the available screen size. + +The decompression library obtains compressed data by calling a data source +manager, which typically will read the data from a file; but other behaviors +can be obtained with a custom source manager. Decompressed data is delivered +into in-memory buffers passed to jpeg_read_scanlines(). + +It is possible to abort an incomplete compression or decompression operation +by calling jpeg_abort(); or, if you do not need to retain the JPEG object, +simply release it by calling jpeg_destroy(). + +JPEG compression and decompression objects are two separate struct types. +However, they share some common fields, and certain routines such as +jpeg_destroy() can work on either type of object. + +The JPEG library has no static variables: all state is in the compression +or decompression object. Therefore it is possible to process multiple +compression and decompression operations concurrently, using multiple JPEG +objects. + +Both compression and decompression can be done in an incremental memory-to- +memory fashion, if suitable source/destination managers are used. See the +section on "I/O suspension" for more details. + + +BASIC LIBRARY USAGE +=================== + +Data formats +------------ + +Before diving into procedural details, it is helpful to understand the +image data format that the JPEG library expects or returns. + +The standard input image format is a rectangular array of pixels, with each +pixel having the same number of "component" or "sample" values (color +channels). You must specify how many components there are and the colorspace +interpretation of the components. Most applications will use RGB data +(three components per pixel) or grayscale data (one component per pixel). +PLEASE NOTE THAT RGB DATA IS THREE SAMPLES PER PIXEL, GRAYSCALE ONLY ONE. +A remarkable number of people manage to miss this, only to find that their +programs don't work with grayscale JPEG files. + +There is no provision for colormapped input. JPEG files are always full-color +or full grayscale (or sometimes another colorspace such as CMYK). You can +feed in a colormapped image by expanding it to full-color format. However +JPEG often doesn't work very well with source data that has been colormapped, +because of dithering noise. This is discussed in more detail in the JPEG FAQ +and the other references mentioned in the README.ijg file. + +Pixels are stored by scanlines, with each scanline running from left to +right. The component values for each pixel are adjacent in the row; for +example, R,G,B,R,G,B,R,G,B,... for 24-bit RGB color. Each scanline is an +array of data type JSAMPLE --- which is typically "unsigned char", unless +you've changed jmorecfg.h. (You can also change the RGB pixel layout, say +to B,G,R order, by modifying jmorecfg.h. But see the restrictions listed in +that file before doing so.) + +A 2-D array of pixels is formed by making a list of pointers to the starts of +scanlines; so the scanlines need not be physically adjacent in memory. Even +if you process just one scanline at a time, you must make a one-element +pointer array to conform to this structure. Pointers to JSAMPLE rows are of +type JSAMPROW, and the pointer to the pointer array is of type JSAMPARRAY. + +The library accepts or supplies one or more complete scanlines per call. +It is not possible to process part of a row at a time. Scanlines are always +processed top-to-bottom. You can process an entire image in one call if you +have it all in memory, but usually it's simplest to process one scanline at +a time. + +For best results, source data values should have the precision specified by +BITS_IN_JSAMPLE (normally 8 bits). For instance, if you choose to compress +data that's only 6 bits/channel, you should left-justify each value in a +byte before passing it to the compressor. If you need to compress data +that has more than 8 bits/channel, compile with BITS_IN_JSAMPLE = 12. +(See "Library compile-time options", later.) + + +The data format returned by the decompressor is the same in all details, +except that colormapped output is supported. (Again, a JPEG file is never +colormapped. But you can ask the decompressor to perform on-the-fly color +quantization to deliver colormapped output.) If you request colormapped +output then the returned data array contains a single JSAMPLE per pixel; +its value is an index into a color map. The color map is represented as +a 2-D JSAMPARRAY in which each row holds the values of one color component, +that is, colormap[i][j] is the value of the i'th color component for pixel +value (map index) j. Note that since the colormap indexes are stored in +JSAMPLEs, the maximum number of colors is limited by the size of JSAMPLE +(ie, at most 256 colors for an 8-bit JPEG library). + + +Compression details +------------------- + +Here we revisit the JPEG compression outline given in the overview. + +1. Allocate and initialize a JPEG compression object. + +A JPEG compression object is a "struct jpeg_compress_struct". (It also has +a bunch of subsidiary structures which are allocated via malloc(), but the +application doesn't control those directly.) This struct can be just a local +variable in the calling routine, if a single routine is going to execute the +whole JPEG compression sequence. Otherwise it can be static or allocated +from malloc(). + +You will also need a structure representing a JPEG error handler. The part +of this that the library cares about is a "struct jpeg_error_mgr". If you +are providing your own error handler, you'll typically want to embed the +jpeg_error_mgr struct in a larger structure; this is discussed later under +"Error handling". For now we'll assume you are just using the default error +handler. The default error handler will print JPEG error/warning messages +on stderr, and it will call exit() if a fatal error occurs. + +You must initialize the error handler structure, store a pointer to it into +the JPEG object's "err" field, and then call jpeg_create_compress() to +initialize the rest of the JPEG object. + +Typical code for this step, if you are using the default error handler, is + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + ... + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + +jpeg_create_compress allocates a small amount of memory, so it could fail +if you are out of memory. In that case it will exit via the error handler; +that's why the error handler must be initialized first. + + +2. Specify the destination for the compressed data (eg, a file). + +As previously mentioned, the JPEG library delivers compressed data to a +"data destination" module. The library includes one data destination +module which knows how to write to a stdio stream. You can use your own +destination module if you want to do something else, as discussed later. + +If you use the standard destination module, you must open the target stdio +stream beforehand. Typical code for this step looks like: + + FILE *outfile; + ... + if ((outfile = fopen(filename, "wb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_dest(&cinfo, outfile); + +where the last line invokes the standard destination module. + +WARNING: it is critical that the binary compressed data be delivered to the +output file unchanged. On non-Unix systems the stdio library may perform +newline translation or otherwise corrupt binary data. To suppress this +behavior, you may need to use a "b" option to fopen (as shown above), or use +setmode() or another routine to put the stdio stream in binary mode. See +cjpeg.c and djpeg.c for code that has been found to work on many systems. + +You can select the data destination after setting other parameters (step 3), +if that's more convenient. You may not change the destination between +calling jpeg_start_compress() and jpeg_finish_compress(). + + +3. Set parameters for compression, including image size & colorspace. + +You must supply information about the source image by setting the following +fields in the JPEG object (cinfo structure): + + image_width Width of image, in pixels + image_height Height of image, in pixels + input_components Number of color channels (samples per pixel) + in_color_space Color space of source image + +The image dimensions are, hopefully, obvious. JPEG supports image dimensions +of 1 to 64K pixels in either direction. The input color space is typically +RGB or grayscale, and input_components is 3 or 1 accordingly. (See "Special +color spaces", later, for more info.) The in_color_space field must be +assigned one of the J_COLOR_SPACE enum constants, typically JCS_RGB or +JCS_GRAYSCALE. + +JPEG has a large number of compression parameters that determine how the +image is encoded. Most applications don't need or want to know about all +these parameters. You can set all the parameters to reasonable defaults by +calling jpeg_set_defaults(); then, if there are particular values you want +to change, you can do so after that. The "Compression parameter selection" +section tells about all the parameters. + +You must set in_color_space correctly before calling jpeg_set_defaults(), +because the defaults depend on the source image colorspace. However the +other three source image parameters need not be valid until you call +jpeg_start_compress(). There's no harm in calling jpeg_set_defaults() more +than once, if that happens to be convenient. + +Typical code for a 24-bit RGB source image is + + cinfo.image_width = Width; /* image width and height, in pixels */ + cinfo.image_height = Height; + cinfo.input_components = 3; /* # of color components per pixel */ + cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ + + jpeg_set_defaults(&cinfo); + /* Make optional parameter settings here */ + + +4. jpeg_start_compress(...); + +After you have established the data destination and set all the necessary +source image info and other parameters, call jpeg_start_compress() to begin +a compression cycle. This will initialize internal state, allocate working +storage, and emit the first few bytes of the JPEG datastream header. + +Typical code: + + jpeg_start_compress(&cinfo, TRUE); + +The "TRUE" parameter ensures that a complete JPEG interchange datastream +will be written. This is appropriate in most cases. If you think you might +want to use an abbreviated datastream, read the section on abbreviated +datastreams, below. + +Once you have called jpeg_start_compress(), you may not alter any JPEG +parameters or other fields of the JPEG object until you have completed +the compression cycle. + + +5. while (scan lines remain to be written) + jpeg_write_scanlines(...); + +Now write all the required image data by calling jpeg_write_scanlines() +one or more times. You can pass one or more scanlines in each call, up +to the total image height. In most applications it is convenient to pass +just one or a few scanlines at a time. The expected format for the passed +data is discussed under "Data formats", above. + +Image data should be written in top-to-bottom scanline order. The JPEG spec +contains some weasel wording about how top and bottom are application-defined +terms (a curious interpretation of the English language...) but if you want +your files to be compatible with everyone else's, you WILL use top-to-bottom +order. If the source data must be read in bottom-to-top order, you can use +the JPEG library's virtual array mechanism to invert the data efficiently. +Examples of this can be found in the sample application cjpeg. + +The library maintains a count of the number of scanlines written so far +in the next_scanline field of the JPEG object. Usually you can just use +this variable as the loop counter, so that the loop test looks like +"while (cinfo.next_scanline < cinfo.image_height)". + +Code for this step depends heavily on the way that you store the source data. +example.c shows the following code for the case of a full-size 2-D source +array containing 3-byte RGB pixels: + + JSAMPROW row_pointer[1]; /* pointer to a single row */ + int row_stride; /* physical row width in buffer */ + + row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */ + + while (cinfo.next_scanline < cinfo.image_height) { + row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; + jpeg_write_scanlines(&cinfo, row_pointer, 1); + } + +jpeg_write_scanlines() returns the number of scanlines actually written. +This will normally be equal to the number passed in, so you can usually +ignore the return value. It is different in just two cases: + * If you try to write more scanlines than the declared image height, + the additional scanlines are ignored. + * If you use a suspending data destination manager, output buffer overrun + will cause the compressor to return before accepting all the passed lines. + This feature is discussed under "I/O suspension", below. The normal + stdio destination manager will NOT cause this to happen. +In any case, the return value is the same as the change in the value of +next_scanline. + + +6. jpeg_finish_compress(...); + +After all the image data has been written, call jpeg_finish_compress() to +complete the compression cycle. This step is ESSENTIAL to ensure that the +last bufferload of data is written to the data destination. +jpeg_finish_compress() also releases working memory associated with the JPEG +object. + +Typical code: + + jpeg_finish_compress(&cinfo); + +If using the stdio destination manager, don't forget to close the output +stdio stream (if necessary) afterwards. + +If you have requested a multi-pass operating mode, such as Huffman code +optimization, jpeg_finish_compress() will perform the additional passes using +data buffered by the first pass. In this case jpeg_finish_compress() may take +quite a while to complete. With the default compression parameters, this will +not happen. + +It is an error to call jpeg_finish_compress() before writing the necessary +total number of scanlines. If you wish to abort compression, call +jpeg_abort() as discussed below. + +After completing a compression cycle, you may dispose of the JPEG object +as discussed next, or you may use it to compress another image. In that case +return to step 2, 3, or 4 as appropriate. If you do not change the +destination manager, the new datastream will be written to the same target. +If you do not change any JPEG parameters, the new datastream will be written +with the same parameters as before. Note that you can change the input image +dimensions freely between cycles, but if you change the input colorspace, you +should call jpeg_set_defaults() to adjust for the new colorspace; and then +you'll need to repeat all of step 3. + + +7. Release the JPEG compression object. + +When you are done with a JPEG compression object, destroy it by calling +jpeg_destroy_compress(). This will free all subsidiary memory (regardless of +the previous state of the object). Or you can call jpeg_destroy(), which +works for either compression or decompression objects --- this may be more +convenient if you are sharing code between compression and decompression +cases. (Actually, these routines are equivalent except for the declared type +of the passed pointer. To avoid gripes from ANSI C compilers, jpeg_destroy() +should be passed a j_common_ptr.) + +If you allocated the jpeg_compress_struct structure from malloc(), freeing +it is your responsibility --- jpeg_destroy() won't. Ditto for the error +handler structure. + +Typical code: + + jpeg_destroy_compress(&cinfo); + + +8. Aborting. + +If you decide to abort a compression cycle before finishing, you can clean up +in either of two ways: + +* If you don't need the JPEG object any more, just call + jpeg_destroy_compress() or jpeg_destroy() to release memory. This is + legitimate at any point after calling jpeg_create_compress() --- in fact, + it's safe even if jpeg_create_compress() fails. + +* If you want to re-use the JPEG object, call jpeg_abort_compress(), or call + jpeg_abort() which works on both compression and decompression objects. + This will return the object to an idle state, releasing any working memory. + jpeg_abort() is allowed at any time after successful object creation. + +Note that cleaning up the data destination, if required, is your +responsibility; neither of these routines will call term_destination(). +(See "Compressed data handling", below, for more about that.) + +jpeg_destroy() and jpeg_abort() are the only safe calls to make on a JPEG +object that has reported an error by calling error_exit (see "Error handling" +for more info). The internal state of such an object is likely to be out of +whack. Either of these two routines will return the object to a known state. + + +Decompression details +--------------------- + +Here we revisit the JPEG decompression outline given in the overview. + +1. Allocate and initialize a JPEG decompression object. + +This is just like initialization for compression, as discussed above, +except that the object is a "struct jpeg_decompress_struct" and you +call jpeg_create_decompress(). Error handling is exactly the same. + +Typical code: + + struct jpeg_decompress_struct cinfo; + struct jpeg_error_mgr jerr; + ... + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + +(Both here and in the IJG code, we usually use variable name "cinfo" for +both compression and decompression objects.) + + +2. Specify the source of the compressed data (eg, a file). + +As previously mentioned, the JPEG library reads compressed data from a "data +source" module. The library includes one data source module which knows how +to read from a stdio stream. You can use your own source module if you want +to do something else, as discussed later. + +If you use the standard source module, you must open the source stdio stream +beforehand. Typical code for this step looks like: + + FILE *infile; + ... + if ((infile = fopen(filename, "rb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_src(&cinfo, infile); + +where the last line invokes the standard source module. + +WARNING: it is critical that the binary compressed data be read unchanged. +On non-Unix systems the stdio library may perform newline translation or +otherwise corrupt binary data. To suppress this behavior, you may need to use +a "b" option to fopen (as shown above), or use setmode() or another routine to +put the stdio stream in binary mode. See cjpeg.c and djpeg.c for code that +has been found to work on many systems. + +You may not change the data source between calling jpeg_read_header() and +jpeg_finish_decompress(). If you wish to read a series of JPEG images from +a single source file, you should repeat the jpeg_read_header() to +jpeg_finish_decompress() sequence without reinitializing either the JPEG +object or the data source module; this prevents buffered input data from +being discarded. + + +3. Call jpeg_read_header() to obtain image info. + +Typical code for this step is just + + jpeg_read_header(&cinfo, TRUE); + +This will read the source datastream header markers, up to the beginning +of the compressed data proper. On return, the image dimensions and other +info have been stored in the JPEG object. The application may wish to +consult this information before selecting decompression parameters. + +More complex code is necessary if + * A suspending data source is used --- in that case jpeg_read_header() + may return before it has read all the header data. See "I/O suspension", + below. The normal stdio source manager will NOT cause this to happen. + * Abbreviated JPEG files are to be processed --- see the section on + abbreviated datastreams. Standard applications that deal only in + interchange JPEG files need not be concerned with this case either. + +It is permissible to stop at this point if you just wanted to find out the +image dimensions and other header info for a JPEG file. In that case, +call jpeg_destroy() when you are done with the JPEG object, or call +jpeg_abort() to return it to an idle state before selecting a new data +source and reading another header. + + +4. Set parameters for decompression. + +jpeg_read_header() sets appropriate default decompression parameters based on +the properties of the image (in particular, its colorspace). However, you +may well want to alter these defaults before beginning the decompression. +For example, the default is to produce full color output from a color file. +If you want colormapped output you must ask for it. Other options allow the +returned image to be scaled and allow various speed/quality tradeoffs to be +selected. "Decompression parameter selection", below, gives details. + +If the defaults are appropriate, nothing need be done at this step. + +Note that all default values are set by each call to jpeg_read_header(). +If you reuse a decompression object, you cannot expect your parameter +settings to be preserved across cycles, as you can for compression. +You must set desired parameter values each time. + + +5. jpeg_start_decompress(...); + +Once the parameter values are satisfactory, call jpeg_start_decompress() to +begin decompression. This will initialize internal state, allocate working +memory, and prepare for returning data. + +Typical code is just + + jpeg_start_decompress(&cinfo); + +If you have requested a multi-pass operating mode, such as 2-pass color +quantization, jpeg_start_decompress() will do everything needed before data +output can begin. In this case jpeg_start_decompress() may take quite a while +to complete. With a single-scan (non progressive) JPEG file and default +decompression parameters, this will not happen; jpeg_start_decompress() will +return quickly. + +After this call, the final output image dimensions, including any requested +scaling, are available in the JPEG object; so is the selected colormap, if +colormapped output has been requested. Useful fields include + + output_width image width and height, as scaled + output_height + out_color_components # of color components in out_color_space + output_components # of color components returned per pixel + colormap the selected colormap, if any + actual_number_of_colors number of entries in colormap + +output_components is 1 (a colormap index) when quantizing colors; otherwise it +equals out_color_components. It is the number of JSAMPLE values that will be +emitted per pixel in the output arrays. + +Typically you will need to allocate data buffers to hold the incoming image. +You will need output_width * output_components JSAMPLEs per scanline in your +output buffer, and a total of output_height scanlines will be returned. + +Note: if you are using the JPEG library's internal memory manager to allocate +data buffers (as djpeg does), then the manager's protocol requires that you +request large buffers *before* calling jpeg_start_decompress(). This is a +little tricky since the output_XXX fields are not normally valid then. You +can make them valid by calling jpeg_calc_output_dimensions() after setting the +relevant parameters (scaling, output color space, and quantization flag). + + +6. while (scan lines remain to be read) + jpeg_read_scanlines(...); + +Now you can read the decompressed image data by calling jpeg_read_scanlines() +one or more times. At each call, you pass in the maximum number of scanlines +to be read (ie, the height of your working buffer); jpeg_read_scanlines() +will return up to that many lines. The return value is the number of lines +actually read. The format of the returned data is discussed under "Data +formats", above. Don't forget that grayscale and color JPEGs will return +different data formats! + +Image data is returned in top-to-bottom scanline order. If you must write +out the image in bottom-to-top order, you can use the JPEG library's virtual +array mechanism to invert the data efficiently. Examples of this can be +found in the sample application djpeg. + +The library maintains a count of the number of scanlines returned so far +in the output_scanline field of the JPEG object. Usually you can just use +this variable as the loop counter, so that the loop test looks like +"while (cinfo.output_scanline < cinfo.output_height)". (Note that the test +should NOT be against image_height, unless you never use scaling. The +image_height field is the height of the original unscaled image.) +The return value always equals the change in the value of output_scanline. + +If you don't use a suspending data source, it is safe to assume that +jpeg_read_scanlines() reads at least one scanline per call, until the +bottom of the image has been reached. + +If you use a buffer larger than one scanline, it is NOT safe to assume that +jpeg_read_scanlines() fills it. (The current implementation returns only a +few scanlines per call, no matter how large a buffer you pass.) So you must +always provide a loop that calls jpeg_read_scanlines() repeatedly until the +whole image has been read. + + +7. jpeg_finish_decompress(...); + +After all the image data has been read, call jpeg_finish_decompress() to +complete the decompression cycle. This causes working memory associated +with the JPEG object to be released. + +Typical code: + + jpeg_finish_decompress(&cinfo); + +If using the stdio source manager, don't forget to close the source stdio +stream if necessary. + +It is an error to call jpeg_finish_decompress() before reading the correct +total number of scanlines. If you wish to abort decompression, call +jpeg_abort() as discussed below. + +After completing a decompression cycle, you may dispose of the JPEG object as +discussed next, or you may use it to decompress another image. In that case +return to step 2 or 3 as appropriate. If you do not change the source +manager, the next image will be read from the same source. + + +8. Release the JPEG decompression object. + +When you are done with a JPEG decompression object, destroy it by calling +jpeg_destroy_decompress() or jpeg_destroy(). The previous discussion of +destroying compression objects applies here too. + +Typical code: + + jpeg_destroy_decompress(&cinfo); + + +9. Aborting. + +You can abort a decompression cycle by calling jpeg_destroy_decompress() or +jpeg_destroy() if you don't need the JPEG object any more, or +jpeg_abort_decompress() or jpeg_abort() if you want to reuse the object. +The previous discussion of aborting compression cycles applies here too. + + +Partial image decompression +--------------------------- + +Partial image decompression is convenient for performance-critical applications +that wish to view only a portion of a large JPEG image without decompressing +the whole thing. It it also useful in memory-constrained environments (such as +on mobile devices.) This library provides the following functions to support +partial image decompression: + +1. Skipping rows when decompressing + + jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines); + +This function provides application programmers with the ability to skip over +multiple rows in the JPEG image. + +Suspending data sources are not supported by this function. Calling +jpeg_skip_scanlines() with a suspending data source will result in undefined +behavior. + +jpeg_skip_scanlines() will not allow skipping past the bottom of the image. If +the value of num_lines is large enough to skip past the bottom of the image, +then the function will skip to the end of the image instead. + +If the value of num_lines is valid, then jpeg_skip_scanlines() will always +skip all of the input rows requested. There is no need to inspect the return +value of the function in that case. + +Best results will be achieved by calling jpeg_skip_scanlines() for large chunks +of rows. The function should be viewed as a way to quickly jump to a +particular vertical offset in the JPEG image in order to decode a subset of the +image. Used in this manner, it will provide significant performance +improvements. + +Calling jpeg_skip_scanlines() for small values of num_lines has several +potential drawbacks: + 1) JPEG decompression occurs in blocks, so if jpeg_skip_scanlines() is + called from the middle of a decompression block, then it is likely that + much of the decompression work has already been done for the first + couple of rows that need to be skipped. + 2) When this function returns, it must leave the decompressor in a state + such that it is ready to read the next line. This may involve + decompressing a block that must be partially skipped. +These issues are especially tricky for cases in which upsampling requires +context rows. In the worst case, jpeg_skip_scanlines() will perform similarly +to jpeg_read_scanlines() (since it will actually call jpeg_read_scanlines().) + +2. Decompressing partial scanlines + + jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) + +This function provides application programmers with the ability to decompress +only a portion of each row in the JPEG image. It must be called after +jpeg_start_decompress() and before any calls to jpeg_read_scanlines() or +jpeg_skip_scanlines(). + +If xoffset and width do not form a valid subset of the image row, then this +function will generate an error. Note that if the output image is scaled, then +xoffset and width are relative to the scaled image dimensions. + +xoffset and width are passed by reference because xoffset must fall on an iMCU +boundary. If it doesn't, then it will be moved left to the nearest iMCU +boundary, and width will be increased accordingly. If the calling program does +not like the adjusted values of xoffset and width, then it can call +jpeg_crop_scanline() again with new values (for instance, if it wants to move +xoffset to the nearest iMCU boundary to the right instead of to the left.) + +After calling this function, cinfo->output_width will be set to the adjusted +width. This value should be used when allocating an output buffer to pass to +jpeg_read_scanlines(). + +The output image from a partial-width decompression will be identical to the +corresponding image region from a full decode, with one exception: The "fancy" +(smooth) h2v2 (4:2:0) and h2v1 (4:2:2) upsampling algorithms fill in the +missing chroma components by averaging the chroma components from neighboring +pixels, except on the right and left edges of the image (where there are no +neighboring pixels.) When performing a partial-width decompression, these +"fancy" upsampling algorithms may treat the left and right edges of the partial +image region as if they are the left and right edges of the image, meaning that +the upsampling algorithm may be simplified. The result is that the pixels on +the left or right edge of the partial image may not be exactly identical to the +corresponding pixels in the original image. + + +Mechanics of usage: include files, linking, etc +----------------------------------------------- + +Applications using the JPEG library should include the header file jpeglib.h +to obtain declarations of data types and routines. Before including +jpeglib.h, include system headers that define at least the typedefs FILE and +size_t. On ANSI-conforming systems, including is sufficient; on +older Unix systems, you may need to define size_t. + +If the application needs to refer to individual JPEG library error codes, also +include jerror.h to define those symbols. + +jpeglib.h indirectly includes the files jconfig.h and jmorecfg.h. If you are +installing the JPEG header files in a system directory, you will want to +install all four files: jpeglib.h, jerror.h, jconfig.h, jmorecfg.h. + +The most convenient way to include the JPEG code into your executable program +is to prepare a library file ("libjpeg.a", or a corresponding name on non-Unix +machines) and reference it at your link step. If you use only half of the +library (only compression or only decompression), only that much code will be +included from the library, unless your linker is hopelessly brain-damaged. +The supplied makefiles build libjpeg.a automatically (see install.txt). + +While you can build the JPEG library as a shared library if the whim strikes +you, we don't really recommend it. The trouble with shared libraries is that +at some point you'll probably try to substitute a new version of the library +without recompiling the calling applications. That generally doesn't work +because the parameter struct declarations usually change with each new +version. In other words, the library's API is *not* guaranteed binary +compatible across versions; we only try to ensure source-code compatibility. +(In hindsight, it might have been smarter to hide the parameter structs from +applications and introduce a ton of access functions instead. Too late now, +however.) + +It may be worth pointing out that the core JPEG library does not actually +require the stdio library: only the default source/destination managers and +error handler need it. You can use the library in a stdio-less environment +if you replace those modules and use jmemnobs.c (or another memory manager of +your own devising). More info about the minimum system library requirements +may be found in jinclude.h. + + +ADVANCED FEATURES +================= + +Compression parameter selection +------------------------------- + +This section describes all the optional parameters you can set for JPEG +compression, as well as the "helper" routines provided to assist in this +task. Proper setting of some parameters requires detailed understanding +of the JPEG standard; if you don't know what a parameter is for, it's best +not to mess with it! See REFERENCES in the README.ijg file for pointers to +more info about JPEG. + +It's a good idea to call jpeg_set_defaults() first, even if you plan to set +all the parameters; that way your code is more likely to work with future JPEG +libraries that have additional parameters. For the same reason, we recommend +you use a helper routine where one is provided, in preference to twiddling +cinfo fields directly. + +The helper routines are: + +jpeg_set_defaults (j_compress_ptr cinfo) + This routine sets all JPEG parameters to reasonable defaults, using + only the input image's color space (field in_color_space, which must + already be set in cinfo). Many applications will only need to use + this routine and perhaps jpeg_set_quality(). + +jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) + Sets the JPEG file's colorspace (field jpeg_color_space) as specified, + and sets other color-space-dependent parameters appropriately. See + "Special color spaces", below, before using this. A large number of + parameters, including all per-component parameters, are set by this + routine; if you want to twiddle individual parameters you should call + jpeg_set_colorspace() before rather than after. + +jpeg_default_colorspace (j_compress_ptr cinfo) + Selects an appropriate JPEG colorspace based on cinfo->in_color_space, + and calls jpeg_set_colorspace(). This is actually a subroutine of + jpeg_set_defaults(). It's broken out in case you want to change + just the colorspace-dependent JPEG parameters. + +jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline) + Constructs JPEG quantization tables appropriate for the indicated + quality setting. The quality value is expressed on the 0..100 scale + recommended by IJG (cjpeg's "-quality" switch uses this routine). + Note that the exact mapping from quality values to tables may change + in future IJG releases as more is learned about DCT quantization. + If the force_baseline parameter is TRUE, then the quantization table + entries are constrained to the range 1..255 for full JPEG baseline + compatibility. In the current implementation, this only makes a + difference for quality settings below 25, and it effectively prevents + very small/low quality files from being generated. The IJG decoder + is capable of reading the non-baseline files generated at low quality + settings when force_baseline is FALSE, but other decoders may not be. + +jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) + Same as jpeg_set_quality() except that the generated tables are the + sample tables given in the JPEC spec section K.1, multiplied by the + specified scale factor (which is expressed as a percentage; thus + scale_factor = 100 reproduces the spec's tables). Note that larger + scale factors give lower quality. This entry point is useful for + conforming to the Adobe PostScript DCT conventions, but we do not + recommend linear scaling as a user-visible quality scale otherwise. + force_baseline again constrains the computed table entries to 1..255. + +int jpeg_quality_scaling (int quality) + Converts a value on the IJG-recommended quality scale to a linear + scaling percentage. Note that this routine may change or go away + in future releases --- IJG may choose to adopt a scaling method that + can't be expressed as a simple scalar multiplier, in which case the + premise of this routine collapses. Caveat user. + +jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) + [libjpeg v7+ API/ABI emulation only] + Set default quantization tables with linear q_scale_factor[] values + (see below). + +jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, + int scale_factor, boolean force_baseline) + Allows an arbitrary quantization table to be created. which_tbl + indicates which table slot to fill. basic_table points to an array + of 64 unsigned ints given in normal array order. These values are + multiplied by scale_factor/100 and then clamped to the range 1..65535 + (or to 1..255 if force_baseline is TRUE). + CAUTION: prior to library version 6a, jpeg_add_quant_table expected + the basic table to be given in JPEG zigzag order. If you need to + write code that works with either older or newer versions of this + routine, you must check the library version number. Something like + "#if JPEG_LIB_VERSION >= 61" is the right test. + +jpeg_simple_progression (j_compress_ptr cinfo) + Generates a default scan script for writing a progressive-JPEG file. + This is the recommended method of creating a progressive file, + unless you want to make a custom scan sequence. You must ensure that + the JPEG color space is set correctly before calling this routine. + + +Compression parameters (cinfo fields) include: + +boolean arith_code + If TRUE, use arithmetic coding. + If FALSE, use Huffman coding. + +J_DCT_METHOD dct_method + Selects the algorithm used for the DCT step. Choices are: + JDCT_ISLOW: slow but accurate integer algorithm + JDCT_IFAST: faster, less accurate integer method + JDCT_FLOAT: floating-point method + JDCT_DEFAULT: default method (normally JDCT_ISLOW) + JDCT_FASTEST: fastest method (normally JDCT_IFAST) + In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than + JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary + with other SIMD implementations, or when using libjpeg-turbo without + SIMD extensions.) For quality levels of 90 and below, there should be + little or no perceptible difference between the two algorithms. For + quality levels above 90, however, the difference between JDCT_IFAST and + JDCT_ISLOW becomes more pronounced. With quality=97, for instance, + JDCT_IFAST incurs generally about a 1-3 dB loss (in PSNR) relative to + JDCT_ISLOW, but this can be larger for some images. Do not use + JDCT_IFAST with quality levels above 97. The algorithm often + degenerates at quality=98 and above and can actually produce a more + lossy image than if lower quality levels had been used. Also, in + libjpeg-turbo, JDCT_IFAST is not fully accelerated for quality levels + above 97, so it will be slower than JDCT_ISLOW. JDCT_FLOAT is mainly a + legacy feature. It does not produce significantly more accurate + results than the ISLOW method, and it is much slower. The FLOAT method + may also give different results on different machines due to varying + roundoff behavior, whereas the integer methods should give the same + results on all machines. + +J_COLOR_SPACE jpeg_color_space +int num_components + The JPEG color space and corresponding number of components; see + "Special color spaces", below, for more info. We recommend using + jpeg_set_color_space() if you want to change these. + +boolean optimize_coding + TRUE causes the compressor to compute optimal Huffman coding tables + for the image. This requires an extra pass over the data and + therefore costs a good deal of space and time. The default is + FALSE, which tells the compressor to use the supplied or default + Huffman tables. In most cases optimal tables save only a few percent + of file size compared to the default tables. Note that when this is + TRUE, you need not supply Huffman tables at all, and any you do + supply will be overwritten. + +unsigned int restart_interval +int restart_in_rows + To emit restart markers in the JPEG file, set one of these nonzero. + Set restart_interval to specify the exact interval in MCU blocks. + Set restart_in_rows to specify the interval in MCU rows. (If + restart_in_rows is not 0, then restart_interval is set after the + image width in MCUs is computed.) Defaults are zero (no restarts). + One restart marker per MCU row is often a good choice. + NOTE: the overhead of restart markers is higher in grayscale JPEG + files than in color files, and MUCH higher in progressive JPEGs. + If you use restarts, you may want to use larger intervals in those + cases. + +const jpeg_scan_info *scan_info +int num_scans + By default, scan_info is NULL; this causes the compressor to write a + single-scan sequential JPEG file. If not NULL, scan_info points to + an array of scan definition records of length num_scans. The + compressor will then write a JPEG file having one scan for each scan + definition record. This is used to generate noninterleaved or + progressive JPEG files. The library checks that the scan array + defines a valid JPEG scan sequence. (jpeg_simple_progression creates + a suitable scan definition array for progressive JPEG.) This is + discussed further under "Progressive JPEG support". + +int smoothing_factor + If non-zero, the input image is smoothed; the value should be 1 for + minimal smoothing to 100 for maximum smoothing. Consult jcsample.c + for details of the smoothing algorithm. The default is zero. + +boolean write_JFIF_header + If TRUE, a JFIF APP0 marker is emitted. jpeg_set_defaults() and + jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space + (ie, YCbCr or grayscale) is selected, otherwise FALSE. + +UINT8 JFIF_major_version +UINT8 JFIF_minor_version + The version number to be written into the JFIF marker. + jpeg_set_defaults() initializes the version to 1.01 (major=minor=1). + You should set it to 1.02 (major=1, minor=2) if you plan to write + any JFIF 1.02 extension markers. + +UINT8 density_unit +UINT16 X_density +UINT16 Y_density + The resolution information to be written into the JFIF marker; + not used otherwise. density_unit may be 0 for unknown, + 1 for dots/inch, or 2 for dots/cm. The default values are 0,1,1 + indicating square pixels of unknown size. + +boolean write_Adobe_marker + If TRUE, an Adobe APP14 marker is emitted. jpeg_set_defaults() and + jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK, + or YCCK is selected, otherwise FALSE. It is generally a bad idea + to set both write_JFIF_header and write_Adobe_marker. In fact, + you probably shouldn't change the default settings at all --- the + default behavior ensures that the JPEG file's color space can be + recognized by the decoder. + +JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS] + Pointers to coefficient quantization tables, one per table slot, + or NULL if no table is defined for a slot. Usually these should + be set via one of the above helper routines; jpeg_add_quant_table() + is general enough to define any quantization table. The other + routines will set up table slot 0 for luminance quality and table + slot 1 for chrominance. + +int q_scale_factor[NUM_QUANT_TBLS] + [libjpeg v7+ API/ABI emulation only] + Linear quantization scaling factors (0-100, default 100) + for use with jpeg_default_qtables(). + See rdswitch.c and cjpeg.c for an example of usage. + Note that the q_scale_factor[] values use "linear" scales, so JPEG + quality levels chosen by the user must be converted to these scales + using jpeg_quality_scaling(). Here is an example that corresponds to + cjpeg -quality 90,70: + + jpeg_set_defaults(cinfo); + + /* Set luminance quality 90. */ + cinfo->q_scale_factor[0] = jpeg_quality_scaling(90); + /* Set chrominance quality 70. */ + cinfo->q_scale_factor[1] = jpeg_quality_scaling(70); + + jpeg_default_qtables(cinfo, force_baseline); + + CAUTION: Setting separate quality levels for chrominance and luminance + is mainly only useful if chrominance subsampling is disabled. 2x2 + chrominance subsampling (AKA "4:2:0") is the default, but you can + explicitly disable subsampling as follows: + + cinfo->comp_info[0].v_samp_factor = 1; + cinfo->comp_info[0].h_samp_factor = 1; + +JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS] +JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS] + Pointers to Huffman coding tables, one per table slot, or NULL if + no table is defined for a slot. Slots 0 and 1 are filled with the + JPEG sample tables by jpeg_set_defaults(). If you need to allocate + more table structures, jpeg_alloc_huff_table() may be used. + Note that optimal Huffman tables can be computed for an image + by setting optimize_coding, as discussed above; there's seldom + any need to mess with providing your own Huffman tables. + + +[libjpeg v7+ API/ABI emulation only] +The actual dimensions of the JPEG image that will be written to the file are +given by the following fields. These are computed from the input image +dimensions and the compression parameters by jpeg_start_compress(). You can +also call jpeg_calc_jpeg_dimensions() to obtain the values that will result +from the current parameter settings. This can be useful if you are trying +to pick a scaling ratio that will get close to a desired target size. + +JDIMENSION jpeg_width Actual dimensions of output image. +JDIMENSION jpeg_height + + +Per-component parameters are stored in the struct cinfo.comp_info[i] for +component number i. Note that components here refer to components of the +JPEG color space, *not* the source image color space. A suitably large +comp_info[] array is allocated by jpeg_set_defaults(); if you choose not +to use that routine, it's up to you to allocate the array. + +int component_id + The one-byte identifier code to be recorded in the JPEG file for + this component. For the standard color spaces, we recommend you + leave the default values alone. + +int h_samp_factor +int v_samp_factor + Horizontal and vertical sampling factors for the component; must + be 1..4 according to the JPEG standard. Note that larger sampling + factors indicate a higher-resolution component; many people find + this behavior quite unintuitive. The default values are 2,2 for + luminance components and 1,1 for chrominance components, except + for grayscale where 1,1 is used. + +int quant_tbl_no + Quantization table number for component. The default value is + 0 for luminance components and 1 for chrominance components. + +int dc_tbl_no +int ac_tbl_no + DC and AC entropy coding table numbers. The default values are + 0 for luminance components and 1 for chrominance components. + +int component_index + Must equal the component's index in comp_info[]. (Beginning in + release v6, the compressor library will fill this in automatically; + you don't have to.) + + +Decompression parameter selection +--------------------------------- + +Decompression parameter selection is somewhat simpler than compression +parameter selection, since all of the JPEG internal parameters are +recorded in the source file and need not be supplied by the application. +(Unless you are working with abbreviated files, in which case see +"Abbreviated datastreams", below.) Decompression parameters control +the postprocessing done on the image to deliver it in a format suitable +for the application's use. Many of the parameters control speed/quality +tradeoffs, in which faster decompression may be obtained at the price of +a poorer-quality image. The defaults select the highest quality (slowest) +processing. + +The following fields in the JPEG object are set by jpeg_read_header() and +may be useful to the application in choosing decompression parameters: + +JDIMENSION image_width Width and height of image +JDIMENSION image_height +int num_components Number of color components +J_COLOR_SPACE jpeg_color_space Colorspace of image +boolean saw_JFIF_marker TRUE if a JFIF APP0 marker was seen + UINT8 JFIF_major_version Version information from JFIF marker + UINT8 JFIF_minor_version + UINT8 density_unit Resolution data from JFIF marker + UINT16 X_density + UINT16 Y_density +boolean saw_Adobe_marker TRUE if an Adobe APP14 marker was seen + UINT8 Adobe_transform Color transform code from Adobe marker + +The JPEG color space, unfortunately, is something of a guess since the JPEG +standard proper does not provide a way to record it. In practice most files +adhere to the JFIF or Adobe conventions, and the decoder will recognize these +correctly. See "Special color spaces", below, for more info. + + +The decompression parameters that determine the basic properties of the +returned image are: + +J_COLOR_SPACE out_color_space + Output color space. jpeg_read_header() sets an appropriate default + based on jpeg_color_space; typically it will be RGB or grayscale. + The application can change this field to request output in a different + colorspace. For example, set it to JCS_GRAYSCALE to get grayscale + output from a color file. (This is useful for previewing: grayscale + output is faster than full color since the color components need not + be processed.) Note that not all possible color space transforms are + currently implemented; you may need to extend jdcolor.c if you want an + unusual conversion. + +unsigned int scale_num, scale_denom + Scale the image by the fraction scale_num/scale_denom. Default is + 1/1, or no scaling. Currently, the only supported scaling ratios + are M/8 with all M from 1 to 16, or any reduced fraction thereof (such + as 1/2, 3/4, etc.) (The library design allows for arbitrary + scaling ratios but this is not likely to be implemented any time soon.) + Smaller scaling ratios permit significantly faster decoding since + fewer pixels need be processed and a simpler IDCT method can be used. + +boolean quantize_colors + If set TRUE, colormapped output will be delivered. Default is FALSE, + meaning that full-color output will be delivered. + +The next three parameters are relevant only if quantize_colors is TRUE. + +int desired_number_of_colors + Maximum number of colors to use in generating a library-supplied color + map (the actual number of colors is returned in a different field). + Default 256. Ignored when the application supplies its own color map. + +boolean two_pass_quantize + If TRUE, an extra pass over the image is made to select a custom color + map for the image. This usually looks a lot better than the one-size- + fits-all colormap that is used otherwise. Default is TRUE. Ignored + when the application supplies its own color map. + +J_DITHER_MODE dither_mode + Selects color dithering method. Supported values are: + JDITHER_NONE no dithering: fast, very low quality + JDITHER_ORDERED ordered dither: moderate speed and quality + JDITHER_FS Floyd-Steinberg dither: slow, high quality + Default is JDITHER_FS. (At present, ordered dither is implemented + only in the single-pass, standard-colormap case. If you ask for + ordered dither when two_pass_quantize is TRUE or when you supply + an external color map, you'll get F-S dithering.) + +When quantize_colors is TRUE, the target color map is described by the next +two fields. colormap is set to NULL by jpeg_read_header(). The application +can supply a color map by setting colormap non-NULL and setting +actual_number_of_colors to the map size. Otherwise, jpeg_start_decompress() +selects a suitable color map and sets these two fields itself. +[Implementation restriction: at present, an externally supplied colormap is +only accepted for 3-component output color spaces.] + +JSAMPARRAY colormap + The color map, represented as a 2-D pixel array of out_color_components + rows and actual_number_of_colors columns. Ignored if not quantizing. + CAUTION: if the JPEG library creates its own colormap, the storage + pointed to by this field is released by jpeg_finish_decompress(). + Copy the colormap somewhere else first, if you want to save it. + +int actual_number_of_colors + The number of colors in the color map. + +Additional decompression parameters that the application may set include: + +J_DCT_METHOD dct_method + Selects the algorithm used for the DCT step. Choices are: + JDCT_ISLOW: slow but accurate integer algorithm + JDCT_IFAST: faster, less accurate integer method + JDCT_FLOAT: floating-point method + JDCT_DEFAULT: default method (normally JDCT_ISLOW) + JDCT_FASTEST: fastest method (normally JDCT_IFAST) + In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than + JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary + with other SIMD implementations, or when using libjpeg-turbo without + SIMD extensions.) If the JPEG image was compressed using a quality + level of 85 or below, then there should be little or no perceptible + difference between the two algorithms. When decompressing images that + were compressed using quality levels above 85, however, the difference + between JDCT_IFAST and JDCT_ISLOW becomes more pronounced. With images + compressed using quality=97, for instance, JDCT_IFAST incurs generally + about a 4-6 dB loss (in PSNR) relative to JDCT_ISLOW, but this can be + larger for some images. If you can avoid it, do not use JDCT_IFAST + when decompressing images that were compressed using quality levels + above 97. The algorithm often degenerates for such images and can + actually produce a more lossy output image than if the JPEG image had + been compressed using lower quality levels. JDCT_FLOAT is mainly a + legacy feature. It does not produce significantly more accurate + results than the ISLOW method, and it is much slower. The FLOAT method + may also give different results on different machines due to varying + roundoff behavior, whereas the integer methods should give the same + results on all machines. + +boolean do_fancy_upsampling + If TRUE, do careful upsampling of chroma components. If FALSE, + a faster but sloppier method is used. Default is TRUE. The visual + impact of the sloppier method is often very small. + +boolean do_block_smoothing + If TRUE, interblock smoothing is applied in early stages of decoding + progressive JPEG files; if FALSE, not. Default is TRUE. Early + progression stages look "fuzzy" with smoothing, "blocky" without. + In any case, block smoothing ceases to be applied after the first few + AC coefficients are known to full accuracy, so it is relevant only + when using buffered-image mode for progressive images. + +boolean enable_1pass_quant +boolean enable_external_quant +boolean enable_2pass_quant + These are significant only in buffered-image mode, which is + described in its own section below. + + +The output image dimensions are given by the following fields. These are +computed from the source image dimensions and the decompression parameters +by jpeg_start_decompress(). You can also call jpeg_calc_output_dimensions() +to obtain the values that will result from the current parameter settings. +This can be useful if you are trying to pick a scaling ratio that will get +close to a desired target size. It's also important if you are using the +JPEG library's memory manager to allocate output buffer space, because you +are supposed to request such buffers *before* jpeg_start_decompress(). + +JDIMENSION output_width Actual dimensions of output image. +JDIMENSION output_height +int out_color_components Number of color components in out_color_space. +int output_components Number of color components returned. +int rec_outbuf_height Recommended height of scanline buffer. + +When quantizing colors, output_components is 1, indicating a single color map +index per pixel. Otherwise it equals out_color_components. The output arrays +are required to be output_width * output_components JSAMPLEs wide. + +rec_outbuf_height is the recommended minimum height (in scanlines) of the +buffer passed to jpeg_read_scanlines(). If the buffer is smaller, the +library will still work, but time will be wasted due to unnecessary data +copying. In high-quality modes, rec_outbuf_height is always 1, but some +faster, lower-quality modes set it to larger values (typically 2 to 4). +If you are going to ask for a high-speed processing mode, you may as well +go to the trouble of honoring rec_outbuf_height so as to avoid data copying. +(An output buffer larger than rec_outbuf_height lines is OK, but won't +provide any material speed improvement over that height.) + + +Special color spaces +-------------------- + +The JPEG standard itself is "color blind" and doesn't specify any particular +color space. It is customary to convert color data to a luminance/chrominance +color space before compressing, since this permits greater compression. The +existing de-facto JPEG file format standards specify YCbCr or grayscale data +(JFIF), or grayscale, RGB, YCbCr, CMYK, or YCCK (Adobe). For special +applications such as multispectral images, other color spaces can be used, +but it must be understood that such files will be unportable. + +The JPEG library can handle the most common colorspace conversions (namely +RGB <=> YCbCr and CMYK <=> YCCK). It can also deal with data of an unknown +color space, passing it through without conversion. If you deal extensively +with an unusual color space, you can easily extend the library to understand +additional color spaces and perform appropriate conversions. + +For compression, the source data's color space is specified by field +in_color_space. This is transformed to the JPEG file's color space given +by jpeg_color_space. jpeg_set_defaults() chooses a reasonable JPEG color +space depending on in_color_space, but you can override this by calling +jpeg_set_colorspace(). Of course you must select a supported transformation. +jccolor.c currently supports the following transformations: + RGB => YCbCr + RGB => GRAYSCALE + YCbCr => GRAYSCALE + CMYK => YCCK +plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB, +YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN. + +The de-facto file format standards (JFIF and Adobe) specify APPn markers that +indicate the color space of the JPEG file. It is important to ensure that +these are written correctly, or omitted if the JPEG file's color space is not +one of the ones supported by the de-facto standards. jpeg_set_colorspace() +will set the compression parameters to include or omit the APPn markers +properly, so long as it is told the truth about the JPEG color space. +For example, if you are writing some random 3-component color space without +conversion, don't try to fake out the library by setting in_color_space and +jpeg_color_space to JCS_YCbCr; use JCS_UNKNOWN. You may want to write an +APPn marker of your own devising to identify the colorspace --- see "Special +markers", below. + +When told that the color space is UNKNOWN, the library will default to using +luminance-quality compression parameters for all color components. You may +well want to change these parameters. See the source code for +jpeg_set_colorspace(), in jcparam.c, for details. + +For decompression, the JPEG file's color space is given in jpeg_color_space, +and this is transformed to the output color space out_color_space. +jpeg_read_header's setting of jpeg_color_space can be relied on if the file +conforms to JFIF or Adobe conventions, but otherwise it is no better than a +guess. If you know the JPEG file's color space for certain, you can override +jpeg_read_header's guess by setting jpeg_color_space. jpeg_read_header also +selects a default output color space based on (its guess of) jpeg_color_space; +set out_color_space to override this. Again, you must select a supported +transformation. jdcolor.c currently supports + YCbCr => RGB + YCbCr => GRAYSCALE + RGB => GRAYSCALE + GRAYSCALE => RGB + YCCK => CMYK +as well as the null transforms. (Since GRAYSCALE=>RGB is provided, an +application can force grayscale JPEGs to look like color JPEGs if it only +wants to handle one case.) + +The two-pass color quantizer, jquant2.c, is specialized to handle RGB data +(it weights distances appropriately for RGB colors). You'll need to modify +the code if you want to use it for non-RGB output color spaces. Note that +jquant2.c is used to map to an application-supplied colormap as well as for +the normal two-pass colormap selection process. + +CAUTION: it appears that Adobe Photoshop writes inverted data in CMYK JPEG +files: 0 represents 100% ink coverage, rather than 0% ink as you'd expect. +This is arguably a bug in Photoshop, but if you need to work with Photoshop +CMYK files, you will have to deal with it in your application. We cannot +"fix" this in the library by inverting the data during the CMYK<=>YCCK +transform, because that would break other applications, notably Ghostscript. +Photoshop versions prior to 3.0 write EPS files containing JPEG-encoded CMYK +data in the same inverted-YCCK representation used in bare JPEG files, but +the surrounding PostScript code performs an inversion using the PS image +operator. I am told that Photoshop 3.0 will write uninverted YCCK in +EPS/JPEG files, and will omit the PS-level inversion. (But the data +polarity used in bare JPEG files will not change in 3.0.) In either case, +the JPEG library must not invert the data itself, or else Ghostscript would +read these EPS files incorrectly. + + +Error handling +-------------- + +When the default error handler is used, any error detected inside the JPEG +routines will cause a message to be printed on stderr, followed by exit(). +You can supply your own error handling routines to override this behavior +and to control the treatment of nonfatal warnings and trace/debug messages. +The file example.c illustrates the most common case, which is to have the +application regain control after an error rather than exiting. + +The JPEG library never writes any message directly; it always goes through +the error handling routines. Three classes of messages are recognized: + * Fatal errors: the library cannot continue. + * Warnings: the library can continue, but the data is corrupt, and a + damaged output image is likely to result. + * Trace/informational messages. These come with a trace level indicating + the importance of the message; you can control the verbosity of the + program by adjusting the maximum trace level that will be displayed. + +You may, if you wish, simply replace the entire JPEG error handling module +(jerror.c) with your own code. However, you can avoid code duplication by +only replacing some of the routines depending on the behavior you need. +This is accomplished by calling jpeg_std_error() as usual, but then overriding +some of the method pointers in the jpeg_error_mgr struct, as illustrated by +example.c. + +All of the error handling routines will receive a pointer to the JPEG object +(a j_common_ptr which points to either a jpeg_compress_struct or a +jpeg_decompress_struct; if you need to tell which, test the is_decompressor +field). This struct includes a pointer to the error manager struct in its +"err" field. Frequently, custom error handler routines will need to access +additional data which is not known to the JPEG library or the standard error +handler. The most convenient way to do this is to embed either the JPEG +object or the jpeg_error_mgr struct in a larger structure that contains +additional fields; then casting the passed pointer provides access to the +additional fields. Again, see example.c for one way to do it. (Beginning +with IJG version 6b, there is also a void pointer "client_data" in each +JPEG object, which the application can also use to find related data. +The library does not touch client_data at all.) + +The individual methods that you might wish to override are: + +error_exit (j_common_ptr cinfo) + Receives control for a fatal error. Information sufficient to + generate the error message has been stored in cinfo->err; call + output_message to display it. Control must NOT return to the caller; + generally this routine will exit() or longjmp() somewhere. + Typically you would override this routine to get rid of the exit() + default behavior. Note that if you continue processing, you should + clean up the JPEG object with jpeg_abort() or jpeg_destroy(). + +output_message (j_common_ptr cinfo) + Actual output of any JPEG message. Override this to send messages + somewhere other than stderr. Note that this method does not know + how to generate a message, only where to send it. + +format_message (j_common_ptr cinfo, char *buffer) + Constructs a readable error message string based on the error info + stored in cinfo->err. This method is called by output_message. Few + applications should need to override this method. One possible + reason for doing so is to implement dynamic switching of error message + language. + +emit_message (j_common_ptr cinfo, int msg_level) + Decide whether or not to emit a warning or trace message; if so, + calls output_message. The main reason for overriding this method + would be to abort on warnings. msg_level is -1 for warnings, + 0 and up for trace messages. + +Only error_exit() and emit_message() are called from the rest of the JPEG +library; the other two are internal to the error handler. + +The actual message texts are stored in an array of strings which is pointed to +by the field err->jpeg_message_table. The messages are numbered from 0 to +err->last_jpeg_message, and it is these code numbers that are used in the +JPEG library code. You could replace the message texts (for instance, with +messages in French or German) by changing the message table pointer. See +jerror.h for the default texts. CAUTION: this table will almost certainly +change or grow from one library version to the next. + +It may be useful for an application to add its own message texts that are +handled by the same mechanism. The error handler supports a second "add-on" +message table for this purpose. To define an addon table, set the pointer +err->addon_message_table and the message numbers err->first_addon_message and +err->last_addon_message. If you number the addon messages beginning at 1000 +or so, you won't have to worry about conflicts with the library's built-in +messages. See the sample applications cjpeg/djpeg for an example of using +addon messages (the addon messages are defined in cderror.h). + +Actual invocation of the error handler is done via macros defined in jerror.h: + ERREXITn(...) for fatal errors + WARNMSn(...) for corrupt-data warnings + TRACEMSn(...) for trace and informational messages. +These macros store the message code and any additional parameters into the +error handler struct, then invoke the error_exit() or emit_message() method. +The variants of each macro are for varying numbers of additional parameters. +The additional parameters are inserted into the generated message using +standard printf() format codes. + +See jerror.h and jerror.c for further details. + + +Compressed data handling (source and destination managers) +---------------------------------------------------------- + +The JPEG compression library sends its compressed data to a "destination +manager" module. The default destination manager just writes the data to a +memory buffer or to a stdio stream, but you can provide your own manager to +do something else. Similarly, the decompression library calls a "source +manager" to obtain the compressed data; you can provide your own source +manager if you want the data to come from somewhere other than a memory +buffer or a stdio stream. + +In both cases, compressed data is processed a bufferload at a time: the +destination or source manager provides a work buffer, and the library invokes +the manager only when the buffer is filled or emptied. (You could define a +one-character buffer to force the manager to be invoked for each byte, but +that would be rather inefficient.) The buffer's size and location are +controlled by the manager, not by the library. For example, the memory +source manager just makes the buffer pointer and length point to the original +data in memory. In this case the buffer-reload procedure will be invoked +only if the decompressor ran off the end of the datastream, which would +indicate an erroneous datastream. + +The work buffer is defined as an array of datatype JOCTET, which is generally +"char" or "unsigned char". On a machine where char is not exactly 8 bits +wide, you must define JOCTET as a wider data type and then modify the data +source and destination modules to transcribe the work arrays into 8-bit units +on external storage. + +A data destination manager struct contains a pointer and count defining the +next byte to write in the work buffer and the remaining free space: + + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + +The library increments the pointer and decrements the count until the buffer +is filled. The manager's empty_output_buffer method must reset the pointer +and count. The manager is expected to remember the buffer's starting address +and total size in private fields not visible to the library. + +A data destination manager provides three methods: + +init_destination (j_compress_ptr cinfo) + Initialize destination. This is called by jpeg_start_compress() + before any data is actually written. It must initialize + next_output_byte and free_in_buffer. free_in_buffer must be + initialized to a positive value. + +empty_output_buffer (j_compress_ptr cinfo) + This is called whenever the buffer has filled (free_in_buffer + reaches zero). In typical applications, it should write out the + *entire* buffer (use the saved start address and buffer length; + ignore the current state of next_output_byte and free_in_buffer). + Then reset the pointer & count to the start of the buffer, and + return TRUE indicating that the buffer has been dumped. + free_in_buffer must be set to a positive value when TRUE is + returned. A FALSE return should only be used when I/O suspension is + desired (this operating mode is discussed in the next section). + +term_destination (j_compress_ptr cinfo) + Terminate destination --- called by jpeg_finish_compress() after all + data has been written. In most applications, this must flush any + data remaining in the buffer. Use either next_output_byte or + free_in_buffer to determine how much data is in the buffer. + +term_destination() is NOT called by jpeg_abort() or jpeg_destroy(). If you +want the destination manager to be cleaned up during an abort, you must do it +yourself. + +You will also need code to create a jpeg_destination_mgr struct, fill in its +method pointers, and insert a pointer to the struct into the "dest" field of +the JPEG compression object. This can be done in-line in your setup code if +you like, but it's probably cleaner to provide a separate routine similar to +the jpeg_stdio_dest() or jpeg_mem_dest() routines of the supplied destination +managers. + +Decompression source managers follow a parallel design, but with some +additional frammishes. The source manager struct contains a pointer and count +defining the next byte to read from the work buffer and the number of bytes +remaining: + + const JOCTET *next_input_byte; /* => next byte to read from buffer */ + size_t bytes_in_buffer; /* # of bytes remaining in buffer */ + +The library increments the pointer and decrements the count until the buffer +is emptied. The manager's fill_input_buffer method must reset the pointer and +count. In most applications, the manager must remember the buffer's starting +address and total size in private fields not visible to the library. + +A data source manager provides five methods: + +init_source (j_decompress_ptr cinfo) + Initialize source. This is called by jpeg_read_header() before any + data is actually read. Unlike init_destination(), it may leave + bytes_in_buffer set to 0 (in which case a fill_input_buffer() call + will occur immediately). + +fill_input_buffer (j_decompress_ptr cinfo) + This is called whenever bytes_in_buffer has reached zero and more + data is wanted. In typical applications, it should read fresh data + into the buffer (ignoring the current state of next_input_byte and + bytes_in_buffer), reset the pointer & count to the start of the + buffer, and return TRUE indicating that the buffer has been reloaded. + It is not necessary to fill the buffer entirely, only to obtain at + least one more byte. bytes_in_buffer MUST be set to a positive value + if TRUE is returned. A FALSE return should only be used when I/O + suspension is desired (this mode is discussed in the next section). + +skip_input_data (j_decompress_ptr cinfo, long num_bytes) + Skip num_bytes worth of data. The buffer pointer and count should + be advanced over num_bytes input bytes, refilling the buffer as + needed. This is used to skip over a potentially large amount of + uninteresting data (such as an APPn marker). In some applications + it may be possible to optimize away the reading of the skipped data, + but it's not clear that being smart is worth much trouble; large + skips are uncommon. bytes_in_buffer may be zero on return. + A zero or negative skip count should be treated as a no-op. + +resync_to_restart (j_decompress_ptr cinfo, int desired) + This routine is called only when the decompressor has failed to find + a restart (RSTn) marker where one is expected. Its mission is to + find a suitable point for resuming decompression. For most + applications, we recommend that you just use the default resync + procedure, jpeg_resync_to_restart(). However, if you are able to back + up in the input data stream, or if you have a-priori knowledge about + the likely location of restart markers, you may be able to do better. + Read the read_restart_marker() and jpeg_resync_to_restart() routines + in jdmarker.c if you think you'd like to implement your own resync + procedure. + +term_source (j_decompress_ptr cinfo) + Terminate source --- called by jpeg_finish_decompress() after all + data has been read. Often a no-op. + +For both fill_input_buffer() and skip_input_data(), there is no such thing +as an EOF return. If the end of the file has been reached, the routine has +a choice of exiting via ERREXIT() or inserting fake data into the buffer. +In most cases, generating a warning message and inserting a fake EOI marker +is the best course of action --- this will allow the decompressor to output +however much of the image is there. In pathological cases, the decompressor +may swallow the EOI and again demand data ... just keep feeding it fake EOIs. +jdatasrc.c illustrates the recommended error recovery behavior. + +term_source() is NOT called by jpeg_abort() or jpeg_destroy(). If you want +the source manager to be cleaned up during an abort, you must do it yourself. + +You will also need code to create a jpeg_source_mgr struct, fill in its method +pointers, and insert a pointer to the struct into the "src" field of the JPEG +decompression object. This can be done in-line in your setup code if you +like, but it's probably cleaner to provide a separate routine similar to the +jpeg_stdio_src() or jpeg_mem_src() routines of the supplied source managers. + +For more information, consult the memory and stdio source and destination +managers in jdatasrc.c and jdatadst.c. + + +I/O suspension +-------------- + +Some applications need to use the JPEG library as an incremental memory-to- +memory filter: when the compressed data buffer is filled or emptied, they want +control to return to the outer loop, rather than expecting that the buffer can +be emptied or reloaded within the data source/destination manager subroutine. +The library supports this need by providing an "I/O suspension" mode, which we +describe in this section. + +The I/O suspension mode is not a panacea: nothing is guaranteed about the +maximum amount of time spent in any one call to the library, so it will not +eliminate response-time problems in single-threaded applications. If you +need guaranteed response time, we suggest you "bite the bullet" and implement +a real multi-tasking capability. + +To use I/O suspension, cooperation is needed between the calling application +and the data source or destination manager; you will always need a custom +source/destination manager. (Please read the previous section if you haven't +already.) The basic idea is that the empty_output_buffer() or +fill_input_buffer() routine is a no-op, merely returning FALSE to indicate +that it has done nothing. Upon seeing this, the JPEG library suspends +operation and returns to its caller. The surrounding application is +responsible for emptying or refilling the work buffer before calling the +JPEG library again. + +Compression suspension: + +For compression suspension, use an empty_output_buffer() routine that returns +FALSE; typically it will not do anything else. This will cause the +compressor to return to the caller of jpeg_write_scanlines(), with the return +value indicating that not all the supplied scanlines have been accepted. +The application must make more room in the output buffer, adjust the output +buffer pointer/count appropriately, and then call jpeg_write_scanlines() +again, pointing to the first unconsumed scanline. + +When forced to suspend, the compressor will backtrack to a convenient stopping +point (usually the start of the current MCU); it will regenerate some output +data when restarted. Therefore, although empty_output_buffer() is only +called when the buffer is filled, you should NOT write out the entire buffer +after a suspension. Write only the data up to the current position of +next_output_byte/free_in_buffer. The data beyond that point will be +regenerated after resumption. + +Because of the backtracking behavior, a good-size output buffer is essential +for efficiency; you don't want the compressor to suspend often. (In fact, an +overly small buffer could lead to infinite looping, if a single MCU required +more data than would fit in the buffer.) We recommend a buffer of at least +several Kbytes. You may want to insert explicit code to ensure that you don't +call jpeg_write_scanlines() unless there is a reasonable amount of space in +the output buffer; in other words, flush the buffer before trying to compress +more data. + +The compressor does not allow suspension while it is trying to write JPEG +markers at the beginning and end of the file. This means that: + * At the beginning of a compression operation, there must be enough free + space in the output buffer to hold the header markers (typically 600 or + so bytes). The recommended buffer size is bigger than this anyway, so + this is not a problem as long as you start with an empty buffer. However, + this restriction might catch you if you insert large special markers, such + as a JFIF thumbnail image, without flushing the buffer afterwards. + * When you call jpeg_finish_compress(), there must be enough space in the + output buffer to emit any buffered data and the final EOI marker. In the + current implementation, half a dozen bytes should suffice for this, but + for safety's sake we recommend ensuring that at least 100 bytes are free + before calling jpeg_finish_compress(). + +A more significant restriction is that jpeg_finish_compress() cannot suspend. +This means you cannot use suspension with multi-pass operating modes, namely +Huffman code optimization and multiple-scan output. Those modes write the +whole file during jpeg_finish_compress(), which will certainly result in +buffer overrun. (Note that this restriction applies only to compression, +not decompression. The decompressor supports input suspension in all of its +operating modes.) + +Decompression suspension: + +For decompression suspension, use a fill_input_buffer() routine that simply +returns FALSE (except perhaps during error recovery, as discussed below). +This will cause the decompressor to return to its caller with an indication +that suspension has occurred. This can happen at four places: + * jpeg_read_header(): will return JPEG_SUSPENDED. + * jpeg_start_decompress(): will return FALSE, rather than its usual TRUE. + * jpeg_read_scanlines(): will return the number of scanlines already + completed (possibly 0). + * jpeg_finish_decompress(): will return FALSE, rather than its usual TRUE. +The surrounding application must recognize these cases, load more data into +the input buffer, and repeat the call. In the case of jpeg_read_scanlines(), +increment the passed pointers past any scanlines successfully read. + +Just as with compression, the decompressor will typically backtrack to a +convenient restart point before suspending. When fill_input_buffer() is +called, next_input_byte/bytes_in_buffer point to the current restart point, +which is where the decompressor will backtrack to if FALSE is returned. +The data beyond that position must NOT be discarded if you suspend; it needs +to be re-read upon resumption. In most implementations, you'll need to shift +this data down to the start of your work buffer and then load more data after +it. Again, this behavior means that a several-Kbyte work buffer is essential +for decent performance; furthermore, you should load a reasonable amount of +new data before resuming decompression. (If you loaded, say, only one new +byte each time around, you could waste a LOT of cycles.) + +The skip_input_data() source manager routine requires special care in a +suspension scenario. This routine is NOT granted the ability to suspend the +decompressor; it can decrement bytes_in_buffer to zero, but no more. If the +requested skip distance exceeds the amount of data currently in the input +buffer, then skip_input_data() must set bytes_in_buffer to zero and record the +additional skip distance somewhere else. The decompressor will immediately +call fill_input_buffer(), which should return FALSE, which will cause a +suspension return. The surrounding application must then arrange to discard +the recorded number of bytes before it resumes loading the input buffer. +(Yes, this design is rather baroque, but it avoids complexity in the far more +common case where a non-suspending source manager is used.) + +If the input data has been exhausted, we recommend that you emit a warning +and insert dummy EOI markers just as a non-suspending data source manager +would do. This can be handled either in the surrounding application logic or +within fill_input_buffer(); the latter is probably more efficient. If +fill_input_buffer() knows that no more data is available, it can set the +pointer/count to point to a dummy EOI marker and then return TRUE just as +though it had read more data in a non-suspending situation. + +The decompressor does not attempt to suspend within standard JPEG markers; +instead it will backtrack to the start of the marker and reprocess the whole +marker next time. Hence the input buffer must be large enough to hold the +longest standard marker in the file. Standard JPEG markers should normally +not exceed a few hundred bytes each (DHT tables are typically the longest). +We recommend at least a 2K buffer for performance reasons, which is much +larger than any correct marker is likely to be. For robustness against +damaged marker length counts, you may wish to insert a test in your +application for the case that the input buffer is completely full and yet +the decoder has suspended without consuming any data --- otherwise, if this +situation did occur, it would lead to an endless loop. (The library can't +provide this test since it has no idea whether "the buffer is full", or +even whether there is a fixed-size input buffer.) + +The input buffer would need to be 64K to allow for arbitrary COM or APPn +markers, but these are handled specially: they are either saved into allocated +memory, or skipped over by calling skip_input_data(). In the former case, +suspension is handled correctly, and in the latter case, the problem of +buffer overrun is placed on skip_input_data's shoulders, as explained above. +Note that if you provide your own marker handling routine for large markers, +you should consider how to deal with buffer overflow. + +Multiple-buffer management: + +In some applications it is desirable to store the compressed data in a linked +list of buffer areas, so as to avoid data copying. This can be handled by +having empty_output_buffer() or fill_input_buffer() set the pointer and count +to reference the next available buffer; FALSE is returned only if no more +buffers are available. Although seemingly straightforward, there is a +pitfall in this approach: the backtrack that occurs when FALSE is returned +could back up into an earlier buffer. For example, when fill_input_buffer() +is called, the current pointer & count indicate the backtrack restart point. +Since fill_input_buffer() will set the pointer and count to refer to a new +buffer, the restart position must be saved somewhere else. Suppose a second +call to fill_input_buffer() occurs in the same library call, and no +additional input data is available, so fill_input_buffer must return FALSE. +If the JPEG library has not moved the pointer/count forward in the current +buffer, then *the correct restart point is the saved position in the prior +buffer*. Prior buffers may be discarded only after the library establishes +a restart point within a later buffer. Similar remarks apply for output into +a chain of buffers. + +The library will never attempt to backtrack over a skip_input_data() call, +so any skipped data can be permanently discarded. You still have to deal +with the case of skipping not-yet-received data, however. + +It's much simpler to use only a single buffer; when fill_input_buffer() is +called, move any unconsumed data (beyond the current pointer/count) down to +the beginning of this buffer and then load new data into the remaining buffer +space. This approach requires a little more data copying but is far easier +to get right. + + +Progressive JPEG support +------------------------ + +Progressive JPEG rearranges the stored data into a series of scans of +increasing quality. In situations where a JPEG file is transmitted across a +slow communications link, a decoder can generate a low-quality image very +quickly from the first scan, then gradually improve the displayed quality as +more scans are received. The final image after all scans are complete is +identical to that of a regular (sequential) JPEG file of the same quality +setting. Progressive JPEG files are often slightly smaller than equivalent +sequential JPEG files, but the possibility of incremental display is the main +reason for using progressive JPEG. + +The IJG encoder library generates progressive JPEG files when given a +suitable "scan script" defining how to divide the data into scans. +Creation of progressive JPEG files is otherwise transparent to the encoder. +Progressive JPEG files can also be read transparently by the decoder library. +If the decoding application simply uses the library as defined above, it +will receive a final decoded image without any indication that the file was +progressive. Of course, this approach does not allow incremental display. +To perform incremental display, an application needs to use the decoder +library's "buffered-image" mode, in which it receives a decoded image +multiple times. + +Each displayed scan requires about as much work to decode as a full JPEG +image of the same size, so the decoder must be fairly fast in relation to the +data transmission rate in order to make incremental display useful. However, +it is possible to skip displaying the image and simply add the incoming bits +to the decoder's coefficient buffer. This is fast because only Huffman +decoding need be done, not IDCT, upsampling, colorspace conversion, etc. +The IJG decoder library allows the application to switch dynamically between +displaying the image and simply absorbing the incoming bits. A properly +coded application can automatically adapt the number of display passes to +suit the time available as the image is received. Also, a final +higher-quality display cycle can be performed from the buffered data after +the end of the file is reached. + +Progressive compression: + +To create a progressive JPEG file (or a multiple-scan sequential JPEG file), +set the scan_info cinfo field to point to an array of scan descriptors, and +perform compression as usual. Instead of constructing your own scan list, +you can call the jpeg_simple_progression() helper routine to create a +recommended progression sequence; this method should be used by all +applications that don't want to get involved in the nitty-gritty of +progressive scan sequence design. (If you want to provide user control of +scan sequences, you may wish to borrow the scan script reading code found +in rdswitch.c, so that you can read scan script files just like cjpeg's.) +When scan_info is not NULL, the compression library will store DCT'd data +into a buffer array as jpeg_write_scanlines() is called, and will emit all +the requested scans during jpeg_finish_compress(). This implies that +multiple-scan output cannot be created with a suspending data destination +manager, since jpeg_finish_compress() does not support suspension. We +should also note that the compressor currently forces Huffman optimization +mode when creating a progressive JPEG file, because the default Huffman +tables are unsuitable for progressive files. + +Progressive decompression: + +When buffered-image mode is not used, the decoder library will read all of +a multi-scan file during jpeg_start_decompress(), so that it can provide a +final decoded image. (Here "multi-scan" means either progressive or +multi-scan sequential.) This makes multi-scan files transparent to the +decoding application. However, existing applications that used suspending +input with version 5 of the IJG library will need to be modified to check +for a suspension return from jpeg_start_decompress(). + +To perform incremental display, an application must use the library's +buffered-image mode. This is described in the next section. + + +Buffered-image mode +------------------- + +In buffered-image mode, the library stores the partially decoded image in a +coefficient buffer, from which it can be read out as many times as desired. +This mode is typically used for incremental display of progressive JPEG files, +but it can be used with any JPEG file. Each scan of a progressive JPEG file +adds more data (more detail) to the buffered image. The application can +display in lockstep with the source file (one display pass per input scan), +or it can allow input processing to outrun display processing. By making +input and display processing run independently, it is possible for the +application to adapt progressive display to a wide range of data transmission +rates. + +The basic control flow for buffered-image decoding is + + jpeg_create_decompress() + set data source + jpeg_read_header() + set overall decompression parameters + cinfo.buffered_image = TRUE; /* select buffered-image mode */ + jpeg_start_decompress() + for (each output pass) { + adjust output decompression parameters if required + jpeg_start_output() /* start a new output pass */ + for (all scanlines in image) { + jpeg_read_scanlines() + display scanlines + } + jpeg_finish_output() /* terminate output pass */ + } + jpeg_finish_decompress() + jpeg_destroy_decompress() + +This differs from ordinary unbuffered decoding in that there is an additional +level of looping. The application can choose how many output passes to make +and how to display each pass. + +The simplest approach to displaying progressive images is to do one display +pass for each scan appearing in the input file. In this case the outer loop +condition is typically + while (! jpeg_input_complete(&cinfo)) +and the start-output call should read + jpeg_start_output(&cinfo, cinfo.input_scan_number); +The second parameter to jpeg_start_output() indicates which scan of the input +file is to be displayed; the scans are numbered starting at 1 for this +purpose. (You can use a loop counter starting at 1 if you like, but using +the library's input scan counter is easier.) The library automatically reads +data as necessary to complete each requested scan, and jpeg_finish_output() +advances to the next scan or end-of-image marker (hence input_scan_number +will be incremented by the time control arrives back at jpeg_start_output()). +With this technique, data is read from the input file only as needed, and +input and output processing run in lockstep. + +After reading the final scan and reaching the end of the input file, the +buffered image remains available; it can be read additional times by +repeating the jpeg_start_output()/jpeg_read_scanlines()/jpeg_finish_output() +sequence. For example, a useful technique is to use fast one-pass color +quantization for display passes made while the image is arriving, followed by +a final display pass using two-pass quantization for highest quality. This +is done by changing the library parameters before the final output pass. +Changing parameters between passes is discussed in detail below. + +In general the last scan of a progressive file cannot be recognized as such +until after it is read, so a post-input display pass is the best approach if +you want special processing in the final pass. + +When done with the image, be sure to call jpeg_finish_decompress() to release +the buffered image (or just use jpeg_destroy_decompress()). + +If input data arrives faster than it can be displayed, the application can +cause the library to decode input data in advance of what's needed to produce +output. This is done by calling the routine jpeg_consume_input(). +The return value is one of the following: + JPEG_REACHED_SOS: reached an SOS marker (the start of a new scan) + JPEG_REACHED_EOI: reached the EOI marker (end of image) + JPEG_ROW_COMPLETED: completed reading one MCU row of compressed data + JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan + JPEG_SUSPENDED: suspended before completing any of the above +(JPEG_SUSPENDED can occur only if a suspending data source is used.) This +routine can be called at any time after initializing the JPEG object. It +reads some additional data and returns when one of the indicated significant +events occurs. (If called after the EOI marker is reached, it will +immediately return JPEG_REACHED_EOI without attempting to read more data.) + +The library's output processing will automatically call jpeg_consume_input() +whenever the output processing overtakes the input; thus, simple lockstep +display requires no direct calls to jpeg_consume_input(). But by adding +calls to jpeg_consume_input(), you can absorb data in advance of what is +being displayed. This has two benefits: + * You can limit buildup of unprocessed data in your input buffer. + * You can eliminate extra display passes by paying attention to the + state of the library's input processing. + +The first of these benefits only requires interspersing calls to +jpeg_consume_input() with your display operations and any other processing +you may be doing. To avoid wasting cycles due to backtracking, it's best to +call jpeg_consume_input() only after a hundred or so new bytes have arrived. +This is discussed further under "I/O suspension", above. (Note: the JPEG +library currently is not thread-safe. You must not call jpeg_consume_input() +from one thread of control if a different library routine is working on the +same JPEG object in another thread.) + +When input arrives fast enough that more than one new scan is available +before you start a new output pass, you may as well skip the output pass +corresponding to the completed scan. This occurs for free if you pass +cinfo.input_scan_number as the target scan number to jpeg_start_output(). +The input_scan_number field is simply the index of the scan currently being +consumed by the input processor. You can ensure that this is up-to-date by +emptying the input buffer just before calling jpeg_start_output(): call +jpeg_consume_input() repeatedly until it returns JPEG_SUSPENDED or +JPEG_REACHED_EOI. + +The target scan number passed to jpeg_start_output() is saved in the +cinfo.output_scan_number field. The library's output processing calls +jpeg_consume_input() whenever the current input scan number and row within +that scan is less than or equal to the current output scan number and row. +Thus, input processing can "get ahead" of the output processing but is not +allowed to "fall behind". You can achieve several different effects by +manipulating this interlock rule. For example, if you pass a target scan +number greater than the current input scan number, the output processor will +wait until that scan starts to arrive before producing any output. (To avoid +an infinite loop, the target scan number is automatically reset to the last +scan number when the end of image is reached. Thus, if you specify a large +target scan number, the library will just absorb the entire input file and +then perform an output pass. This is effectively the same as what +jpeg_start_decompress() does when you don't select buffered-image mode.) +When you pass a target scan number equal to the current input scan number, +the image is displayed no faster than the current input scan arrives. The +final possibility is to pass a target scan number less than the current input +scan number; this disables the input/output interlock and causes the output +processor to simply display whatever it finds in the image buffer, without +waiting for input. (However, the library will not accept a target scan +number less than one, so you can't avoid waiting for the first scan.) + +When data is arriving faster than the output display processing can advance +through the image, jpeg_consume_input() will store data into the buffered +image beyond the point at which the output processing is reading data out +again. If the input arrives fast enough, it may "wrap around" the buffer to +the point where the input is more than one whole scan ahead of the output. +If the output processing simply proceeds through its display pass without +paying attention to the input, the effect seen on-screen is that the lower +part of the image is one or more scans better in quality than the upper part. +Then, when the next output scan is started, you have a choice of what target +scan number to use. The recommended choice is to use the current input scan +number at that time, which implies that you've skipped the output scans +corresponding to the input scans that were completed while you processed the +previous output scan. In this way, the decoder automatically adapts its +speed to the arriving data, by skipping output scans as necessary to keep up +with the arriving data. + +When using this strategy, you'll want to be sure that you perform a final +output pass after receiving all the data; otherwise your last display may not +be full quality across the whole screen. So the right outer loop logic is +something like this: + do { + absorb any waiting input by calling jpeg_consume_input() + final_pass = jpeg_input_complete(&cinfo); + adjust output decompression parameters if required + jpeg_start_output(&cinfo, cinfo.input_scan_number); + ... + jpeg_finish_output() + } while (! final_pass); +rather than quitting as soon as jpeg_input_complete() returns TRUE. This +arrangement makes it simple to use higher-quality decoding parameters +for the final pass. But if you don't want to use special parameters for +the final pass, the right loop logic is like this: + for (;;) { + absorb any waiting input by calling jpeg_consume_input() + jpeg_start_output(&cinfo, cinfo.input_scan_number); + ... + jpeg_finish_output() + if (jpeg_input_complete(&cinfo) && + cinfo.input_scan_number == cinfo.output_scan_number) + break; + } +In this case you don't need to know in advance whether an output pass is to +be the last one, so it's not necessary to have reached EOF before starting +the final output pass; rather, what you want to test is whether the output +pass was performed in sync with the final input scan. This form of the loop +will avoid an extra output pass whenever the decoder is able (or nearly able) +to keep up with the incoming data. + +When the data transmission speed is high, you might begin a display pass, +then find that much or all of the file has arrived before you can complete +the pass. (You can detect this by noting the JPEG_REACHED_EOI return code +from jpeg_consume_input(), or equivalently by testing jpeg_input_complete().) +In this situation you may wish to abort the current display pass and start a +new one using the newly arrived information. To do so, just call +jpeg_finish_output() and then start a new pass with jpeg_start_output(). + +A variant strategy is to abort and restart display if more than one complete +scan arrives during an output pass; this can be detected by noting +JPEG_REACHED_SOS returns and/or examining cinfo.input_scan_number. This +idea should be employed with caution, however, since the display process +might never get to the bottom of the image before being aborted, resulting +in the lower part of the screen being several passes worse than the upper. +In most cases it's probably best to abort an output pass only if the whole +file has arrived and you want to begin the final output pass immediately. + +When receiving data across a communication link, we recommend always using +the current input scan number for the output target scan number; if a +higher-quality final pass is to be done, it should be started (aborting any +incomplete output pass) as soon as the end of file is received. However, +many other strategies are possible. For example, the application can examine +the parameters of the current input scan and decide whether to display it or +not. If the scan contains only chroma data, one might choose not to use it +as the target scan, expecting that the scan will be small and will arrive +quickly. To skip to the next scan, call jpeg_consume_input() until it +returns JPEG_REACHED_SOS or JPEG_REACHED_EOI. Or just use the next higher +number as the target scan for jpeg_start_output(); but that method doesn't +let you inspect the next scan's parameters before deciding to display it. + + +In buffered-image mode, jpeg_start_decompress() never performs input and +thus never suspends. An application that uses input suspension with +buffered-image mode must be prepared for suspension returns from these +routines: +* jpeg_start_output() performs input only if you request 2-pass quantization + and the target scan isn't fully read yet. (This is discussed below.) +* jpeg_read_scanlines(), as always, returns the number of scanlines that it + was able to produce before suspending. +* jpeg_finish_output() will read any markers following the target scan, + up to the end of the file or the SOS marker that begins another scan. + (But it reads no input if jpeg_consume_input() has already reached the + end of the file or a SOS marker beyond the target output scan.) +* jpeg_finish_decompress() will read until the end of file, and thus can + suspend if the end hasn't already been reached (as can be tested by + calling jpeg_input_complete()). +jpeg_start_output(), jpeg_finish_output(), and jpeg_finish_decompress() +all return TRUE if they completed their tasks, FALSE if they had to suspend. +In the event of a FALSE return, the application must load more input data +and repeat the call. Applications that use non-suspending data sources need +not check the return values of these three routines. + + +It is possible to change decoding parameters between output passes in the +buffered-image mode. The decoder library currently supports only very +limited changes of parameters. ONLY THE FOLLOWING parameter changes are +allowed after jpeg_start_decompress() is called: +* dct_method can be changed before each call to jpeg_start_output(). + For example, one could use a fast DCT method for early scans, changing + to a higher quality method for the final scan. +* dither_mode can be changed before each call to jpeg_start_output(); + of course this has no impact if not using color quantization. Typically + one would use ordered dither for initial passes, then switch to + Floyd-Steinberg dither for the final pass. Caution: changing dither mode + can cause more memory to be allocated by the library. Although the amount + of memory involved is not large (a scanline or so), it may cause the + initial max_memory_to_use specification to be exceeded, which in the worst + case would result in an out-of-memory failure. +* do_block_smoothing can be changed before each call to jpeg_start_output(). + This setting is relevant only when decoding a progressive JPEG image. + During the first DC-only scan, block smoothing provides a very "fuzzy" look + instead of the very "blocky" look seen without it; which is better seems a + matter of personal taste. But block smoothing is nearly always a win + during later stages, especially when decoding a successive-approximation + image: smoothing helps to hide the slight blockiness that otherwise shows + up on smooth gradients until the lowest coefficient bits are sent. +* Color quantization mode can be changed under the rules described below. + You *cannot* change between full-color and quantized output (because that + would alter the required I/O buffer sizes), but you can change which + quantization method is used. + +When generating color-quantized output, changing quantization method is a +very useful way of switching between high-speed and high-quality display. +The library allows you to change among its three quantization methods: +1. Single-pass quantization to a fixed color cube. + Selected by cinfo.two_pass_quantize = FALSE and cinfo.colormap = NULL. +2. Single-pass quantization to an application-supplied colormap. + Selected by setting cinfo.colormap to point to the colormap (the value of + two_pass_quantize is ignored); also set cinfo.actual_number_of_colors. +3. Two-pass quantization to a colormap chosen specifically for the image. + Selected by cinfo.two_pass_quantize = TRUE and cinfo.colormap = NULL. + (This is the default setting selected by jpeg_read_header, but it is + probably NOT what you want for the first pass of progressive display!) +These methods offer successively better quality and lesser speed. However, +only the first method is available for quantizing in non-RGB color spaces. + +IMPORTANT: because the different quantizer methods have very different +working-storage requirements, the library requires you to indicate which +one(s) you intend to use before you call jpeg_start_decompress(). (If we did +not require this, the max_memory_to_use setting would be a complete fiction.) +You do this by setting one or more of these three cinfo fields to TRUE: + enable_1pass_quant Fixed color cube colormap + enable_external_quant Externally-supplied colormap + enable_2pass_quant Two-pass custom colormap +All three are initialized FALSE by jpeg_read_header(). But +jpeg_start_decompress() automatically sets TRUE the one selected by the +current two_pass_quantize and colormap settings, so you only need to set the +enable flags for any other quantization methods you plan to change to later. + +After setting the enable flags correctly at jpeg_start_decompress() time, you +can change to any enabled quantization method by setting two_pass_quantize +and colormap properly just before calling jpeg_start_output(). The following +special rules apply: +1. You must explicitly set cinfo.colormap to NULL when switching to 1-pass + or 2-pass mode from a different mode, or when you want the 2-pass + quantizer to be re-run to generate a new colormap. +2. To switch to an external colormap, or to change to a different external + colormap than was used on the prior pass, you must call + jpeg_new_colormap() after setting cinfo.colormap. +NOTE: if you want to use the same colormap as was used in the prior pass, +you should not do either of these things. This will save some nontrivial +switchover costs. +(These requirements exist because cinfo.colormap will always be non-NULL +after completing a prior output pass, since both the 1-pass and 2-pass +quantizers set it to point to their output colormaps. Thus you have to +do one of these two things to notify the library that something has changed. +Yup, it's a bit klugy, but it's necessary to do it this way for backwards +compatibility.) + +Note that in buffered-image mode, the library generates any requested colormap +during jpeg_start_output(), not during jpeg_start_decompress(). + +When using two-pass quantization, jpeg_start_output() makes a pass over the +buffered image to determine the optimum color map; it therefore may take a +significant amount of time, whereas ordinarily it does little work. The +progress monitor hook is called during this pass, if defined. It is also +important to realize that if the specified target scan number is greater than +or equal to the current input scan number, jpeg_start_output() will attempt +to consume input as it makes this pass. If you use a suspending data source, +you need to check for a FALSE return from jpeg_start_output() under these +conditions. The combination of 2-pass quantization and a not-yet-fully-read +target scan is the only case in which jpeg_start_output() will consume input. + + +Application authors who support buffered-image mode may be tempted to use it +for all JPEG images, even single-scan ones. This will work, but it is +inefficient: there is no need to create an image-sized coefficient buffer for +single-scan images. Requesting buffered-image mode for such an image wastes +memory. Worse, it can cost time on large images, since the buffered data has +to be swapped out or written to a temporary file. If you are concerned about +maximum performance on baseline JPEG files, you should use buffered-image +mode only when the incoming file actually has multiple scans. This can be +tested by calling jpeg_has_multiple_scans(), which will return a correct +result at any time after jpeg_read_header() completes. + +It is also worth noting that when you use jpeg_consume_input() to let input +processing get ahead of output processing, the resulting pattern of access to +the coefficient buffer is quite nonsequential. It's best to use the memory +manager jmemnobs.c if you can (ie, if you have enough real or virtual main +memory). If not, at least make sure that max_memory_to_use is set as high as +possible. If the JPEG memory manager has to use a temporary file, you will +probably see a lot of disk traffic and poor performance. (This could be +improved with additional work on the memory manager, but we haven't gotten +around to it yet.) + +In some applications it may be convenient to use jpeg_consume_input() for all +input processing, including reading the initial markers; that is, you may +wish to call jpeg_consume_input() instead of jpeg_read_header() during +startup. This works, but note that you must check for JPEG_REACHED_SOS and +JPEG_REACHED_EOI return codes as the equivalent of jpeg_read_header's codes. +Once the first SOS marker has been reached, you must call +jpeg_start_decompress() before jpeg_consume_input() will consume more input; +it'll just keep returning JPEG_REACHED_SOS until you do. If you read a +tables-only file this way, jpeg_consume_input() will return JPEG_REACHED_EOI +without ever returning JPEG_REACHED_SOS; be sure to check for this case. +If this happens, the decompressor will not read any more input until you call +jpeg_abort() to reset it. It is OK to call jpeg_consume_input() even when not +using buffered-image mode, but in that case it's basically a no-op after the +initial markers have been read: it will just return JPEG_SUSPENDED. + + +Abbreviated datastreams and multiple images +------------------------------------------- + +A JPEG compression or decompression object can be reused to process multiple +images. This saves a small amount of time per image by eliminating the +"create" and "destroy" operations, but that isn't the real purpose of the +feature. Rather, reuse of an object provides support for abbreviated JPEG +datastreams. Object reuse can also simplify processing a series of images in +a single input or output file. This section explains these features. + +A JPEG file normally contains several hundred bytes worth of quantization +and Huffman tables. In a situation where many images will be stored or +transmitted with identical tables, this may represent an annoying overhead. +The JPEG standard therefore permits tables to be omitted. The standard +defines three classes of JPEG datastreams: + * "Interchange" datastreams contain an image and all tables needed to decode + the image. These are the usual kind of JPEG file. + * "Abbreviated image" datastreams contain an image, but are missing some or + all of the tables needed to decode that image. + * "Abbreviated table specification" (henceforth "tables-only") datastreams + contain only table specifications. +To decode an abbreviated image, it is necessary to load the missing table(s) +into the decoder beforehand. This can be accomplished by reading a separate +tables-only file. A variant scheme uses a series of images in which the first +image is an interchange (complete) datastream, while subsequent ones are +abbreviated and rely on the tables loaded by the first image. It is assumed +that once the decoder has read a table, it will remember that table until a +new definition for the same table number is encountered. + +It is the application designer's responsibility to figure out how to associate +the correct tables with an abbreviated image. While abbreviated datastreams +can be useful in a closed environment, their use is strongly discouraged in +any situation where data exchange with other applications might be needed. +Caveat designer. + +The JPEG library provides support for reading and writing any combination of +tables-only datastreams and abbreviated images. In both compression and +decompression objects, a quantization or Huffman table will be retained for +the lifetime of the object, unless it is overwritten by a new table definition. + + +To create abbreviated image datastreams, it is only necessary to tell the +compressor not to emit some or all of the tables it is using. Each +quantization and Huffman table struct contains a boolean field "sent_table", +which normally is initialized to FALSE. For each table used by the image, the +header-writing process emits the table and sets sent_table = TRUE unless it is +already TRUE. (In normal usage, this prevents outputting the same table +definition multiple times, as would otherwise occur because the chroma +components typically share tables.) Thus, setting this field to TRUE before +calling jpeg_start_compress() will prevent the table from being written at +all. + +If you want to create a "pure" abbreviated image file containing no tables, +just call "jpeg_suppress_tables(&cinfo, TRUE)" after constructing all the +tables. If you want to emit some but not all tables, you'll need to set the +individual sent_table fields directly. + +To create an abbreviated image, you must also call jpeg_start_compress() +with a second parameter of FALSE, not TRUE. Otherwise jpeg_start_compress() +will force all the sent_table fields to FALSE. (This is a safety feature to +prevent abbreviated images from being created accidentally.) + +To create a tables-only file, perform the same parameter setup that you +normally would, but instead of calling jpeg_start_compress() and so on, call +jpeg_write_tables(&cinfo). This will write an abbreviated datastream +containing only SOI, DQT and/or DHT markers, and EOI. All the quantization +and Huffman tables that are currently defined in the compression object will +be emitted unless their sent_tables flag is already TRUE, and then all the +sent_tables flags will be set TRUE. + +A sure-fire way to create matching tables-only and abbreviated image files +is to proceed as follows: + + create JPEG compression object + set JPEG parameters + set destination to tables-only file + jpeg_write_tables(&cinfo); + set destination to image file + jpeg_start_compress(&cinfo, FALSE); + write data... + jpeg_finish_compress(&cinfo); + +Since the JPEG parameters are not altered between writing the table file and +the abbreviated image file, the same tables are sure to be used. Of course, +you can repeat the jpeg_start_compress() ... jpeg_finish_compress() sequence +many times to produce many abbreviated image files matching the table file. + +You cannot suppress output of the computed Huffman tables when Huffman +optimization is selected. (If you could, there'd be no way to decode the +image...) Generally, you don't want to set optimize_coding = TRUE when +you are trying to produce abbreviated files. + +In some cases you might want to compress an image using tables which are +not stored in the application, but are defined in an interchange or +tables-only file readable by the application. This can be done by setting up +a JPEG decompression object to read the specification file, then copying the +tables into your compression object. See jpeg_copy_critical_parameters() +for an example of copying quantization tables. + + +To read abbreviated image files, you simply need to load the proper tables +into the decompression object before trying to read the abbreviated image. +If the proper tables are stored in the application program, you can just +allocate the table structs and fill in their contents directly. For example, +to load a fixed quantization table into table slot "n": + + if (cinfo.quant_tbl_ptrs[n] == NULL) + cinfo.quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) &cinfo); + quant_ptr = cinfo.quant_tbl_ptrs[n]; /* quant_ptr is JQUANT_TBL* */ + for (i = 0; i < 64; i++) { + /* Qtable[] is desired quantization table, in natural array order */ + quant_ptr->quantval[i] = Qtable[i]; + } + +Code to load a fixed Huffman table is typically (for AC table "n"): + + if (cinfo.ac_huff_tbl_ptrs[n] == NULL) + cinfo.ac_huff_tbl_ptrs[n] = jpeg_alloc_huff_table((j_common_ptr) &cinfo); + huff_ptr = cinfo.ac_huff_tbl_ptrs[n]; /* huff_ptr is JHUFF_TBL* */ + for (i = 1; i <= 16; i++) { + /* counts[i] is number of Huffman codes of length i bits, i=1..16 */ + huff_ptr->bits[i] = counts[i]; + } + for (i = 0; i < 256; i++) { + /* symbols[] is the list of Huffman symbols, in code-length order */ + huff_ptr->huffval[i] = symbols[i]; + } + +(Note that trying to set cinfo.quant_tbl_ptrs[n] to point directly at a +constant JQUANT_TBL object is not safe. If the incoming file happened to +contain a quantization table definition, your master table would get +overwritten! Instead allocate a working table copy and copy the master table +into it, as illustrated above. Ditto for Huffman tables, of course.) + +You might want to read the tables from a tables-only file, rather than +hard-wiring them into your application. The jpeg_read_header() call is +sufficient to read a tables-only file. You must pass a second parameter of +FALSE to indicate that you do not require an image to be present. Thus, the +typical scenario is + + create JPEG decompression object + set source to tables-only file + jpeg_read_header(&cinfo, FALSE); + set source to abbreviated image file + jpeg_read_header(&cinfo, TRUE); + set decompression parameters + jpeg_start_decompress(&cinfo); + read data... + jpeg_finish_decompress(&cinfo); + +In some cases, you may want to read a file without knowing whether it contains +an image or just tables. In that case, pass FALSE and check the return value +from jpeg_read_header(): it will be JPEG_HEADER_OK if an image was found, +JPEG_HEADER_TABLES_ONLY if only tables were found. (A third return value, +JPEG_SUSPENDED, is possible when using a suspending data source manager.) +Note that jpeg_read_header() will not complain if you read an abbreviated +image for which you haven't loaded the missing tables; the missing-table check +occurs later, in jpeg_start_decompress(). + + +It is possible to read a series of images from a single source file by +repeating the jpeg_read_header() ... jpeg_finish_decompress() sequence, +without releasing/recreating the JPEG object or the data source module. +(If you did reinitialize, any partial bufferload left in the data source +buffer at the end of one image would be discarded, causing you to lose the +start of the next image.) When you use this method, stored tables are +automatically carried forward, so some of the images can be abbreviated images +that depend on tables from earlier images. + +If you intend to write a series of images into a single destination file, +you might want to make a specialized data destination module that doesn't +flush the output buffer at term_destination() time. This would speed things +up by some trifling amount. Of course, you'd need to remember to flush the +buffer after the last image. You can make the later images be abbreviated +ones by passing FALSE to jpeg_start_compress(). + + +Special markers +--------------- + +Some applications may need to insert or extract special data in the JPEG +datastream. The JPEG standard provides marker types "COM" (comment) and +"APP0" through "APP15" (application) to hold application-specific data. +Unfortunately, the use of these markers is not specified by the standard. +COM markers are fairly widely used to hold user-supplied text. The JFIF file +format spec uses APP0 markers with specified initial strings to hold certain +data. Adobe applications use APP14 markers beginning with the string "Adobe" +for miscellaneous data. Other APPn markers are rarely seen, but might +contain almost anything. + +If you wish to store user-supplied text, we recommend you use COM markers +and place readable 7-bit ASCII text in them. Newline conventions are not +standardized --- expect to find LF (Unix style), CR/LF (DOS style), or CR +(Mac style). A robust COM reader should be able to cope with random binary +garbage, including nulls, since some applications generate COM markers +containing non-ASCII junk. (But yours should not be one of them.) + +For program-supplied data, use an APPn marker, and be sure to begin it with an +identifying string so that you can tell whether the marker is actually yours. +It's probably best to avoid using APP0 or APP14 for any private markers. +(NOTE: the upcoming SPIFF standard will use APP8 markers; we recommend you +not use APP8 markers for any private purposes, either.) + +Keep in mind that at most 65533 bytes can be put into one marker, but you +can have as many markers as you like. + +By default, the IJG compression library will write a JFIF APP0 marker if the +selected JPEG colorspace is grayscale or YCbCr, or an Adobe APP14 marker if +the selected colorspace is RGB, CMYK, or YCCK. You can disable this, but +we don't recommend it. The decompression library will recognize JFIF and +Adobe markers and will set the JPEG colorspace properly when one is found. + + +You can write special markers immediately following the datastream header by +calling jpeg_write_marker() after jpeg_start_compress() and before the first +call to jpeg_write_scanlines(). When you do this, the markers appear after +the SOI and the JFIF APP0 and Adobe APP14 markers (if written), but before +all else. Specify the marker type parameter as "JPEG_COM" for COM or +"JPEG_APP0 + n" for APPn. (Actually, jpeg_write_marker will let you write +any marker type, but we don't recommend writing any other kinds of marker.) +For example, to write a user comment string pointed to by comment_text: + jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text)); + +If it's not convenient to store all the marker data in memory at once, +you can instead call jpeg_write_m_header() followed by multiple calls to +jpeg_write_m_byte(). If you do it this way, it's your responsibility to +call jpeg_write_m_byte() exactly the number of times given in the length +parameter to jpeg_write_m_header(). (This method lets you empty the +output buffer partway through a marker, which might be important when +using a suspending data destination module. In any case, if you are using +a suspending destination, you should flush its buffer after inserting +any special markers. See "I/O suspension".) + +Or, if you prefer to synthesize the marker byte sequence yourself, +you can just cram it straight into the data destination module. + +If you are writing JFIF 1.02 extension markers (thumbnail images), don't +forget to set cinfo.JFIF_minor_version = 2 so that the encoder will write the +correct JFIF version number in the JFIF header marker. The library's default +is to write version 1.01, but that's wrong if you insert any 1.02 extension +markers. (We could probably get away with just defaulting to 1.02, but there +used to be broken decoders that would complain about unknown minor version +numbers. To reduce compatibility risks it's safest not to write 1.02 unless +you are actually using 1.02 extensions.) + + +When reading, two methods of handling special markers are available: +1. You can ask the library to save the contents of COM and/or APPn markers +into memory, and then examine them at your leisure afterwards. +2. You can supply your own routine to process COM and/or APPn markers +on-the-fly as they are read. +The first method is simpler to use, especially if you are using a suspending +data source; writing a marker processor that copes with input suspension is +not easy (consider what happens if the marker is longer than your available +input buffer). However, the second method conserves memory since the marker +data need not be kept around after it's been processed. + +For either method, you'd normally set up marker handling after creating a +decompression object and before calling jpeg_read_header(), because the +markers of interest will typically be near the head of the file and so will +be scanned by jpeg_read_header. Once you've established a marker handling +method, it will be used for the life of that decompression object +(potentially many datastreams), unless you change it. Marker handling is +determined separately for COM markers and for each APPn marker code. + + +To save the contents of special markers in memory, call + jpeg_save_markers(cinfo, marker_code, length_limit) +where marker_code is the marker type to save, JPEG_COM or JPEG_APP0+n. +(To arrange to save all the special marker types, you need to call this +routine 17 times, for COM and APP0-APP15.) If the incoming marker is longer +than length_limit data bytes, only length_limit bytes will be saved; this +parameter allows you to avoid chewing up memory when you only need to see the +first few bytes of a potentially large marker. If you want to save all the +data, set length_limit to 0xFFFF; that is enough since marker lengths are only +16 bits. As a special case, setting length_limit to 0 prevents that marker +type from being saved at all. (That is the default behavior, in fact.) + +After jpeg_read_header() completes, you can examine the special markers by +following the cinfo->marker_list pointer chain. All the special markers in +the file appear in this list, in order of their occurrence in the file (but +omitting any markers of types you didn't ask for). Both the original data +length and the saved data length are recorded for each list entry; the latter +will not exceed length_limit for the particular marker type. Note that these +lengths exclude the marker length word, whereas the stored representation +within the JPEG file includes it. (Hence the maximum data length is really +only 65533.) + +It is possible that additional special markers appear in the file beyond the +SOS marker at which jpeg_read_header stops; if so, the marker list will be +extended during reading of the rest of the file. This is not expected to be +common, however. If you are short on memory you may want to reset the length +limit to zero for all marker types after finishing jpeg_read_header, to +ensure that the max_memory_to_use setting cannot be exceeded due to addition +of later markers. + +The marker list remains stored until you call jpeg_finish_decompress or +jpeg_abort, at which point the memory is freed and the list is set to empty. +(jpeg_destroy also releases the storage, of course.) + +Note that the library is internally interested in APP0 and APP14 markers; +if you try to set a small nonzero length limit on these types, the library +will silently force the length up to the minimum it wants. (But you can set +a zero length limit to prevent them from being saved at all.) Also, in a +16-bit environment, the maximum length limit may be constrained to less than +65533 by malloc() limitations. It is therefore best not to assume that the +effective length limit is exactly what you set it to be. + + +If you want to supply your own marker-reading routine, you do it by calling +jpeg_set_marker_processor(). A marker processor routine must have the +signature + boolean jpeg_marker_parser_method (j_decompress_ptr cinfo) +Although the marker code is not explicitly passed, the routine can find it +in cinfo->unread_marker. At the time of call, the marker proper has been +read from the data source module. The processor routine is responsible for +reading the marker length word and the remaining parameter bytes, if any. +Return TRUE to indicate success. (FALSE should be returned only if you are +using a suspending data source and it tells you to suspend. See the standard +marker processors in jdmarker.c for appropriate coding methods if you need to +use a suspending data source.) + +If you override the default APP0 or APP14 processors, it is up to you to +recognize JFIF and Adobe markers if you want colorspace recognition to occur +properly. We recommend copying and extending the default processors if you +want to do that. (A better idea is to save these marker types for later +examination by calling jpeg_save_markers(); that method doesn't interfere +with the library's own processing of these markers.) + +jpeg_set_marker_processor() and jpeg_save_markers() are mutually exclusive +--- if you call one it overrides any previous call to the other, for the +particular marker type specified. + +A simple example of an external COM processor can be found in djpeg.c. +Also, see jpegtran.c for an example of using jpeg_save_markers. + + +Raw (downsampled) image data +---------------------------- + +Some applications need to supply already-downsampled image data to the JPEG +compressor, or to receive raw downsampled data from the decompressor. The +library supports this requirement by allowing the application to write or +read raw data, bypassing the normal preprocessing or postprocessing steps. +The interface is different from the standard one and is somewhat harder to +use. If your interest is merely in bypassing color conversion, we recommend +that you use the standard interface and simply set jpeg_color_space = +in_color_space (or jpeg_color_space = out_color_space for decompression). +The mechanism described in this section is necessary only to supply or +receive downsampled image data, in which not all components have the same +dimensions. + + +To compress raw data, you must supply the data in the colorspace to be used +in the JPEG file (please read the earlier section on Special color spaces) +and downsampled to the sampling factors specified in the JPEG parameters. +You must supply the data in the format used internally by the JPEG library, +namely a JSAMPIMAGE array. This is an array of pointers to two-dimensional +arrays, each of type JSAMPARRAY. Each 2-D array holds the values for one +color component. This structure is necessary since the components are of +different sizes. If the image dimensions are not a multiple of the MCU size, +you must also pad the data correctly (usually, this is done by replicating +the last column and/or row). The data must be padded to a multiple of a DCT +block in each component: that is, each downsampled row must contain a +multiple of 8 valid samples, and there must be a multiple of 8 sample rows +for each component. (For applications such as conversion of digital TV +images, the standard image size is usually a multiple of the DCT block size, +so that no padding need actually be done.) + +The procedure for compression of raw data is basically the same as normal +compression, except that you call jpeg_write_raw_data() in place of +jpeg_write_scanlines(). Before calling jpeg_start_compress(), you must do +the following: + * Set cinfo->raw_data_in to TRUE. (It is set FALSE by jpeg_set_defaults().) + This notifies the library that you will be supplying raw data. + * Ensure jpeg_color_space is correct --- an explicit jpeg_set_colorspace() + call is a good idea. Note that since color conversion is bypassed, + in_color_space is ignored, except that jpeg_set_defaults() uses it to + choose the default jpeg_color_space setting. + * Ensure the sampling factors, cinfo->comp_info[i].h_samp_factor and + cinfo->comp_info[i].v_samp_factor, are correct. Since these indicate the + dimensions of the data you are supplying, it's wise to set them + explicitly, rather than assuming the library's defaults are what you want. + +To pass raw data to the library, call jpeg_write_raw_data() in place of +jpeg_write_scanlines(). The two routines work similarly except that +jpeg_write_raw_data takes a JSAMPIMAGE data array rather than JSAMPARRAY. +The scanlines count passed to and returned from jpeg_write_raw_data is +measured in terms of the component with the largest v_samp_factor. + +jpeg_write_raw_data() processes one MCU row per call, which is to say +v_samp_factor*DCTSIZE sample rows of each component. The passed num_lines +value must be at least max_v_samp_factor*DCTSIZE, and the return value will +be exactly that amount (or possibly some multiple of that amount, in future +library versions). This is true even on the last call at the bottom of the +image; don't forget to pad your data as necessary. + +The required dimensions of the supplied data can be computed for each +component as + cinfo->comp_info[i].width_in_blocks*DCTSIZE samples per row + cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image +after jpeg_start_compress() has initialized those fields. If the valid data +is smaller than this, it must be padded appropriately. For some sampling +factors and image sizes, additional dummy DCT blocks are inserted to make +the image a multiple of the MCU dimensions. The library creates such dummy +blocks itself; it does not read them from your supplied data. Therefore you +need never pad by more than DCTSIZE samples. An example may help here. +Assume 2h2v downsampling of YCbCr data, that is + cinfo->comp_info[0].h_samp_factor = 2 for Y + cinfo->comp_info[0].v_samp_factor = 2 + cinfo->comp_info[1].h_samp_factor = 1 for Cb + cinfo->comp_info[1].v_samp_factor = 1 + cinfo->comp_info[2].h_samp_factor = 1 for Cr + cinfo->comp_info[2].v_samp_factor = 1 +and suppose that the nominal image dimensions (cinfo->image_width and +cinfo->image_height) are 101x101 pixels. Then jpeg_start_compress() will +compute downsampled_width = 101 and width_in_blocks = 13 for Y, +downsampled_width = 51 and width_in_blocks = 7 for Cb and Cr (and the same +for the height fields). You must pad the Y data to at least 13*8 = 104 +columns and rows, the Cb/Cr data to at least 7*8 = 56 columns and rows. The +MCU height is max_v_samp_factor = 2 DCT rows so you must pass at least 16 +scanlines on each call to jpeg_write_raw_data(), which is to say 16 actual +sample rows of Y and 8 each of Cb and Cr. A total of 7 MCU rows are needed, +so you must pass a total of 7*16 = 112 "scanlines". The last DCT block row +of Y data is dummy, so it doesn't matter what you pass for it in the data +arrays, but the scanlines count must total up to 112 so that all of the Cb +and Cr data gets passed. + +Output suspension is supported with raw-data compression: if the data +destination module suspends, jpeg_write_raw_data() will return 0. +In this case the same data rows must be passed again on the next call. + + +Decompression with raw data output implies bypassing all postprocessing: +you cannot ask for rescaling or color quantization, for instance. More +seriously, you must deal with the color space and sampling factors present in +the incoming file. If your application only handles, say, 2h1v YCbCr data, +you must check for and fail on other color spaces or other sampling factors. +The library will not convert to a different color space for you. + +To obtain raw data output, set cinfo->raw_data_out = TRUE before +jpeg_start_decompress() (it is set FALSE by jpeg_read_header()). Be sure to +verify that the color space and sampling factors are ones you can handle. +Then call jpeg_read_raw_data() in place of jpeg_read_scanlines(). The +decompression process is otherwise the same as usual. + +jpeg_read_raw_data() returns one MCU row per call, and thus you must pass a +buffer of at least max_v_samp_factor*DCTSIZE scanlines (scanline counting is +the same as for raw-data compression). The buffer you pass must be large +enough to hold the actual data plus padding to DCT-block boundaries. As with +compression, any entirely dummy DCT blocks are not processed so you need not +allocate space for them, but the total scanline count includes them. The +above example of computing buffer dimensions for raw-data compression is +equally valid for decompression. + +Input suspension is supported with raw-data decompression: if the data source +module suspends, jpeg_read_raw_data() will return 0. You can also use +buffered-image mode to read raw data in multiple passes. + + +Really raw data: DCT coefficients +--------------------------------- + +It is possible to read or write the contents of a JPEG file as raw DCT +coefficients. This facility is mainly intended for use in lossless +transcoding between different JPEG file formats. Other possible applications +include lossless cropping of a JPEG image, lossless reassembly of a +multi-strip or multi-tile TIFF/JPEG file into a single JPEG datastream, etc. + +To read the contents of a JPEG file as DCT coefficients, open the file and do +jpeg_read_header() as usual. But instead of calling jpeg_start_decompress() +and jpeg_read_scanlines(), call jpeg_read_coefficients(). This will read the +entire image into a set of virtual coefficient-block arrays, one array per +component. The return value is a pointer to an array of virtual-array +descriptors. Each virtual array can be accessed directly using the JPEG +memory manager's access_virt_barray method (see Memory management, below, +and also read structure.txt's discussion of virtual array handling). Or, +for simple transcoding to a different JPEG file format, the array list can +just be handed directly to jpeg_write_coefficients(). + +Each block in the block arrays contains quantized coefficient values in +normal array order (not JPEG zigzag order). The block arrays contain only +DCT blocks containing real data; any entirely-dummy blocks added to fill out +interleaved MCUs at the right or bottom edges of the image are discarded +during reading and are not stored in the block arrays. (The size of each +block array can be determined from the width_in_blocks and height_in_blocks +fields of the component's comp_info entry.) This is also the data format +expected by jpeg_write_coefficients(). + +When you are done using the virtual arrays, call jpeg_finish_decompress() +to release the array storage and return the decompression object to an idle +state; or just call jpeg_destroy() if you don't need to reuse the object. + +If you use a suspending data source, jpeg_read_coefficients() will return +NULL if it is forced to suspend; a non-NULL return value indicates successful +completion. You need not test for a NULL return value when using a +non-suspending data source. + +It is also possible to call jpeg_read_coefficients() to obtain access to the +decoder's coefficient arrays during a normal decode cycle in buffered-image +mode. This frammish might be useful for progressively displaying an incoming +image and then re-encoding it without loss. To do this, decode in buffered- +image mode as discussed previously, then call jpeg_read_coefficients() after +the last jpeg_finish_output() call. The arrays will be available for your use +until you call jpeg_finish_decompress(). + + +To write the contents of a JPEG file as DCT coefficients, you must provide +the DCT coefficients stored in virtual block arrays. You can either pass +block arrays read from an input JPEG file by jpeg_read_coefficients(), or +allocate virtual arrays from the JPEG compression object and fill them +yourself. In either case, jpeg_write_coefficients() is substituted for +jpeg_start_compress() and jpeg_write_scanlines(). Thus the sequence is + * Create compression object + * Set all compression parameters as necessary + * Request virtual arrays if needed + * jpeg_write_coefficients() + * jpeg_finish_compress() + * Destroy or re-use compression object +jpeg_write_coefficients() is passed a pointer to an array of virtual block +array descriptors; the number of arrays is equal to cinfo.num_components. + +The virtual arrays need only have been requested, not realized, before +jpeg_write_coefficients() is called. A side-effect of +jpeg_write_coefficients() is to realize any virtual arrays that have been +requested from the compression object's memory manager. Thus, when obtaining +the virtual arrays from the compression object, you should fill the arrays +after calling jpeg_write_coefficients(). The data is actually written out +when you call jpeg_finish_compress(); jpeg_write_coefficients() only writes +the file header. + +When writing raw DCT coefficients, it is crucial that the JPEG quantization +tables and sampling factors match the way the data was encoded, or the +resulting file will be invalid. For transcoding from an existing JPEG file, +we recommend using jpeg_copy_critical_parameters(). This routine initializes +all the compression parameters to default values (like jpeg_set_defaults()), +then copies the critical information from a source decompression object. +The decompression object should have just been used to read the entire +JPEG input file --- that is, it should be awaiting jpeg_finish_decompress(). + +jpeg_write_coefficients() marks all tables stored in the compression object +as needing to be written to the output file (thus, it acts like +jpeg_start_compress(cinfo, TRUE)). This is for safety's sake, to avoid +emitting abbreviated JPEG files by accident. If you really want to emit an +abbreviated JPEG file, call jpeg_suppress_tables(), or set the tables' +individual sent_table flags, between calling jpeg_write_coefficients() and +jpeg_finish_compress(). + + +Progress monitoring +------------------- + +Some applications may need to regain control from the JPEG library every so +often. The typical use of this feature is to produce a percent-done bar or +other progress display. (For a simple example, see cjpeg.c or djpeg.c.) +Although you do get control back frequently during the data-transferring pass +(the jpeg_read_scanlines or jpeg_write_scanlines loop), any additional passes +will occur inside jpeg_finish_compress or jpeg_start_decompress; those +routines may take a long time to execute, and you don't get control back +until they are done. + +You can define a progress-monitor routine which will be called periodically +by the library. No guarantees are made about how often this call will occur, +so we don't recommend you use it for mouse tracking or anything like that. +At present, a call will occur once per MCU row, scanline, or sample row +group, whichever unit is convenient for the current processing mode; so the +wider the image, the longer the time between calls. During the data +transferring pass, only one call occurs per call of jpeg_read_scanlines or +jpeg_write_scanlines, so don't pass a large number of scanlines at once if +you want fine resolution in the progress count. (If you really need to use +the callback mechanism for time-critical tasks like mouse tracking, you could +insert additional calls inside some of the library's inner loops.) + +To establish a progress-monitor callback, create a struct jpeg_progress_mgr, +fill in its progress_monitor field with a pointer to your callback routine, +and set cinfo->progress to point to the struct. The callback will be called +whenever cinfo->progress is non-NULL. (This pointer is set to NULL by +jpeg_create_compress or jpeg_create_decompress; the library will not change +it thereafter. So if you allocate dynamic storage for the progress struct, +make sure it will live as long as the JPEG object does. Allocating from the +JPEG memory manager with lifetime JPOOL_PERMANENT will work nicely.) You +can use the same callback routine for both compression and decompression. + +The jpeg_progress_mgr struct contains four fields which are set by the library: + long pass_counter; /* work units completed in this pass */ + long pass_limit; /* total number of work units in this pass */ + int completed_passes; /* passes completed so far */ + int total_passes; /* total number of passes expected */ +During any one pass, pass_counter increases from 0 up to (not including) +pass_limit; the step size is usually but not necessarily 1. The pass_limit +value may change from one pass to another. The expected total number of +passes is in total_passes, and the number of passes already completed is in +completed_passes. Thus the fraction of work completed may be estimated as + completed_passes + (pass_counter/pass_limit) + -------------------------------------------- + total_passes +ignoring the fact that the passes may not be equal amounts of work. + +When decompressing, pass_limit can even change within a pass, because it +depends on the number of scans in the JPEG file, which isn't always known in +advance. The computed fraction-of-work-done may jump suddenly (if the library +discovers it has overestimated the number of scans) or even decrease (in the +opposite case). It is not wise to put great faith in the work estimate. + +When using the decompressor's buffered-image mode, the progress monitor work +estimate is likely to be completely unhelpful, because the library has no way +to know how many output passes will be demanded of it. Currently, the library +sets total_passes based on the assumption that there will be one more output +pass if the input file end hasn't yet been read (jpeg_input_complete() isn't +TRUE), but no more output passes if the file end has been reached when the +output pass is started. This means that total_passes will rise as additional +output passes are requested. If you have a way of determining the input file +size, estimating progress based on the fraction of the file that's been read +will probably be more useful than using the library's value. + + +Memory management +----------------- + +This section covers some key facts about the JPEG library's built-in memory +manager. For more info, please read structure.txt's section about the memory +manager, and consult the source code if necessary. + +All memory and temporary file allocation within the library is done via the +memory manager. If necessary, you can replace the "back end" of the memory +manager to control allocation yourself (for example, if you don't want the +library to use malloc() and free() for some reason). + +Some data is allocated "permanently" and will not be freed until the JPEG +object is destroyed. Most data is allocated "per image" and is freed by +jpeg_finish_compress, jpeg_finish_decompress, or jpeg_abort. You can call the +memory manager yourself to allocate structures that will automatically be +freed at these times. Typical code for this is + ptr = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, size); +Use JPOOL_PERMANENT to get storage that lasts as long as the JPEG object. +Use alloc_large instead of alloc_small for anything bigger than a few Kbytes. +There are also alloc_sarray and alloc_barray routines that automatically +build 2-D sample or block arrays. + +The library's minimum space requirements to process an image depend on the +image's width, but not on its height, because the library ordinarily works +with "strip" buffers that are as wide as the image but just a few rows high. +Some operating modes (eg, two-pass color quantization) require full-image +buffers. Such buffers are treated as "virtual arrays": only the current strip +need be in memory, and the rest can be swapped out to a temporary file. + +If you use the simplest memory manager back end (jmemnobs.c), then no +temporary files are used; virtual arrays are simply malloc()'d. Images bigger +than memory can be processed only if your system supports virtual memory. +The other memory manager back ends support temporary files of various flavors +and thus work in machines without virtual memory. They may also be useful on +Unix machines if you need to process images that exceed available swap space. + +When using temporary files, the library will make the in-memory buffers for +its virtual arrays just big enough to stay within a "maximum memory" setting. +Your application can set this limit by setting cinfo->mem->max_memory_to_use +after creating the JPEG object. (Of course, there is still a minimum size for +the buffers, so the max-memory setting is effective only if it is bigger than +the minimum space needed.) If you allocate any large structures yourself, you +must allocate them before jpeg_start_compress() or jpeg_start_decompress() in +order to have them counted against the max memory limit. Also keep in mind +that space allocated with alloc_small() is ignored, on the assumption that +it's too small to be worth worrying about; so a reasonable safety margin +should be left when setting max_memory_to_use. + + +Memory usage +------------ + +Working memory requirements while performing compression or decompression +depend on image dimensions, image characteristics (such as colorspace and +JPEG process), and operating mode (application-selected options). + +As of v6b, the decompressor requires: + 1. About 24K in more-or-less-fixed-size data. This varies a bit depending + on operating mode and image characteristics (particularly color vs. + grayscale), but it doesn't depend on image dimensions. + 2. Strip buffers (of size proportional to the image width) for IDCT and + upsampling results. The worst case for commonly used sampling factors + is about 34 bytes * width in pixels for a color image. A grayscale image + only needs about 8 bytes per pixel column. + 3. A full-image DCT coefficient buffer is needed to decode a multi-scan JPEG + file (including progressive JPEGs), or whenever you select buffered-image + mode. This takes 2 bytes/coefficient. At typical 2x2 sampling, that's + 3 bytes per pixel for a color image. Worst case (1x1 sampling) requires + 6 bytes/pixel. For grayscale, figure 2 bytes/pixel. + 4. To perform 2-pass color quantization, the decompressor also needs a + 128K color lookup table and a full-image pixel buffer (3 bytes/pixel). +This does not count any memory allocated by the application, such as a +buffer to hold the final output image. + +The above figures are valid for 8-bit JPEG data precision and a machine with +32-bit ints. For 12-bit JPEG data, double the size of the strip buffers and +quantization pixel buffer. The "fixed-size" data will be somewhat smaller +with 16-bit ints, larger with 64-bit ints. Also, CMYK or other unusual +color spaces will require different amounts of space. + +The full-image coefficient and pixel buffers, if needed at all, do not +have to be fully RAM resident; you can have the library use temporary +files instead when the total memory usage would exceed a limit you set. +(But if your OS supports virtual memory, it's probably better to just use +jmemnobs and let the OS do the swapping.) + +The compressor's memory requirements are similar, except that it has no need +for color quantization. Also, it needs a full-image DCT coefficient buffer +if Huffman-table optimization is asked for, even if progressive mode is not +requested. + +If you need more detailed information about memory usage in a particular +situation, you can enable the MEM_STATS code in jmemmgr.c. + + +Library compile-time options +---------------------------- + +A number of compile-time options are available by modifying jmorecfg.h. + +The JPEG standard provides for both the baseline 8-bit DCT process and +a 12-bit DCT process. The IJG code supports 12-bit lossy JPEG if you define +BITS_IN_JSAMPLE as 12 rather than 8. Note that this causes JSAMPLE to be +larger than a char, so it affects the surrounding application's image data. +The sample applications cjpeg and djpeg can support 12-bit mode only for PPM +and GIF file formats; you must disable the other file formats to compile a +12-bit cjpeg or djpeg. (install.txt has more information about that.) +At present, a 12-bit library can handle *only* 12-bit images, not both +precisions. + +Note that a 12-bit library always compresses in Huffman optimization mode, +in order to generate valid Huffman tables. This is necessary because our +default Huffman tables only cover 8-bit data. If you need to output 12-bit +files in one pass, you'll have to supply suitable default Huffman tables. +You may also want to supply your own DCT quantization tables; the existing +quality-scaling code has been developed for 8-bit use, and probably doesn't +generate especially good tables for 12-bit. + +The maximum number of components (color channels) in the image is determined +by MAX_COMPONENTS. The JPEG standard allows up to 255 components, but we +expect that few applications will need more than four or so. + +On machines with unusual data type sizes, you may be able to improve +performance or reduce memory space by tweaking the various typedefs in +jmorecfg.h. In particular, on some RISC CPUs, access to arrays of "short"s +is quite slow; consider trading memory for speed by making JCOEF, INT16, and +UINT16 be "int" or "unsigned int". UINT8 is also a candidate to become int. +You probably don't want to make JSAMPLE be int unless you have lots of memory +to burn. + +You can reduce the size of the library by compiling out various optional +functions. To do this, undefine xxx_SUPPORTED symbols as necessary. + +You can also save a few K by not having text error messages in the library; +the standard error message table occupies about 5Kb. This is particularly +reasonable for embedded applications where there's no good way to display +a message anyway. To do this, remove the creation of the message table +(jpeg_std_message_table[]) from jerror.c, and alter format_message to do +something reasonable without it. You could output the numeric value of the +message code number, for example. If you do this, you can also save a couple +more K by modifying the TRACEMSn() macros in jerror.h to expand to nothing; +you don't need trace capability anyway, right? + + +Portability considerations +-------------------------- + +The JPEG library has been written to be extremely portable; the sample +applications cjpeg and djpeg are slightly less so. This section summarizes +the design goals in this area. (If you encounter any bugs that cause the +library to be less portable than is claimed here, we'd appreciate hearing +about them.) + +The code works fine on ANSI C and C++ compilers, using any of the popular +system include file setups, and some not-so-popular ones too. + +The code is not dependent on the exact sizes of the C data types. As +distributed, we make the assumptions that + char is at least 8 bits wide + short is at least 16 bits wide + int is at least 16 bits wide + long is at least 32 bits wide +(These are the minimum requirements of the ANSI C standard.) Wider types will +work fine, although memory may be used inefficiently if char is much larger +than 8 bits or short is much bigger than 16 bits. The code should work +equally well with 16- or 32-bit ints. + +In a system where these assumptions are not met, you may be able to make the +code work by modifying the typedefs in jmorecfg.h. However, you will probably +have difficulty if int is less than 16 bits wide, since references to plain +int abound in the code. + +char can be either signed or unsigned, although the code runs faster if an +unsigned char type is available. If char is wider than 8 bits, you will need +to redefine JOCTET and/or provide custom data source/destination managers so +that JOCTET represents exactly 8 bits of data on external storage. + +The JPEG library proper does not assume ASCII representation of characters. +But some of the image file I/O modules in cjpeg/djpeg do have ASCII +dependencies in file-header manipulation; so does cjpeg's select_file_type() +routine. + +The JPEG library does not rely heavily on the C library. In particular, C +stdio is used only by the data source/destination modules and the error +handler, all of which are application-replaceable. (cjpeg/djpeg are more +heavily dependent on stdio.) malloc and free are called only from the memory +manager "back end" module, so you can use a different memory allocator by +replacing that one file. + +More info about porting the code may be gleaned by reading jconfig.txt, +jmorecfg.h, and jinclude.h. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/LICENSE.md glmark2-2021.02/android/jni/src/libjpeg-turbo/LICENSE.md --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/LICENSE.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/LICENSE.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,88 @@ +libjpeg-turbo Licenses +====================== + +libjpeg-turbo is covered by three compatible BSD-style open source licenses: + +- The IJG (Independent JPEG Group) License, which is listed in + [README.ijg](README.ijg) + + This license applies to the libjpeg API library and associated programs + (any code inherited from libjpeg, and any modifications to that code.) + +- The Modified (3-clause) BSD License, which is listed in + [turbojpeg.c](turbojpeg.c) + + This license covers the TurboJPEG API library and associated programs. + +- The zlib License, which is listed in [simd/jsimdext.inc](simd/jsimdext.inc) + + This license is a subset of the other two, and it covers the libjpeg-turbo + SIMD extensions. + + +Complying with the libjpeg-turbo Licenses +========================================= + +This section provides a roll-up of the libjpeg-turbo licensing terms, to the +best of our understanding. + +1. If you are distributing a modified version of the libjpeg-turbo source, + then: + + 1. You cannot alter or remove any existing copyright or license notices + from the source. + + **Origin** + - Clause 1 of the IJG License + - Clause 1 of the Modified BSD License + - Clauses 1 and 3 of the zlib License + + 2. You must add your own copyright notice to the header of each source + file you modified, so others can tell that you modified that file (if + there is not an existing copyright header in that file, then you can + simply add a notice stating that you modified the file.) + + **Origin** + - Clause 1 of the IJG License + - Clause 2 of the zlib License + + 3. You must include the IJG README file, and you must not alter any of the + copyright or license text in that file. + + **Origin** + - Clause 1 of the IJG License + +2. If you are distributing only libjpeg-turbo binaries without the source, or + if you are distributing an application that statically links with + libjpeg-turbo, then: + + 1. Your product documentation must include a message stating: + + This software is based in part on the work of the Independent JPEG + Group. + + **Origin** + - Clause 2 of the IJG license + + 2. If your binary distribution includes or uses the TurboJPEG API, then + your product documentation must include the text of the Modified BSD + License. + + **Origin** + - Clause 2 of the Modified BSD License + +3. You cannot use the name of the IJG or The libjpeg-turbo Project or the + contributors thereof in advertising, publicity, etc. + + **Origin** + - IJG License + - Clause 3 of the Modified BSD License + +4. The IJG and The libjpeg-turbo Project do not warrant libjpeg-turbo to be + free of defects, nor do we accept any liability for undesirable + consequences resulting from your use of the software. + + **Origin** + - IJG License + - Modified BSD License + - zlib License diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/Makefile.am glmark2-2021.02/android/jni/src/libjpeg-turbo/Makefile.am --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/Makefile.am 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/Makefile.am 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,778 @@ +lib_LTLIBRARIES = libjpeg.la +libjpeg_la_LDFLAGS = -version-info ${LIBTOOL_CURRENT}:${SO_MINOR_VERSION}:${SO_AGE} -no-undefined +include_HEADERS = jerror.h jmorecfg.h jpeglib.h + +if WITH_TURBOJPEG +lib_LTLIBRARIES += libturbojpeg.la +libturbojpeg_la_LDFLAGS = -version-info 1:0:1 -no-undefined +include_HEADERS += turbojpeg.h +endif + +nodist_include_HEADERS = jconfig.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = pkgscripts/libjpeg.pc pkgscripts/libturbojpeg.pc + +HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \ + jpeg_nbits_table.h + +libjpeg_la_SOURCES = $(HDRS) jcapimin.c jcapistd.c jccoefct.c jccolor.c \ + jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c \ + jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c jctrans.c \ + jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c \ + jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c \ + jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c \ + jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c \ + jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c + +if WITH_ARITH +libjpeg_la_SOURCES += jaricom.c +endif + +if WITH_ARITH_ENC +libjpeg_la_SOURCES += jcarith.c +endif + +if WITH_ARITH_DEC +libjpeg_la_SOURCES += jdarith.c +endif + + +SUBDIRS = java + + +if WITH_TURBOJPEG + +libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpeg.c turbojpeg.h \ + transupp.c transupp.h jdatadst-tj.c jdatasrc-tj.c + +if WITH_JAVA + +libturbojpeg_la_SOURCES += turbojpeg-jni.c +libturbojpeg_la_CFLAGS = ${JNI_CFLAGS} +TJMAPFILE = turbojpeg-mapfile.jni + +else + +TJMAPFILE = turbojpeg-mapfile + +endif + +libturbojpeg_la_SOURCES += $(TJMAPFILE) + +if VERSION_SCRIPT +libturbojpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE) +endif + +endif + + +if VERSION_SCRIPT +libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map +endif + + +if WITH_SIMD + +SUBDIRS += simd +libjpeg_la_LIBADD = simd/libsimd.la +libturbojpeg_la_LIBADD = simd/libsimd.la + +else + +libjpeg_la_SOURCES += jsimd_none.c + +endif + + +bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom +noinst_PROGRAMS = jcstest + + +if WITH_TURBOJPEG + +bin_PROGRAMS += tjbench + +noinst_PROGRAMS += tjunittest + +tjbench_SOURCES = tjbench.c bmp.h bmp.c tjutil.h tjutil.c rdbmp.c rdppm.c \ + wrbmp.c wrppm.c + +tjbench_LDADD = libturbojpeg.la libjpeg.la -lm + +tjbench_CFLAGS = -DBMP_SUPPORTED -DPPM_SUPPORTED + +tjunittest_SOURCES = tjunittest.c tjutil.h tjutil.c + +tjunittest_LDADD = libturbojpeg.la + +endif + + +cjpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c cjpeg.c rdgif.c rdppm.c rdswitch.c +if WITH_12BIT +else +cjpeg_SOURCES += rdbmp.c rdtarga.c +endif + +cjpeg_LDADD = libjpeg.la + +cjpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED +if WITH_12BIT +else +cjpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED +endif + +djpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c djpeg.c rdcolmap.c rdswitch.c \ + wrgif.c wrppm.c +if WITH_12BIT +else +djpeg_SOURCES += wrbmp.c wrtarga.c +endif + +djpeg_LDADD = libjpeg.la + +djpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED +if WITH_12BIT +else +djpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED +endif + +jpegtran_SOURCES = jpegtran.c rdswitch.c cdjpeg.c transupp.c transupp.h + +jpegtran_LDADD = libjpeg.la + +rdjpgcom_SOURCES = rdjpgcom.c + +rdjpgcom_LDADD = libjpeg.la + +wrjpgcom_SOURCES = wrjpgcom.c + +wrjpgcom_LDADD = libjpeg.la + +jcstest_SOURCES = jcstest.c + +jcstest_LDADD = libjpeg.la + +dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1 + +DOCS= coderules.txt jconfig.txt change.log rdrle.c wrrle.c BUILDING.md \ + ChangeLog.txt + +dist_doc_DATA = README.ijg README.md libjpeg.txt structure.txt usage.txt \ + wizard.txt LICENSE.md + +exampledir = $(docdir) +dist_example_DATA = example.c + + +EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \ + sharedlib/CMakeLists.txt cmakescripts libjpeg.map.in doc doxygen.config \ + doxygen-extra.css jccolext.c jdcolext.c jdcol565.c jdmrgext.c jdmrg565.c \ + jstdhuff.c jdcoefct.h jdmainct.h jdmaster.h jdsample.h wrppm.h \ + md5/CMakeLists.txt + +dist-hook: + rm -rf `find $(distdir) -name .svn` + + +SUBDIRS += md5 + +if WITH_12BIT + +TESTORIG = testorig12.jpg +MD5_JPEG_RGB_ISLOW = 9620f424569594bb9242b48498ad801f +MD5_PPM_RGB_ISLOW = f3301d2219783b8b3d942b7239fa50c0 +MD5_JPEG_422_IFAST_OPT = 7322e3bd2f127f7de4b40d4480ce60e4 +MD5_PPM_422_IFAST = 79807fa552899e66a04708f533e16950 +MD5_PPM_422M_IFAST = 07737bfe8a7c1c87aaa393a0098d16b0 +MD5_JPEG_420_IFAST_Q100_PROG = a1da220b5604081863a504297ed59e55 +MD5_PPM_420_Q100_IFAST = 1b3730122709f53d007255e8dfd3305e +MD5_PPM_420M_Q100_IFAST = 980a1a3c5bf9510022869d30b7d26566 +MD5_JPEG_GRAY_ISLOW = 235c90707b16e2e069f37c888b2636d9 +MD5_PPM_GRAY_ISLOW = 7213c10af507ad467da5578ca5ee1fca +MD5_PPM_GRAY_ISLOW_RGB = e96ee81c30a6ed422d466338bd3de65d +MD5_JPEG_420S_IFAST_OPT = 7af8e60be4d9c227ec63ac9b6630855e +MD5_JPEG_3x2_FLOAT_PROG_SSE = a8c17daf77b457725ec929e215b603f8 +MD5_PPM_3x2_FLOAT_SSE = 42876ab9e5c2f76a87d08db5fbd57956 +MD5_JPEG_3x2_FLOAT_PROG_32BIT = a8c17daf77b457725ec929e215b603f8 +MD5_PPM_3x2_FLOAT_32BIT = 42876ab9e5c2f76a87d08db5fbd57956 +MD5_PPM_3x2_FLOAT_64BIT = d6fbc71153b3d8ded484dbc17c7b9cf4 +MD5_JPEG_3x2_IFAST_PROG = 1396cc2b7185cfe943d408c9d305339e +MD5_PPM_3x2_IFAST = 3975985ef6eeb0a2cdc58daa651ccc00 +MD5_PPM_420M_ISLOW_2_1 = 4ca6be2a6f326ff9eaab63e70a8259c0 +MD5_PPM_420M_ISLOW_15_8 = 12aa9f9534c1b3d7ba047322226365eb +MD5_PPM_420M_ISLOW_13_8 = f7e22817c7b25e1393e4ec101e9d4e96 +MD5_PPM_420M_ISLOW_11_8 = 800a16f9f4dc9b293197bfe11be10a82 +MD5_PPM_420M_ISLOW_9_8 = 06b7a92a9bc69f4dc36ec40f1937d55c +MD5_PPM_420M_ISLOW_7_8 = 3ec444a14a4ab4eab88ffc49c48eca43 +MD5_PPM_420M_ISLOW_3_4 = 3e726b7ea872445b19437d1c1d4f0d93 +MD5_PPM_420M_ISLOW_5_8 = a8a771abdc94301d20ffac119b2caccd +MD5_PPM_420M_ISLOW_1_2 = b419124dd5568b085787234866102866 +MD5_PPM_420M_ISLOW_3_8 = 343d19015531b7bbe746124127244fa8 +MD5_PPM_420M_ISLOW_1_4 = 35fd59d866e44659edfa3c18db2a3edb +MD5_PPM_420M_ISLOW_1_8 = ccaed48ac0aedefda5d4abe4013f4ad7 +MD5_PPM_420_ISLOW_SKIP15_31 = 86664cd9dc956536409e44e244d20a97 +MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 452a21656115a163029cfba5c04fa76a +MD5_PPM_444_ISLOW_SKIP1_6 = ef63901f71ef7a75cd78253fc0914f84 +MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = 15b173fb5872d9575572fbcc1b05956f +MD5_JPEG_CROP = cdb35ff4b4519392690ea040c56ea99c + +else + +TESTORIG = testorig.jpg +MD5_JPEG_RGB_ISLOW = 768e970dd57b340ff1b83c9d3d47c77b +MD5_PPM_RGB_ISLOW = 00a257f5393fef8821f2b88ac7421291 +MD5_BMP_RGB_ISLOW_565 = f07d2e75073e4bb10f6c6f4d36e2e3be +MD5_BMP_RGB_ISLOW_565D = 4cfa0928ef3e6bb626d7728c924cfda4 +MD5_JPEG_422_IFAST_OPT = 2540287b79d913f91665e660303ab2c8 +MD5_PPM_422_IFAST = 35bd6b3f833bad23de82acea847129fa +MD5_PPM_422M_IFAST = 8dbc65323d62cca7c91ba02dd1cfa81d +MD5_BMP_422M_IFAST_565 = 3294bd4d9a1f2b3d08ea6020d0db7065 +MD5_BMP_422M_IFAST_565D = da98c9c7b6039511be4a79a878a9abc1 +MD5_JPEG_420_IFAST_Q100_PROG = 990cbe0329c882420a2094da7e5adade +MD5_PPM_420_Q100_IFAST = 5a732542015c278ff43635e473a8a294 +MD5_PPM_420M_Q100_IFAST = ff692ee9323a3b424894862557c092f1 +MD5_JPEG_GRAY_ISLOW = 72b51f894b8f4a10b3ee3066770aa38d +MD5_PPM_GRAY_ISLOW = 8d3596c56eace32f205deccc229aa5ed +MD5_PPM_GRAY_ISLOW_RGB = 116424ac07b79e5e801f00508eab48ec +MD5_BMP_GRAY_ISLOW_565 = 12f78118e56a2f48b966f792fedf23cc +MD5_BMP_GRAY_ISLOW_565D = bdbbd616441a24354c98553df5dc82db +MD5_JPEG_420S_IFAST_OPT = 388708217ac46273ca33086b22827ed8 +# See README.md for more details on why this next bit is necessary. +MD5_JPEG_3x2_FLOAT_PROG_SSE = 343e3f8caf8af5986ebaf0bdc13b5c71 +MD5_PPM_3x2_FLOAT_SSE = 1a75f36e5904d6fc3a85a43da9ad89bb +MD5_JPEG_3x2_FLOAT_PROG_32BIT = 9bca803d2042bd1eb03819e2bf92b3e5 +MD5_PPM_3x2_FLOAT_32BIT = f6bfab038438ed8f5522fbd33595dcdc +MD5_PPM_3x2_FLOAT_64BIT = 0e917a34193ef976b679a6b069b1be26 +MD5_JPEG_3x2_IFAST_PROG = 1ee5d2c1a77f2da495f993c8c7cceca5 +MD5_PPM_3x2_IFAST = fd283664b3b49127984af0a7f118fccd +MD5_JPEG_420_ISLOW_ARI = e986fb0a637a8d833d96e8a6d6d84ea1 +MD5_JPEG_444_ISLOW_PROGARI = 0a8f1c8f66e113c3cf635df0a475a617 +MD5_PPM_420M_IFAST_ARI = 72b59a99bcf1de24c5b27d151bde2437 +MD5_JPEG_420_ISLOW = 9a68f56bc76e466aa7e52f415d0f4a5f +MD5_PPM_420M_ISLOW_2_1 = 9f9de8c0612f8d06869b960b05abf9c9 +MD5_PPM_420M_ISLOW_15_8 = b6875bc070720b899566cc06459b63b7 +MD5_PPM_420M_ISLOW_13_8 = bc3452573c8152f6ae552939ee19f82f +MD5_PPM_420M_ISLOW_11_8 = d8cc73c0aaacd4556569b59437ba00a5 +MD5_PPM_420M_ISLOW_9_8 = d25e61bc7eac0002f5b393aa223747b6 +MD5_PPM_420M_ISLOW_7_8 = ddb564b7c74a09494016d6cd7502a946 +MD5_PPM_420M_ISLOW_3_4 = 8ed8e68808c3fbc4ea764fc9d2968646 +MD5_PPM_420M_ISLOW_5_8 = a3363274999da2366a024efae6d16c9b +MD5_PPM_420M_ISLOW_1_2 = e692a315cea26b988c8e8b29a5dbcd81 +MD5_PPM_420M_ISLOW_3_8 = 79eca9175652ced755155c90e785a996 +MD5_PPM_420M_ISLOW_1_4 = 79cd778f8bf1a117690052cacdd54eca +MD5_PPM_420M_ISLOW_1_8 = 391b3d4aca640c8567d6f8745eb2142f +MD5_BMP_420_ISLOW_256 = 4980185e3776e89bd931736e1cddeee6 +MD5_BMP_420_ISLOW_565 = bf9d13e16c4923b92e1faa604d7922cb +MD5_BMP_420_ISLOW_565D = 6bde71526acc44bcff76f696df8638d2 +MD5_BMP_420M_ISLOW_565 = 8dc0185245353cfa32ad97027342216f +MD5_BMP_420M_ISLOW_565D =d1be3a3339166255e76fa50a0d70d73e +MD5_PPM_420_ISLOW_SKIP15_31 = c4c65c1e43d7275cd50328a61e6534f0 +MD5_PPM_420_ISLOW_ARI_SKIP16_139 = 087c6b123db16ac00cb88c5b590bb74a +MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 26eb36ccc7d1f0cb80cdabb0ac8b5d99 +MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4 = 886c6775af22370257122f8b16207e6d +MD5_PPM_444_ISLOW_SKIP1_6 = 5606f86874cf26b8fcee1117a0a436a6 +MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = db87dc7ce26bcdc7a6b56239ce2b9d6c +MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0 = cb57b32bd6d03e35432362f7bf184b6d +MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d + +endif + +.PHONY: test +test: tjquicktest tjbittest bittest + +if CROSS_COMPILING +tjquicktest: testclean +else +tjquicktest: testclean all +endif + +if WITH_TURBOJPEG +if WITH_JAVA + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -noyuvpad + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi -noyuvpad +endif + ./tjunittest + ./tjunittest -alloc + ./tjunittest -yuv + ./tjunittest -yuv -alloc + ./tjunittest -yuv -noyuvpad +endif + echo GREAT SUCCESS! + +if CROSS_COMPILING +tjbittest: testclean +else +tjbittest: testclean all +endif + +if WITH_TURBOJPEG + +MD5_PPM_GRAY_TILE = 89d3ca21213d9d864b50b4e4e7de4ca6 +MD5_PPM_420_8x8_TILE = 847fceab15c5b7b911cb986cf0f71de3 +MD5_PPM_420_16x16_TILE = ca45552a93687e078f7137cc4126a7b0 +MD5_PPM_420_32x32_TILE = d8676f1d6b68df358353bba9844f4a00 +MD5_PPM_420_64x64_TILE = 4e4c1a3d7ea4bace4f868bcbe83b7050 +MD5_PPM_420_128x128_TILE = f24c3429c52265832beab9df72a0ceae +MD5_PPM_420M_8x8_TILE = bc25320e1f4c31ce2e610e43e9fd173c +MD5_PPM_420M_TILE = 75ffdf14602258c5c189522af57fa605 +MD5_PPM_422_8x8_TILE = d83dacd9fc73b0a6f10c09acad64eb1e +MD5_PPM_422_16x16_TILE = 35077fb610d72dd743b1eb0cbcfe10fb +MD5_PPM_422_32x32_TILE = e6902ed8a449ecc0f0d6f2bf945f65f7 +MD5_PPM_422_64x64_TILE = 2b4502a8f316cedbde1da7bce3d2231e +MD5_PPM_422_128x128_TILE = f0b5617d578f5e13c8eee215d64d4877 +MD5_PPM_422M_8x8_TILE = 828941d7f41cd6283abd6beffb7fd51d +MD5_PPM_422M_TILE = e877ae1324c4a280b95376f7f018172f +MD5_PPM_444_TILE = 7964e41e67cfb8d0a587c0aa4798f9c3 + +# Test compressing from/decompressing to an arbitrary subregion of a larger +# image buffer + cp $(srcdir)/testimages/testorig.ppm testout_tile.ppm + ./tjbench testout_tile.ppm 95 -rgb -quiet -tile -benchtime 0.01 >/dev/null 2>&1 + for i in 8 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_GRAY_TILE) testout_tile_GRAY_Q95_$$i\x$$i.ppm; \ + done + md5/md5cmp $(MD5_PPM_420_8x8_TILE) testout_tile_420_Q95_8x8.ppm + md5/md5cmp $(MD5_PPM_420_16x16_TILE) testout_tile_420_Q95_16x16.ppm + md5/md5cmp $(MD5_PPM_420_32x32_TILE) testout_tile_420_Q95_32x32.ppm + md5/md5cmp $(MD5_PPM_420_64x64_TILE) testout_tile_420_Q95_64x64.ppm + md5/md5cmp $(MD5_PPM_420_128x128_TILE) testout_tile_420_Q95_128x128.ppm + md5/md5cmp $(MD5_PPM_422_8x8_TILE) testout_tile_422_Q95_8x8.ppm + md5/md5cmp $(MD5_PPM_422_16x16_TILE) testout_tile_422_Q95_16x16.ppm + md5/md5cmp $(MD5_PPM_422_32x32_TILE) testout_tile_422_Q95_32x32.ppm + md5/md5cmp $(MD5_PPM_422_64x64_TILE) testout_tile_422_Q95_64x64.ppm + md5/md5cmp $(MD5_PPM_422_128x128_TILE) testout_tile_422_Q95_128x128.ppm + for i in 8 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_444_TILE) testout_tile_444_Q95_$$i\x$$i.ppm; \ + done + rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* + + ./tjbench testout_tile.ppm 95 -rgb -fastupsample -quiet -tile -benchtime 0.01 >/dev/null 2>&1 + md5/md5cmp $(MD5_PPM_420M_8x8_TILE) testout_tile_420_Q95_8x8.ppm + for i in 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_420M_TILE) testout_tile_420_Q95_$$i\x$$i.ppm; \ + done + md5/md5cmp $(MD5_PPM_422M_8x8_TILE) testout_tile_422_Q95_8x8.ppm + for i in 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_422M_TILE) testout_tile_422_Q95_$$i\x$$i.ppm; \ + done + rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* testout_tile.ppm + echo GREAT SUCCESS! + +endif + +if CROSS_COMPILING +bittest: testclean +else +bittest: testclean all +endif + +# These tests are carefully crafted to provide full coverage of as many of the +# underlying algorithms as possible (including all of the SIMD-accelerated +# ones.) + +# CC: null SAMP: fullsize FDCT: islow ENT: huff + ./cjpeg -rgb -dct int -outfile testout_rgb_islow.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_RGB_ISLOW) testout_rgb_islow.jpg +# CC: null SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -ppm -outfile testout_rgb_islow.ppm testout_rgb_islow.jpg + md5/md5cmp $(MD5_PPM_RGB_ISLOW) testout_rgb_islow.ppm + rm -f testout_rgb_islow.ppm +if WITH_12BIT + rm -f testout_rgb_islow.jpg +else +# CC: RGB->RGB565 SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg + md5/md5cmp $(MD5_BMP_RGB_ISLOW_565) testout_rgb_islow_565.bmp + rm -f testout_rgb_islow_565.bmp +# CC: RGB->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg + md5/md5cmp $(MD5_BMP_RGB_ISLOW_565D) testout_rgb_islow_565D.bmp + rm -f testout_rgb_islow_565D.bmp testout_rgb_islow.jpg +endif + +# CC: RGB->YCC SAMP: fullsize/h2v1 FDCT: ifast ENT: 2-pass huff + ./cjpeg -sample 2x1 -dct fast -opt -outfile testout_422_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_422_IFAST_OPT) testout_422_ifast_opt.jpg +# CC: YCC->RGB SAMP: fullsize/h2v1 fancy IDCT: ifast ENT: huff + ./djpeg -dct fast -outfile testout_422_ifast.ppm testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_PPM_422_IFAST) testout_422_ifast.ppm + rm -f testout_422_ifast.ppm +# CC: YCC->RGB SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct fast -nosmooth -outfile testout_422m_ifast.ppm testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_PPM_422M_IFAST) testout_422m_ifast.ppm + rm -f testout_422m_ifast.ppm +if WITH_12BIT + rm -f testout_422_ifast_opt.jpg +else +# CC: YCC->RGB565 SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_BMP_422M_IFAST_565) testout_422m_ifast_565.bmp + rm -f testout_422m_ifast_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_BMP_422M_IFAST_565D) testout_422m_ifast_565D.bmp + rm -f testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg +endif + +# CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff + ./cjpeg -sample 2x2 -quality 100 -dct fast -prog -outfile testout_420_q100_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420_IFAST_Q100_PROG) testout_420_q100_ifast_prog.jpg +# CC: YCC->RGB SAMP: fullsize/h2v2 fancy IDCT: ifast ENT: prog huff + ./djpeg -dct fast -outfile testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_420_Q100_IFAST) testout_420_q100_ifast.ppm + rm -f testout_420_q100_ifast.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: ifast ENT: prog huff + ./djpeg -dct fast -nosmooth -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_420M_Q100_IFAST) testout_420m_q100_ifast.ppm + rm -f testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + +# CC: RGB->Gray SAMP: fullsize FDCT: islow ENT: huff + ./cjpeg -gray -dct int -outfile testout_gray_islow.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_GRAY_ISLOW) testout_gray_islow.jpg +# CC: Gray->Gray SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -outfile testout_gray_islow.ppm testout_gray_islow.jpg + md5/md5cmp $(MD5_PPM_GRAY_ISLOW) testout_gray_islow.ppm + rm -f testout_gray_islow.ppm +# CC: Gray->RGB SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb -outfile testout_gray_islow_rgb.ppm testout_gray_islow.jpg + md5/md5cmp $(MD5_PPM_GRAY_ISLOW_RGB) testout_gray_islow_rgb.ppm + rm -f testout_gray_islow_rgb.ppm +if WITH_12BIT + rm -f testout_gray_islow.jpg +else +# CC: Gray->RGB565 SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_gray_islow_565.bmp testout_gray_islow.jpg + md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565) testout_gray_islow_565.bmp + rm -f testout_gray_islow_565.bmp +# CC: Gray->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg + md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565D) testout_gray_islow_565D.bmp + rm -f testout_gray_islow_565D.bmp testout_gray_islow.jpg +endif + +# CC: RGB->YCC SAMP: fullsize smooth/h2v2 smooth FDCT: islow +# ENT: 2-pass huff + ./cjpeg -sample 2x2 -smooth 1 -dct int -opt -outfile testout_420s_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420S_IFAST_OPT) testout_420s_ifast_opt.jpg + rm -f testout_420s_ifast_opt.jpg + +# The output of the floating point tests is not validated by default, because +# the output differs depending on the type of floating point math used, and +# this is only deterministic if the DCT/IDCT are implemented using SIMD +# instructions on a particular platform. Pass one of the following on the make +# command line to validate the floating point tests against one of the expected +# results: +# +# FLOATTEST=sse validate against the expected results from the libjpeg-turbo +# SSE SIMD extensions +# FLOATTEST=32bit validate against the expected results from the C code +# when running on a 32-bit FPU (or when SSE is being used for +# floating point math, which is generally the default with +# x86-64 compilers) +# FLOATTEST=64bit validate against the exepected results from the C code +# when running on a 64-bit FPU + +# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff + ./cjpeg -sample 3x2 -dct float -prog -outfile testout_3x2_float_prog.jpg $(srcdir)/testimages/testorig.ppm + if [ "${FLOATTEST}" = "sse" ]; then \ + md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_SSE) testout_3x2_float_prog.jpg; \ + elif [ "${FLOATTEST}" = "32bit" -o "${FLOATTEST}" = "64bit" ]; then \ + md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_32BIT) testout_3x2_float_prog.jpg; \ + fi +# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff + ./djpeg -dct float -outfile testout_3x2_float.ppm testout_3x2_float_prog.jpg + if [ "${FLOATTEST}" = "sse" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_SSE) testout_3x2_float.ppm; \ + elif [ "${FLOATTEST}" = "32bit" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_32BIT) testout_3x2_float.ppm; \ + elif [ "${FLOATTEST}" = "64bit" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_64BIT) testout_3x2_float.ppm; \ + fi + rm -f testout_3x2_float.ppm testout_3x2_float_prog.jpg + +# CC: RGB->YCC SAMP: fullsize/int FDCT: ifast ENT: prog huff + ./cjpeg -sample 3x2 -dct fast -prog -outfile testout_3x2_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_3x2_IFAST_PROG) testout_3x2_ifast_prog.jpg +# CC: YCC->RGB SAMP: fullsize/int IDCT: ifast ENT: prog huff + ./djpeg -dct fast -outfile testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_3x2_IFAST) testout_3x2_ifast.ppm + rm -f testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg + +if WITH_ARITH_ENC +# CC: YCC->RGB SAMP: fullsize/h2v2 FDCT: islow ENT: arith + ./cjpeg -dct int -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg + rm -f testout_420_islow_ari.jpg + ./jpegtran -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testimgint.jpg + md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg + rm -f testout_420_islow_ari.jpg +# CC: YCC->RGB SAMP: fullsize FDCT: islow ENT: prog arith + ./cjpeg -sample 1x1 -dct int -prog -arithmetic -outfile testout_444_islow_progari.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_444_ISLOW_PROGARI) testout_444_islow_progari.jpg + rm -f testout_444_islow_progari.jpg +endif +if WITH_ARITH_DEC +# CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith + ./djpeg -fast -ppm -outfile testout_420m_ifast_ari.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420M_IFAST_ARI) testout_420m_ifast_ari.ppm + rm -f testout_420m_ifast_ari.ppm + ./jpegtran -outfile testout_420_islow.jpg $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_JPEG_420_ISLOW) testout_420_islow.jpg + rm -f testout_420_islow.jpg +endif + +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 16x16 islow ENT: huff + ./djpeg -dct int -scale 2/1 -nosmooth -ppm -outfile testout_420m_islow_2_1.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_2_1) testout_420m_islow_2_1.ppm + rm -f testout_420m_islow_2_1.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 15x15 islow ENT: huff + ./djpeg -dct int -scale 15/8 -nosmooth -ppm -outfile testout_420m_islow_15_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_15_8) testout_420m_islow_15_8.ppm + rm -f testout_420m_islow_15_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 13x13 islow ENT: huff + ./djpeg -dct int -scale 13/8 -nosmooth -ppm -outfile testout_420m_islow_13_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_13_8) testout_420m_islow_13_8.ppm + rm -f testout_420m_islow_13_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 11x11 islow ENT: huff + ./djpeg -dct int -scale 11/8 -nosmooth -ppm -outfile testout_420m_islow_11_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_11_8) testout_420m_islow_11_8.ppm + rm -f testout_420m_islow_11_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 9x9 islow ENT: huff + ./djpeg -dct int -scale 9/8 -nosmooth -ppm -outfile testout_420m_islow_9_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_9_8) testout_420m_islow_9_8.ppm + rm -f testout_420m_islow_9_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 7x7 islow/14x14 islow ENT: huff + ./djpeg -dct int -scale 7/8 -nosmooth -ppm -outfile testout_420m_islow_7_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_7_8) testout_420m_islow_7_8.ppm + rm -f testout_420m_islow_7_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 6x6 islow/12x12 islow ENT: huff + ./djpeg -dct int -scale 3/4 -nosmooth -ppm -outfile testout_420m_islow_3_4.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_3_4) testout_420m_islow_3_4.ppm + rm -f testout_420m_islow_3_4.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 5x5 islow/10x10 islow ENT: huff + ./djpeg -dct int -scale 5/8 -nosmooth -ppm -outfile testout_420m_islow_5_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_5_8) testout_420m_islow_5_8.ppm + rm -f testout_420m_islow_5_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 4x4 islow/8x8 islow ENT: huff + ./djpeg -dct int -scale 1/2 -nosmooth -ppm -outfile testout_420m_islow_1_2.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_2) testout_420m_islow_1_2.ppm + rm -f testout_420m_islow_1_2.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 3x3 islow/6x6 islow ENT: huff + ./djpeg -dct int -scale 3/8 -nosmooth -ppm -outfile testout_420m_islow_3_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_3_8) testout_420m_islow_3_8.ppm + rm -f testout_420m_islow_3_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 2x2 islow/4x4 islow ENT: huff + ./djpeg -dct int -scale 1/4 -nosmooth -ppm -outfile testout_420m_islow_1_4.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_4) testout_420m_islow_1_4.ppm + rm -f testout_420m_islow_1_4.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 1x1 islow/2x2 islow ENT: huff + ./djpeg -dct int -scale 1/8 -nosmooth -ppm -outfile testout_420m_islow_1_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_8) testout_420m_islow_1_8.ppm + rm -f testout_420m_islow_1_8.ppm +if WITH_12BIT +else +# CC: YCC->RGB (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -colors 256 -bmp -outfile testout_420_islow_256.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_256) testout_420_islow_256.bmp + rm -f testout_420_islow_256.bmp +# CC: YCC->RGB565 SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_420_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_565) testout_420_islow_565.bmp + rm -f testout_420_islow_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_420_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_565D) testout_420_islow_565D.bmp + rm -f testout_420_islow_565D.bmp +# CC: YCC->RGB565 SAMP: h2v2 merged IDCT: islow ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_420m_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420M_ISLOW_565) testout_420m_islow_565.bmp + rm -f testout_420m_islow_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v2 merged IDCT: islow ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_420m_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420M_ISLOW_565D) testout_420m_islow_565D.bmp + rm -f testout_420m_islow_565D.bmp +endif + +# Partial decode tests. These tests are designed to cover all of the possible +# code paths in jpeg_skip_scanlines(). + +# Context rows: Yes Intra-iMCU row: Yes iMCU row prefetch: No ENT: huff + ./djpeg -dct int -skip 15,31 -ppm -outfile testout_420_islow_skip15,31.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420_ISLOW_SKIP15_31) testout_420_islow_skip15,31.ppm + rm -f testout_420_islow_skip15,31.ppm +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: Yes ENT: arith +if WITH_ARITH_DEC + ./djpeg -dct int -skip 16,139 -ppm -outfile testout_420_islow_ari_skip16,139.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_SKIP16_139) testout_420_islow_ari_skip16,139.ppm + rm -f testout_420_islow_ari_skip16,139.ppm +endif +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: prog huff + ./cjpeg -dct int -prog -outfile testout_420_islow_prog.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -crop 62x62+71+71 -ppm -outfile testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71) testout_420_islow_prog_crop62x62,71,71.ppm + rm -f testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: arith +if WITH_ARITH_DEC + ./djpeg -dct int -crop 53x53+4+4 -ppm -outfile testout_420_islow_ari_crop53x53,4,4.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4) testout_420_islow_ari_crop53x53,4,4.ppm + rm -f testout_420_islow_ari_crop53x53,4,4.ppm +endif +# Context rows: No Intra-iMCU row: Yes ENT: huff + ./cjpeg -dct int -sample 1x1 -outfile testout_444_islow.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -skip 1,6 -ppm -outfile testout_444_islow_skip1,6.ppm testout_444_islow.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_SKIP1_6) testout_444_islow_skip1,6.ppm + rm -f testout_444_islow_skip1,6.ppm testout_444_islow.jpg +# Context rows: No Intra-iMCU row: No ENT: prog huff + ./cjpeg -dct int -prog -sample 1x1 -outfile testout_444_islow_prog.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -crop 98x98+13+13 -ppm -outfile testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13) testout_444_islow_prog_crop98x98,13,13.ppm + rm -f testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg +# Context rows: No Intra-iMCU row: No ENT: arith +if WITH_ARITH_ENC + ./cjpeg -dct int -arithmetic -sample 1x1 -outfile testout_444_islow_ari.jpg $(srcdir)/testimages/testorig.ppm +if WITH_ARITH_DEC + ./djpeg -dct int -crop 37x37+0+0 -ppm -outfile testout_444_islow_ari_crop37x37,0,0.ppm testout_444_islow_ari.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0) testout_444_islow_ari_crop37x37,0,0.ppm + rm -f testout_444_islow_ari_crop37x37,0,0.ppm +endif + rm -f testout_444_islow_ari.jpg +endif + + ./jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testout_crop.jpg $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_JPEG_CROP) testout_crop.jpg + rm -f testout_crop.jpg + echo GREAT SUCCESS! + + +testclean: + rm -f testout* + rm -f *_GRAY_*.bmp + rm -f *_GRAY_*.png + rm -f *_GRAY_*.ppm + rm -f *_GRAY_*.jpg + rm -f *_GRAY.yuv + rm -f *_420_*.bmp + rm -f *_420_*.png + rm -f *_420_*.ppm + rm -f *_420_*.jpg + rm -f *_420.yuv + rm -f *_422_*.bmp + rm -f *_422_*.png + rm -f *_422_*.ppm + rm -f *_422_*.jpg + rm -f *_422.yuv + rm -f *_444_*.bmp + rm -f *_444_*.png + rm -f *_444_*.ppm + rm -f *_444_*.jpg + rm -f *_444.yuv + rm -f *_440_*.bmp + rm -f *_440_*.png + rm -f *_440_*.ppm + rm -f *_440_*.jpg + rm -f *_440.yuv + rm -f *_411_*.bmp + rm -f *_411_*.png + rm -f *_411_*.ppm + rm -f *_411_*.jpg + rm -f *_411.yuv + + +tjtest: + sh ./tjbenchtest + sh ./tjbenchtest -alloc + sh ./tjbenchtest -yuv + sh ./tjbenchtest -yuv -alloc +if WITH_JAVA + sh ./tjbenchtest.java + sh ./tjbenchtest.java -yuv +endif + + +pkgscripts/libjpeg-turbo.spec: pkgscripts/libjpeg-turbo.spec.tmpl + cat pkgscripts/libjpeg-turbo.spec.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__includedir}@$(includedir)@g | \ + sed s@%{__libdir}@$(libdir)@g | sed s@%{__mandir}@$(mandir)@g \ + > pkgscripts/libjpeg-turbo.spec + +rpm: all pkgscripts/libjpeg-turbo.spec + TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ + mkdir -p $$TMPDIR/RPMS; \ + ln -fs `pwd` $$TMPDIR/BUILD; \ + rm -f ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ + rpmbuild -bb --define "_blddir $$TMPDIR/buildroot" \ + --define "_topdir $$TMPDIR" \ + --target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \ + cp $$TMPDIR/RPMS/${RPMARCH}/${PKGNAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \ + ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ + rm -rf $$TMPDIR + +srpm: dist-gzip pkgscripts/libjpeg-turbo.spec + TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ + mkdir -p $$TMPDIR/RPMS; \ + mkdir -p $$TMPDIR/SRPMS; \ + mkdir -p $$TMPDIR/BUILD; \ + mkdir -p $$TMPDIR/SOURCES; \ + mkdir -p $$TMPDIR/SPECS; \ + rm -f ${PKGNAME}-${VERSION}.src.rpm; \ + cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \ + cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \ + | sed s/#--\>//g \ + > $$TMPDIR/SPECS/libjpeg-turbo.spec; \ + rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \ + cp $$TMPDIR/SRPMS/${PKGNAME}-${VERSION}-${BUILD}.src.rpm \ + ${PKGNAME}-${VERSION}.src.rpm; \ + rm -rf $$TMPDIR + +pkgscripts/makedpkg: pkgscripts/makedpkg.tmpl + cat pkgscripts/makedpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ + > pkgscripts/makedpkg + +deb: all pkgscripts/makedpkg + sh pkgscripts/makedpkg + +pkgscripts/uninstall: pkgscripts/uninstall.tmpl + cat pkgscripts/uninstall.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ + sed s@%{__includedir}@$(includedir)@g | sed s@%{__libdir}@$(libdir)@g | \ + sed s@%{__mandir}@$(mandir)@g > pkgscripts/uninstall + +pkgscripts/makemacpkg: pkgscripts/makemacpkg.tmpl + cat pkgscripts/makemacpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__docdir}@$(docdir)@g | \ + sed s@%{__libdir}@$(libdir)@g > pkgscripts/makemacpkg + +if X86_64 + +udmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} + +iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + +else + +iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + +endif + +dmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg + +pkgscripts/makecygwinpkg: pkgscripts/makecygwinpkg.tmpl + cat pkgscripts/makecygwinpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ + > pkgscripts/makecygwinpkg + +cygwinpkg: all pkgscripts/makecygwinpkg + sh pkgscripts/makecygwinpkg diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdbmp.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdbmp.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdbmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdbmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,483 @@ +/* + * rdbmp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Modified 2011 by Siarhei Siamashka. + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Microsoft "BMP" + * format (MS Windows 3.x, OS/2 1.x, and OS/2 2.x flavors). + * Currently, only 8-bit and 24-bit images are supported, not 1-bit or + * 4-bit (feeding such low-depth images into JPEG would be silly anyway). + * Also, we don't support RLE-compressed files. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed BMP format). + * + * This code contributed by James Arthur Boucher. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef BMP_SUPPORTED + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct _bmp_source_struct *bmp_source_ptr; + +typedef struct _bmp_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + j_compress_ptr cinfo; /* back link saves passing separate parm */ + + JSAMPARRAY colormap; /* BMP colormap (converted to my format) */ + + jvirt_sarray_ptr whole_image; /* Needed to reverse row order */ + JDIMENSION source_row; /* Current source row number */ + JDIMENSION row_width; /* Physical width of scanlines in file */ + + int bits_per_pixel; /* remembers 8- or 24-bit format */ +} bmp_source_struct; + + +LOCAL(int) +read_byte (bmp_source_ptr sinfo) +/* Read next byte from BMP file */ +{ + register FILE *infile = sinfo->pub.input_file; + register int c; + + if ((c = getc(infile)) == EOF) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + return c; +} + + +LOCAL(void) +read_colormap (bmp_source_ptr sinfo, int cmaplen, int mapentrysize) +/* Read the colormap from a BMP file */ +{ + int i; + + switch (mapentrysize) { + case 3: + /* BGR format (occurs in OS/2 files) */ + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + } + break; + case 4: + /* BGR0 format (occurs in MS Windows files) */ + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + (void) read_byte(sinfo); + } + break; + default: + ERREXIT(sinfo->cinfo, JERR_BMP_BADCMAP); + break; + } +} + + +/* + * Read one row of pixels. + * The image has been read into the whole_image array, but is otherwise + * unprocessed. We must read it out in top-to-bottom row order, and if + * it is an 8-bit image, we must expand colormapped pixels to 24bit format. + */ + +METHODDEF(JDIMENSION) +get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit colormap indexes */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + register JSAMPARRAY colormap = source->colormap; + JSAMPARRAY image_ptr; + register int t; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + + /* Expand the colormap indexes to real data */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + *outptr++ = colormap[0][t]; /* can omit GETJSAMPLE() safely */ + *outptr++ = colormap[1][t]; + *outptr++ = colormap[2][t]; + } + + return 1; +} + + +METHODDEF(JDIMENSION) +get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 24-bit pixels */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + + /* Transfer data. Note source values are in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + outptr += 3; + } + + return 1; +} + + +METHODDEF(JDIMENSION) +get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 32-bit pixels */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + /* Transfer data. Note source values are in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + inptr++; /* skip the 4th byte (Alpha channel) */ + outptr += 3; + } + + return 1; +} + + +/* + * This method loads the image into whole_image during the first call on + * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call + * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls. + */ + +METHODDEF(JDIMENSION) +preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + register FILE *infile = source->pub.input_file; + register JSAMPROW out_ptr; + JSAMPARRAY image_ptr; + JDIMENSION row; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Read the data into a virtual array in input-file row order. */ + for (row = 0; row < cinfo->image_height; row++) { + if (progress != NULL) { + progress->pub.pass_counter = (long) row; + progress->pub.pass_limit = (long) cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + row, (JDIMENSION) 1, TRUE); + out_ptr = image_ptr[0]; + if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) { + if (feof(infile)) + ERREXIT(cinfo, JERR_INPUT_EOF); + else + ERREXIT(cinfo, JERR_FILE_READ); + } + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Set up to read from the virtual array in top-to-bottom order */ + switch (source->bits_per_pixel) { + case 8: + source->pub.get_pixel_rows = get_8bit_row; + break; + case 24: + source->pub.get_pixel_rows = get_24bit_row; + break; + case 32: + source->pub.get_pixel_rows = get_32bit_row; + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + } + source->source_row = cinfo->image_height; + + /* And read the first row */ + return (*source->pub.get_pixel_rows) (cinfo, sinfo); +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + U_CHAR bmpfileheader[14]; + U_CHAR bmpinfoheader[64]; +#define GET_2B(array,offset) ((unsigned short) UCH(array[offset]) + \ + (((unsigned short) UCH(array[offset+1])) << 8)) +#define GET_4B(array,offset) ((unsigned int) UCH(array[offset]) + \ + (((unsigned int) UCH(array[offset+1])) << 8) + \ + (((unsigned int) UCH(array[offset+2])) << 16) + \ + (((unsigned int) UCH(array[offset+3])) << 24)) + unsigned int bfOffBits; + unsigned int headerSize; + int biWidth; + int biHeight; + unsigned short biPlanes; + unsigned int biCompression; + int biXPelsPerMeter,biYPelsPerMeter; + unsigned int biClrUsed = 0; + int mapentrysize = 0; /* 0 indicates no colormap */ + int bPad; + JDIMENSION row_width; + + /* Read and verify the bitmap file header */ + if (! ReadOK(source->pub.input_file, bmpfileheader, 14)) + ERREXIT(cinfo, JERR_INPUT_EOF); + if (GET_2B(bmpfileheader,0) != 0x4D42) /* 'BM' */ + ERREXIT(cinfo, JERR_BMP_NOT); + bfOffBits = GET_4B(bmpfileheader,10); + /* We ignore the remaining fileheader fields */ + + /* The infoheader might be 12 bytes (OS/2 1.x), 40 bytes (Windows), + * or 64 bytes (OS/2 2.x). Check the first 4 bytes to find out which. + */ + if (! ReadOK(source->pub.input_file, bmpinfoheader, 4)) + ERREXIT(cinfo, JERR_INPUT_EOF); + headerSize = GET_4B(bmpinfoheader,0); + if (headerSize < 12 || headerSize > 64) + ERREXIT(cinfo, JERR_BMP_BADHEADER); + if (! ReadOK(source->pub.input_file, bmpinfoheader+4, headerSize-4)) + ERREXIT(cinfo, JERR_INPUT_EOF); + + switch (headerSize) { + case 12: + /* Decode OS/2 1.x header (Microsoft calls this a BITMAPCOREHEADER) */ + biWidth = (int) GET_2B(bmpinfoheader,4); + biHeight = (int) GET_2B(bmpinfoheader,6); + biPlanes = GET_2B(bmpinfoheader,8); + source->bits_per_pixel = (int) GET_2B(bmpinfoheader,10); + + switch (source->bits_per_pixel) { + case 8: /* colormapped image */ + mapentrysize = 3; /* OS/2 uses RGBTRIPLE colormap */ + TRACEMS2(cinfo, 1, JTRC_BMP_OS2_MAPPED, biWidth, biHeight); + break; + case 24: /* RGB image */ + TRACEMS2(cinfo, 1, JTRC_BMP_OS2, biWidth, biHeight); + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + break; + } + break; + case 40: + case 64: + /* Decode Windows 3.x header (Microsoft calls this a BITMAPINFOHEADER) */ + /* or OS/2 2.x header, which has additional fields that we ignore */ + biWidth = (int) GET_4B(bmpinfoheader,4); + biHeight = (int) GET_4B(bmpinfoheader,8); + biPlanes = GET_2B(bmpinfoheader,12); + source->bits_per_pixel = (int) GET_2B(bmpinfoheader,14); + biCompression = GET_4B(bmpinfoheader,16); + biXPelsPerMeter = (int) GET_4B(bmpinfoheader,24); + biYPelsPerMeter = (int) GET_4B(bmpinfoheader,28); + biClrUsed = GET_4B(bmpinfoheader,32); + /* biSizeImage, biClrImportant fields are ignored */ + + switch (source->bits_per_pixel) { + case 8: /* colormapped image */ + mapentrysize = 4; /* Windows uses RGBQUAD colormap */ + TRACEMS2(cinfo, 1, JTRC_BMP_MAPPED, biWidth, biHeight); + break; + case 24: /* RGB image */ + TRACEMS2(cinfo, 1, JTRC_BMP, biWidth, biHeight); + break; + case 32: /* RGB image + Alpha channel */ + TRACEMS2(cinfo, 1, JTRC_BMP, biWidth, biHeight); + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + break; + } + if (biCompression != 0) + ERREXIT(cinfo, JERR_BMP_COMPRESSED); + + if (biXPelsPerMeter > 0 && biYPelsPerMeter > 0) { + /* Set JFIF density parameters from the BMP data */ + cinfo->X_density = (UINT16) (biXPelsPerMeter/100); /* 100 cm per meter */ + cinfo->Y_density = (UINT16) (biYPelsPerMeter/100); + cinfo->density_unit = 2; /* dots/cm */ + } + break; + default: + ERREXIT(cinfo, JERR_BMP_BADHEADER); + return; + } + + if (biWidth <= 0 || biHeight <= 0) + ERREXIT(cinfo, JERR_BMP_EMPTY); + if (biPlanes != 1) + ERREXIT(cinfo, JERR_BMP_BADPLANES); + + /* Compute distance to bitmap data --- will adjust for colormap below */ + bPad = bfOffBits - (headerSize + 14); + + /* Read the colormap, if any */ + if (mapentrysize > 0) { + if (biClrUsed <= 0) + biClrUsed = 256; /* assume it's 256 */ + else if (biClrUsed > 256) + ERREXIT(cinfo, JERR_BMP_BADCMAP); + /* Allocate space to store the colormap */ + source->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) biClrUsed, (JDIMENSION) 3); + /* and read it from the file */ + read_colormap(source, (int) biClrUsed, mapentrysize); + /* account for size of colormap */ + bPad -= biClrUsed * mapentrysize; + } + + /* Skip any remaining pad bytes */ + if (bPad < 0) /* incorrect bfOffBits value? */ + ERREXIT(cinfo, JERR_BMP_BADHEADER); + while (--bPad >= 0) { + (void) read_byte(source); + } + + /* Compute row width in file, including padding to 4-byte boundary */ + if (source->bits_per_pixel == 24) + row_width = (JDIMENSION) (biWidth * 3); + else if (source->bits_per_pixel == 32) + row_width = (JDIMENSION) (biWidth * 4); + else + row_width = (JDIMENSION) biWidth; + while ((row_width & 3) != 0) row_width++; + source->row_width = row_width; + + /* Allocate space for inversion array, prepare for preload pass */ + source->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + row_width, (JDIMENSION) biHeight, (JDIMENSION) 1); + source->pub.get_pixel_rows = preload_image; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + + /* Allocate one-row buffer for returned data */ + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (biWidth * 3), (JDIMENSION) 1); + source->pub.buffer_height = 1; + + cinfo->in_color_space = JCS_RGB; + cinfo->input_components = 3; + cinfo->data_precision = 8; + cinfo->image_width = (JDIMENSION) biWidth; + cinfo->image_height = (JDIMENSION) biHeight; +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for BMP format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_bmp (j_compress_ptr cinfo) +{ + bmp_source_ptr source; + + /* Create module interface object */ + source = (bmp_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(bmp_source_struct)); + source->cinfo = cinfo; /* make back link for subroutines */ + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_bmp; + source->pub.finish_input = finish_input_bmp; + + return (cjpeg_source_ptr) source; +} + +#endif /* BMP_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdcolmap.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdcolmap.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdcolmap.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdcolmap.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,254 @@ +/* + * rdcolmap.c + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file implements djpeg's "-map file" switch. It reads a source image + * and constructs a colormap to be supplied to the JPEG decompressor. + * + * Currently, these file formats are supported for the map file: + * GIF: the contents of the GIF's global colormap are used. + * PPM (either text or raw flavor): the entire file is read and + * each unique pixel value is entered in the map. + * Note that reading a large PPM file will be horrendously slow. + * Typically, a PPM-format map file should contain just one pixel + * of each desired color. Such a file can be extracted from an + * ordinary image PPM file with ppmtomap(1). + * + * Rescaling a PPM that has a maxval unequal to MAXJSAMPLE is not + * currently implemented. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */ + +/* Portions of this code are based on the PBMPLUS library, which is: +** +** Copyright (C) 1988 by Jef Poskanzer. +** +** Permission to use, copy, modify, and distribute this software and its +** documentation for any purpose and without fee is hereby granted, provided +** that the above copyright notice appear in all copies and that both that +** copyright notice and this permission notice appear in supporting +** documentation. This software is provided "as is" without express or +** implied warranty. +*/ + + +/* + * Add a (potentially) new color to the color map. + */ + +LOCAL(void) +add_map_entry (j_decompress_ptr cinfo, int R, int G, int B) +{ + JSAMPROW colormap0 = cinfo->colormap[0]; + JSAMPROW colormap1 = cinfo->colormap[1]; + JSAMPROW colormap2 = cinfo->colormap[2]; + int ncolors = cinfo->actual_number_of_colors; + int index; + + /* Check for duplicate color. */ + for (index = 0; index < ncolors; index++) { + if (GETJSAMPLE(colormap0[index]) == R && + GETJSAMPLE(colormap1[index]) == G && + GETJSAMPLE(colormap2[index]) == B) + return; /* color is already in map */ + } + + /* Check for map overflow. */ + if (ncolors >= (MAXJSAMPLE+1)) + ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, (MAXJSAMPLE+1)); + + /* OK, add color to map. */ + colormap0[ncolors] = (JSAMPLE) R; + colormap1[ncolors] = (JSAMPLE) G; + colormap2[ncolors] = (JSAMPLE) B; + cinfo->actual_number_of_colors++; +} + + +/* + * Extract color map from a GIF file. + */ + +LOCAL(void) +read_gif_map (j_decompress_ptr cinfo, FILE *infile) +{ + int header[13]; + int i, colormaplen; + int R, G, B; + + /* Initial 'G' has already been read by read_color_map */ + /* Read the rest of the GIF header and logical screen descriptor */ + for (i = 1; i < 13; i++) { + if ((header[i] = getc(infile)) == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + } + + /* Verify GIF Header */ + if (header[1] != 'I' || header[2] != 'F') + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* There must be a global color map. */ + if ((header[10] & 0x80) == 0) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* OK, fetch it. */ + colormaplen = 2 << (header[10] & 0x07); + + for (i = 0; i < colormaplen; i++) { + R = getc(infile); + G = getc(infile); + B = getc(infile); + if (R == EOF || G == EOF || B == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + add_map_entry(cinfo, + R << (BITS_IN_JSAMPLE-8), + G << (BITS_IN_JSAMPLE-8), + B << (BITS_IN_JSAMPLE-8)); + } +} + + +/* Support routines for reading PPM */ + + +LOCAL(int) +pbm_getc (FILE *infile) +/* Read next char, skipping over any comments */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(infile); + if (ch == '#') { + do { + ch = getc(infile); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(unsigned int) +read_pbm_integer (j_decompress_ptr cinfo, FILE *infile) +/* Read an unsigned decimal integer from the PPM file */ +/* Swallows one trailing character after the integer */ +/* Note that on a 16-bit-int machine, only values up to 64k can be read. */ +/* This should not be a problem in practice. */ +{ + register int ch; + register unsigned int val; + + /* Skip any leading whitespace */ + do { + ch = pbm_getc(infile); + if (ch == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); + + if (ch < '0' || ch > '9') + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + val = ch - '0'; + while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') { + val *= 10; + val += ch - '0'; + } + return val; +} + + +/* + * Extract color map from a PPM file. + */ + +LOCAL(void) +read_ppm_map (j_decompress_ptr cinfo, FILE *infile) +{ + int c; + unsigned int w, h, maxval, row, col; + int R, G, B; + + /* Initial 'P' has already been read by read_color_map */ + c = getc(infile); /* save format discriminator for a sec */ + + /* while we fetch the remaining header info */ + w = read_pbm_integer(cinfo, infile); + h = read_pbm_integer(cinfo, infile); + maxval = read_pbm_integer(cinfo, infile); + + if (w <= 0 || h <= 0 || maxval <= 0) /* error check */ + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* For now, we don't support rescaling from an unusual maxval. */ + if (maxval != (unsigned int) MAXJSAMPLE) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + switch (c) { + case '3': /* it's a text-format PPM file */ + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + R = read_pbm_integer(cinfo, infile); + G = read_pbm_integer(cinfo, infile); + B = read_pbm_integer(cinfo, infile); + add_map_entry(cinfo, R, G, B); + } + } + break; + + case '6': /* it's a raw-format PPM file */ + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + R = getc(infile); + G = getc(infile); + B = getc(infile); + if (R == EOF || G == EOF || B == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + add_map_entry(cinfo, R, G, B); + } + } + break; + + default: + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + break; + } +} + + +/* + * Main entry point from djpeg.c. + * Input: opened input file (from file name argument on command line). + * Output: colormap and actual_number_of_colors fields are set in cinfo. + */ + +GLOBAL(void) +read_color_map (j_decompress_ptr cinfo, FILE *infile) +{ + /* Allocate space for a color map of maximum supported size. */ + cinfo->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (MAXJSAMPLE+1), (JDIMENSION) 3); + cinfo->actual_number_of_colors = 0; /* initialize map to empty */ + + /* Read first byte to determine file format */ + switch (getc(infile)) { + case 'G': + read_gif_map(cinfo, infile); + break; + case 'P': + read_ppm_map(cinfo, infile); + break; + default: + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + break; + } +} + +#endif /* QUANT_2PASS_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdgif.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdgif.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdgif.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdgif.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,39 @@ +/* + * rdgif.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in GIF format. + * + ***************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * the ability to read GIF files has been removed from the IJG distribution. * + * Sorry about that. * + ***************************************************************************** + * + * We are required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + +/* + * The module selection routine for GIF format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_gif (j_compress_ptr cinfo) +{ + fprintf(stderr, "GIF input is unsupported for legal reasons. Sorry.\n"); + exit(EXIT_FAILURE); + return NULL; /* keep compiler happy */ +} + +#endif /* GIF_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdjpgcom.1 glmark2-2021.02/android/jni/src/libjpeg-turbo/rdjpgcom.1 --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdjpgcom.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdjpgcom.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,63 @@ +.TH RDJPGCOM 1 "02 April 2009" +.SH NAME +rdjpgcom \- display text comments from a JPEG file +.SH SYNOPSIS +.B rdjpgcom +[ +.B \-raw +] +[ +.B \-verbose +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B rdjpgcom +reads the named JPEG/JFIF file, or the standard input if no file is named, +and prints any text comments found in the file on the standard output. +.PP +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. +.SH OPTIONS +.TP +.B \-raw +Normally +.B rdjpgcom +escapes non-printable characters in comments, for security reasons. +This option avoids that. +.PP +.B \-verbose +Causes +.B rdjpgcom +to also display the JPEG image dimensions. +.PP +Switch names may be abbreviated, and are not case sensitive. +.SH HINTS +.B rdjpgcom +does not depend on the IJG JPEG library. Its source code is intended as an +illustration of the minimum amount of code required to parse a JPEG file +header correctly. +.PP +In +.B \-verbose +mode, +.B rdjpgcom +will also attempt to print the contents of any "APP12" markers as text. +Some digital cameras produce APP12 markers containing useful textual +information. If you like, you can modify the source code to print +other APPn marker types as well. +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR jpegtran (1), +.BR wrjpgcom (1) +.SH AUTHOR +Independent JPEG Group diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdjpgcom.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdjpgcom.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdjpgcom.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdjpgcom.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,510 @@ +/* + * rdjpgcom.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * Modified 2009 by Bill Allombert, Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a very simple stand-alone application that displays + * the text in COM (comment) markers in a JFIF file. + * This may be useful as an example of the minimum logic needed to parse + * JPEG markers. + */ + +#define JPEG_CJPEG_DJPEG /* to get the command-line config symbols */ +#include "jinclude.h" /* get auto-config symbols, */ + +#ifdef HAVE_LOCALE_H +#include /* Bill Allombert: use locale for isprint */ +#endif +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#else +#define READ_BINARY "rb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + + +/* + * These macros are used to read the input file. + * To reuse this code in another application, you might need to change these. + */ + +static FILE *infile; /* input JPEG file */ + +/* Return next input byte, or EOF if no more */ +#define NEXTBYTE() getc(infile) + + +/* Error exit handler */ +#define ERREXIT(msg) (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE)) + + +/* Read one byte, testing for EOF */ +static int +read_1_byte (void) +{ + int c; + + c = NEXTBYTE(); + if (c == EOF) + ERREXIT("Premature EOF in JPEG file"); + return c; +} + +/* Read 2 bytes, convert to unsigned int */ +/* All 2-byte quantities in JPEG markers are MSB first */ +static unsigned int +read_2_bytes (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + if (c1 == EOF) + ERREXIT("Premature EOF in JPEG file"); + c2 = NEXTBYTE(); + if (c2 == EOF) + ERREXIT("Premature EOF in JPEG file"); + return (((unsigned int) c1) << 8) + ((unsigned int) c2); +} + + +/* + * JPEG markers consist of one or more 0xFF bytes, followed by a marker + * code byte (which is not an FF). Here are the marker codes of interest + * in this program. (See jdmarker.c for a more complete list.) + */ + +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB +#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_APP0 0xE0 /* Application-specific marker, type N */ +#define M_APP12 0xEC /* (we don't bother to list all 16 APPn's) */ +#define M_COM 0xFE /* COMment */ + + +/* + * Find the next JPEG marker and return its marker code. + * We expect at least one FF byte, possibly more if the compressor used FFs + * to pad the file. + * There could also be non-FF garbage between markers. The treatment of such + * garbage is unspecified; we choose to skip over it but emit a warning msg. + * NB: this routine must not be used after seeing SOS marker, since it will + * not deal correctly with FF/00 sequences in the compressed image data... + */ + +static int +next_marker (void) +{ + int c; + int discarded_bytes = 0; + + /* Find 0xFF byte; count and skip any non-FFs. */ + c = read_1_byte(); + while (c != 0xFF) { + discarded_bytes++; + c = read_1_byte(); + } + /* Get marker code byte, swallowing any duplicate FF bytes. Extra FFs + * are legal as pad bytes, so don't count them in discarded_bytes. + */ + do { + c = read_1_byte(); + } while (c == 0xFF); + + if (discarded_bytes != 0) { + fprintf(stderr, "Warning: garbage data found in JPEG file\n"); + } + + return c; +} + + +/* + * Read the initial marker, which should be SOI. + * For a JFIF file, the first two bytes of the file should be literally + * 0xFF M_SOI. To be more general, we could use next_marker, but if the + * input file weren't actually JPEG at all, next_marker might read the whole + * file and then return a misleading error message... + */ + +static int +first_marker (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + c2 = NEXTBYTE(); + if (c1 != 0xFF || c2 != M_SOI) + ERREXIT("Not a JPEG file"); + return c2; +} + + +/* + * Most types of marker are followed by a variable-length parameter segment. + * This routine skips over the parameters for any marker we don't otherwise + * want to process. + * Note that we MUST skip the parameter segment explicitly in order not to + * be fooled by 0xFF bytes that might appear within the parameter segment; + * such bytes do NOT introduce new markers. + */ + +static void +skip_variable (void) +/* Skip over an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + (void) read_1_byte(); + length--; + } +} + + +/* + * Process a COM marker. + * We want to print out the marker contents as legible text; + * we must guard against non-text junk and varying newline representations. + */ + +static void +process_COM (int raw) +{ + unsigned int length; + int ch; + int lastch = 0; + + /* Bill Allombert: set locale properly for isprint */ +#ifdef HAVE_LOCALE_H + setlocale(LC_CTYPE, ""); +#endif + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + + while (length > 0) { + ch = read_1_byte(); + if (raw) { + putc(ch, stdout); + /* Emit the character in a readable form. + * Nonprintables are converted to \nnn form, + * while \ is converted to \\. + * Newlines in CR, CR/LF, or LF form will be printed as one newline. + */ + } else if (ch == '\r') { + printf("\n"); + } else if (ch == '\n') { + if (lastch != '\r') + printf("\n"); + } else if (ch == '\\') { + printf("\\\\"); + } else if (isprint(ch)) { + putc(ch, stdout); + } else { + printf("\\%03o", ch); + } + lastch = ch; + length--; + } + printf("\n"); + + /* Bill Allombert: revert to C locale */ +#ifdef HAVE_LOCALE_H + setlocale(LC_CTYPE, "C"); +#endif +} + + +/* + * Process a SOFn marker. + * This code is only needed if you want to know the image dimensions... + */ + +static void +process_SOFn (int marker) +{ + unsigned int length; + unsigned int image_height, image_width; + int data_precision, num_components; + const char *process; + int ci; + + length = read_2_bytes(); /* usual parameter length count */ + + data_precision = read_1_byte(); + image_height = read_2_bytes(); + image_width = read_2_bytes(); + num_components = read_1_byte(); + + switch (marker) { + case M_SOF0: process = "Baseline"; break; + case M_SOF1: process = "Extended sequential"; break; + case M_SOF2: process = "Progressive"; break; + case M_SOF3: process = "Lossless"; break; + case M_SOF5: process = "Differential sequential"; break; + case M_SOF6: process = "Differential progressive"; break; + case M_SOF7: process = "Differential lossless"; break; + case M_SOF9: process = "Extended sequential, arithmetic coding"; break; + case M_SOF10: process = "Progressive, arithmetic coding"; break; + case M_SOF11: process = "Lossless, arithmetic coding"; break; + case M_SOF13: process = "Differential sequential, arithmetic coding"; break; + case M_SOF14: process = "Differential progressive, arithmetic coding"; break; + case M_SOF15: process = "Differential lossless, arithmetic coding"; break; + default: process = "Unknown"; break; + } + + printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n", + image_width, image_height, num_components, data_precision); + printf("JPEG process: %s\n", process); + + if (length != (unsigned int) (8 + num_components * 3)) + ERREXIT("Bogus SOF marker length"); + + for (ci = 0; ci < num_components; ci++) { + (void) read_1_byte(); /* Component ID code */ + (void) read_1_byte(); /* H, V sampling factors */ + (void) read_1_byte(); /* Quantization table number */ + } +} + + +/* + * Parse the marker stream until SOS or EOI is seen; + * display any COM markers. + * While the companion program wrjpgcom will always insert COM markers before + * SOFn, other implementations might not, so we scan to SOS before stopping. + * If we were only interested in the image dimensions, we would stop at SOFn. + * (Conversely, if we only cared about COM markers, there would be no need + * for special code to handle SOFn; we could treat it like other markers.) + */ + +static int +scan_JPEG_header (int verbose, int raw) +{ + int marker; + + /* Expect SOI at start of file */ + if (first_marker() != M_SOI) + ERREXIT("Expected SOI marker first"); + + /* Scan miscellaneous markers until we reach SOS. */ + for (;;) { + marker = next_marker(); + switch (marker) { + /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be, + * treated as SOFn. C4 in particular is actually DHT. + */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF2: /* Progressive, Huffman */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ + if (verbose) + process_SOFn(marker); + else + skip_variable(); + break; + + case M_SOS: /* stop before hitting compressed data */ + return marker; + + case M_EOI: /* in case it's a tables-only JPEG stream */ + return marker; + + case M_COM: + process_COM(raw); + break; + + case M_APP12: + /* Some digital camera makers put useful textual information into + * APP12 markers, so we print those out too when in -verbose mode. + */ + if (verbose) { + printf("APP12 contains:\n"); + process_COM(raw); + } else + skip_variable(); + break; + + default: /* Anything else just gets skipped */ + skip_variable(); /* we assume it has a parameter count... */ + break; + } + } /* end loop */ +} + + +/* Command line parsing code */ + +static const char *progname; /* program name for error messages */ + + +static void +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "rdjpgcom displays any textual comments in a JPEG file.\n"); + + fprintf(stderr, "Usage: %s [switches] [inputfile]\n", progname); + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -raw Display non-printable characters in comments (unsafe)\n"); + fprintf(stderr, " -verbose Also display dimensions of JPEG image\n"); + + exit(EXIT_FAILURE); +} + + +static int +keymatch (char *arg, const char *keyword, int minchars) +/* Case-insensitive matching of (possibly abbreviated) keyword switches. */ +/* keyword is the constant keyword (must be lower case already), */ +/* minchars is length of minimum legal abbreviation. */ +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return 0; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return 0; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return 0; + return 1; /* A-OK */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int argn; + char *arg; + int verbose = 0, raw = 0; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "rdjpgcom"; /* in case C library doesn't provide it */ + + /* Parse switches, if any */ + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (arg[0] != '-') + break; /* not switch, must be file name */ + arg++; /* advance over '-' */ + if (keymatch(arg, "verbose", 1)) { + verbose++; + } else if (keymatch(arg, "raw", 1)) { + raw = 1; + } else + usage(); + } + + /* Open the input file. */ + /* Unix style: expect zero or one file name */ + if (argn < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } + if (argn < argc) { + if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdin\n", progname); + exit(EXIT_FAILURE); + } +#else + infile = stdin; +#endif + } + + /* Scan the JPEG headers. */ + (void) scan_JPEG_header(verbose, raw); + + /* All done. */ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdppm.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdppm.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdppm.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdppm.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,461 @@ +/* + * rdppm.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2009 by Bill Allombert, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in PPM/PGM format. + * The extended 2-byte-per-sample raw PPM/PGM formats are supported. + * The PBMPLUS library is NOT required to compile this software + * (but it is highly useful as a set of PPM image manipulation programs). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed PPM format). + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef PPM_SUPPORTED + + +/* Portions of this code are based on the PBMPLUS library, which is: +** +** Copyright (C) 1988 by Jef Poskanzer. +** +** Permission to use, copy, modify, and distribute this software and its +** documentation for any purpose and without fee is hereby granted, provided +** that the above copyright notice appear in all copies and that both that +** copyright notice and this permission notice appear in supporting +** documentation. This software is provided "as is" without express or +** implied warranty. +*/ + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct { + struct cjpeg_source_struct pub; /* public fields */ + + /* Usually these two pointers point to the same place: */ + U_CHAR *iobuffer; /* fread's I/O buffer */ + JSAMPROW pixrow; /* compressor input buffer */ + size_t buffer_width; /* width of I/O buffer */ + JSAMPLE *rescale; /* => maxval-remapping array, or NULL */ + int maxval; +} ppm_source_struct; + +typedef ppm_source_struct *ppm_source_ptr; + + +LOCAL(int) +pbm_getc (FILE *infile) +/* Read next char, skipping over any comments */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(infile); + if (ch == '#') { + do { + ch = getc(infile); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(unsigned int) +read_pbm_integer (j_compress_ptr cinfo, FILE *infile, unsigned int maxval) +/* Read an unsigned decimal integer from the PPM file */ +/* Swallows one trailing character after the integer */ +/* Note that on a 16-bit-int machine, only values up to 64k can be read. */ +/* This should not be a problem in practice. */ +{ + register int ch; + register unsigned int val; + + /* Skip any leading whitespace */ + do { + ch = pbm_getc(infile); + if (ch == EOF) + ERREXIT(cinfo, JERR_INPUT_EOF); + } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); + + if (ch < '0' || ch > '9') + ERREXIT(cinfo, JERR_PPM_NONNUMERIC); + + val = ch - '0'; + while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') { + val *= 10; + val += ch - '0'; + } + + if (val > maxval) + ERREXIT(cinfo, JERR_PPM_TOOLARGE); + + return val; +} + + +/* + * Read one row of pixels. + * + * We provide several different versions depending on input file format. + * In all cases, input is scaled to the size of JSAMPLE. + * + * A really fast path is provided for reading byte/sample raw files with + * maxval = MAXJSAMPLE, which is the normal case for 8-bit data. + */ + + +METHODDEF(JDIMENSION) +get_text_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_text_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_scaled_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[UCH(*bufferptr++)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_scaled_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[UCH(*bufferptr++)]; + *ptr++ = rescale[UCH(*bufferptr++)]; + *ptr++ = rescale[UCH(*bufferptr++)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_raw_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format files with maxval = MAXJSAMPLE. + * In this case we just read right into the JSAMPLE buffer! + * Note that same code works for PPM and PGM files. + */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + return 1; +} + + +METHODDEF(JDIMENSION) +get_word_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-word-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + register int temp; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_word_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-word-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + register int temp; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + } + return 1; +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + int c; + unsigned int w, h, maxval; + boolean need_iobuffer, use_raw_buffer, need_rescale; + + if (getc(source->pub.input_file) != 'P') + ERREXIT(cinfo, JERR_PPM_NOT); + + c = getc(source->pub.input_file); /* subformat discriminator character */ + + /* detect unsupported variants (ie, PBM) before trying to read header */ + switch (c) { + case '2': /* it's a text-format PGM file */ + case '3': /* it's a text-format PPM file */ + case '5': /* it's a raw-format PGM file */ + case '6': /* it's a raw-format PPM file */ + break; + default: + ERREXIT(cinfo, JERR_PPM_NOT); + break; + } + + /* fetch the remaining header info */ + w = read_pbm_integer(cinfo, source->pub.input_file, 65535); + h = read_pbm_integer(cinfo, source->pub.input_file, 65535); + maxval = read_pbm_integer(cinfo, source->pub.input_file, 65535); + + if (w <= 0 || h <= 0 || maxval <= 0) /* error check */ + ERREXIT(cinfo, JERR_PPM_NOT); + + cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */ + cinfo->image_width = (JDIMENSION) w; + cinfo->image_height = (JDIMENSION) h; + source->maxval = maxval; + + /* initialize flags to most common settings */ + need_iobuffer = TRUE; /* do we need an I/O buffer? */ + use_raw_buffer = FALSE; /* do we map input buffer onto I/O buffer? */ + need_rescale = TRUE; /* do we need a rescale array? */ + + switch (c) { + case '2': /* it's a text-format PGM file */ + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h); + source->pub.get_pixel_rows = get_text_gray_row; + need_iobuffer = FALSE; + break; + + case '3': /* it's a text-format PPM file */ + cinfo->input_components = 3; + cinfo->in_color_space = JCS_RGB; + TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h); + source->pub.get_pixel_rows = get_text_rgb_row; + need_iobuffer = FALSE; + break; + + case '5': /* it's a raw-format PGM file */ + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_PGM, w, h); + if (maxval > 255) { + source->pub.get_pixel_rows = get_word_gray_row; + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + source->pub.get_pixel_rows = get_raw_row; + use_raw_buffer = TRUE; + need_rescale = FALSE; + } else { + source->pub.get_pixel_rows = get_scaled_gray_row; + } + break; + + case '6': /* it's a raw-format PPM file */ + cinfo->input_components = 3; + cinfo->in_color_space = JCS_RGB; + TRACEMS2(cinfo, 1, JTRC_PPM, w, h); + if (maxval > 255) { + source->pub.get_pixel_rows = get_word_rgb_row; + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + source->pub.get_pixel_rows = get_raw_row; + use_raw_buffer = TRUE; + need_rescale = FALSE; + } else { + source->pub.get_pixel_rows = get_scaled_rgb_row; + } + break; + } + + /* Allocate space for I/O buffer: 1 or 3 bytes or words/pixel. */ + if (need_iobuffer) { + source->buffer_width = (size_t) w * cinfo->input_components * + ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR))); + source->iobuffer = (U_CHAR *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + source->buffer_width); + } + + /* Create compressor input buffer. */ + if (use_raw_buffer) { + /* For unscaled raw-input case, we can just map it onto the I/O buffer. */ + /* Synthesize a JSAMPARRAY pointer structure */ + source->pixrow = (JSAMPROW) source->iobuffer; + source->pub.buffer = & source->pixrow; + source->pub.buffer_height = 1; + } else { + /* Need to translate anyway, so make a separate sample buffer. */ + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) w * cinfo->input_components, (JDIMENSION) 1); + source->pub.buffer_height = 1; + } + + /* Compute the rescaling array if required. */ + if (need_rescale) { + long val, half_maxval; + + /* On 16-bit-int machines we have to be careful of maxval = 65535 */ + source->rescale = (JSAMPLE *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (size_t) (((long) maxval + 1L) * + sizeof(JSAMPLE))); + half_maxval = maxval / 2; + for (val = 0; val <= (long) maxval; val++) { + /* The multiplication here must be done in 32 bits to avoid overflow */ + source->rescale[val] = (JSAMPLE) ((val * MAXJSAMPLE + half_maxval) / + maxval); + } + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for PPM format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_ppm (j_compress_ptr cinfo) +{ + ppm_source_ptr source; + + /* Create module interface object */ + source = (ppm_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(ppm_source_struct)); + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_ppm; + source->pub.finish_input = finish_input_ppm; + + return (cjpeg_source_ptr) source; +} + +#endif /* PPM_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdrle.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdrle.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdrle.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdrle.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,389 @@ +/* + * rdrle.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Utah RLE format. + * The Utah Raster Toolkit library is required (version 3.1 or later). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed RLE format). + * + * Based on code contributed by Mike Lijewski, + * with updates from Robert Hutchinson. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef RLE_SUPPORTED + +/* rle.h is provided by the Utah Raster Toolkit. */ + +#include + +/* + * We assume that JSAMPLE has the same representation as rle_pixel, + * to wit, "unsigned char". Hence we can't cope with 12- or 16-bit samples. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + +/* + * We support the following types of RLE files: + * + * GRAYSCALE - 8 bits, no colormap + * MAPPEDGRAY - 8 bits, 1 channel colomap + * PSEUDOCOLOR - 8 bits, 3 channel colormap + * TRUECOLOR - 24 bits, 3 channel colormap + * DIRECTCOLOR - 24 bits, no colormap + * + * For now, we ignore any alpha channel in the image. + */ + +typedef enum + { GRAYSCALE, MAPPEDGRAY, PSEUDOCOLOR, TRUECOLOR, DIRECTCOLOR } rle_kind; + + +/* + * Since RLE stores scanlines bottom-to-top, we have to invert the image + * to conform to JPEG's top-to-bottom order. To do this, we read the + * incoming image into a virtual array on the first get_pixel_rows call, + * then fetch the required row from the virtual array on subsequent calls. + */ + +typedef struct _rle_source_struct *rle_source_ptr; + +typedef struct _rle_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + rle_kind visual; /* actual type of input file */ + jvirt_sarray_ptr image; /* virtual array to hold the image */ + JDIMENSION row; /* current row # in the virtual array */ + rle_hdr header; /* Input file information */ + rle_pixel **rle_row; /* holds a row returned by rle_getrow() */ + +} rle_source_struct; + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JDIMENSION width, height; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* Use RLE library routine to get the header info */ + source->header = *rle_hdr_init(NULL); + source->header.rle_file = source->pub.input_file; + switch (rle_get_setup(&(source->header))) { + case RLE_SUCCESS: + /* A-OK */ + break; + case RLE_NOT_RLE: + ERREXIT(cinfo, JERR_RLE_NOT); + break; + case RLE_NO_SPACE: + ERREXIT(cinfo, JERR_RLE_MEM); + break; + case RLE_EMPTY: + ERREXIT(cinfo, JERR_RLE_EMPTY); + break; + case RLE_EOF: + ERREXIT(cinfo, JERR_RLE_EOF); + break; + default: + ERREXIT(cinfo, JERR_RLE_BADERROR); + break; + } + + /* Figure out what we have, set private vars and return values accordingly */ + + width = source->header.xmax - source->header.xmin + 1; + height = source->header.ymax - source->header.ymin + 1; + source->header.xmin = 0; /* realign horizontally */ + source->header.xmax = width-1; + + cinfo->image_width = width; + cinfo->image_height = height; + cinfo->data_precision = 8; /* we can only handle 8 bit data */ + + if (source->header.ncolors == 1 && source->header.ncmap == 0) { + source->visual = GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_RLE_GRAY, width, height); + } else if (source->header.ncolors == 1 && source->header.ncmap == 1) { + source->visual = MAPPEDGRAY; + TRACEMS3(cinfo, 1, JTRC_RLE_MAPGRAY, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 1 && source->header.ncmap == 3) { + source->visual = PSEUDOCOLOR; + TRACEMS3(cinfo, 1, JTRC_RLE_MAPPED, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 3 && source->header.ncmap == 3) { + source->visual = TRUECOLOR; + TRACEMS3(cinfo, 1, JTRC_RLE_FULLMAP, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 3 && source->header.ncmap == 0) { + source->visual = DIRECTCOLOR; + TRACEMS2(cinfo, 1, JTRC_RLE, width, height); + } else + ERREXIT(cinfo, JERR_RLE_UNSUPPORTED); + + if (source->visual == GRAYSCALE || source->visual == MAPPEDGRAY) { + cinfo->in_color_space = JCS_GRAYSCALE; + cinfo->input_components = 1; + } else { + cinfo->in_color_space = JCS_RGB; + cinfo->input_components = 3; + } + + /* + * A place to hold each scanline while it's converted. + * (GRAYSCALE scanlines don't need converting) + */ + if (source->visual != GRAYSCALE) { + source->rle_row = (rle_pixel**) (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) width, (JDIMENSION) cinfo->input_components); + } + + /* request a virtual array to hold the image */ + source->image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) (width * source->header.ncolors), + (JDIMENSION) height, (JDIMENSION) 1); + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + /* count file input as separate pass */ + progress->total_extra_passes++; + } +#endif + + source->pub.buffer_height = 1; +} + + +/* + * Read one row of pixels. + * Called only after load_image has read the image into the virtual array. + * Used for GRAYSCALE, MAPPEDGRAY, TRUECOLOR, and DIRECTCOLOR images. + */ + +METHODDEF(JDIMENSION) +get_rle_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + + source->row--; + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + + return 1; +} + +/* + * Read one row of pixels. + * Called only after load_image has read the image into the virtual array. + * Used for PSEUDOCOLOR images. + */ + +METHODDEF(JDIMENSION) +get_pseudocolor_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JSAMPROW src_row, dest_row; + JDIMENSION col; + rle_map *colormap; + int val; + + colormap = source->header.cmap; + dest_row = source->pub.buffer[0]; + source->row--; + src_row = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + + for (col = cinfo->image_width; col > 0; col--) { + val = GETJSAMPLE(*src_row++); + *dest_row++ = (JSAMPLE) (colormap[val ] >> 8); + *dest_row++ = (JSAMPLE) (colormap[val + 256] >> 8); + *dest_row++ = (JSAMPLE) (colormap[val + 512] >> 8); + } + + return 1; +} + + +/* + * Load the image into a virtual array. We have to do this because RLE + * files start at the lower left while the JPEG standard has them starting + * in the upper left. This is called the first time we want to get a row + * of input. What we do is load the RLE data into the array and then call + * the appropriate routine to read one row from the array. Before returning, + * we set source->pub.get_pixel_rows so that subsequent calls go straight to + * the appropriate row-reading routine. + */ + +METHODDEF(JDIMENSION) +load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JDIMENSION row, col; + JSAMPROW scanline, red_ptr, green_ptr, blue_ptr; + rle_pixel **rle_row; + rle_map *colormap; + char channel; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + colormap = source->header.cmap; + rle_row = source->rle_row; + + /* Read the RLE data into our virtual array. + * We assume here that rle_pixel is represented the same as JSAMPLE. + */ + RLE_CLR_BIT(source->header, RLE_ALPHA); /* don't read the alpha channel */ + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_limit = cinfo->image_height; + progress->pub.pass_counter = 0; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + + switch (source->visual) { + + case GRAYSCALE: + case PSEUDOCOLOR: + for (row = 0; row < cinfo->image_height; row++) { + rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_getrow(&source->header, rle_row); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + break; + + case MAPPEDGRAY: + case TRUECOLOR: + for (row = 0; row < cinfo->image_height; row++) { + scanline = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_row = source->rle_row; + rle_getrow(&source->header, rle_row); + + for (col = 0; col < cinfo->image_width; col++) { + for (channel = 0; channel < source->header.ncolors; channel++) { + *scanline++ = (JSAMPLE) + (colormap[GETJSAMPLE(rle_row[channel][col]) + 256 * channel] >> 8); + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + break; + + case DIRECTCOLOR: + for (row = 0; row < cinfo->image_height; row++) { + scanline = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_getrow(&source->header, rle_row); + + red_ptr = rle_row[0]; + green_ptr = rle_row[1]; + blue_ptr = rle_row[2]; + + for (col = cinfo->image_width; col > 0; col--) { + *scanline++ = *red_ptr++; + *scanline++ = *green_ptr++; + *scanline++ = *blue_ptr++; + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) + progress->completed_extra_passes++; +#endif + + /* Set up to call proper row-extraction routine in future */ + if (source->visual == PSEUDOCOLOR) { + source->pub.buffer = source->rle_row; + source->pub.get_pixel_rows = get_pseudocolor_row; + } else { + source->pub.get_pixel_rows = get_rle_row; + } + source->row = cinfo->image_height; + + /* And fetch the topmost (bottommost) row */ + return (*source->pub.get_pixel_rows) (cinfo, sinfo); +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for RLE format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_rle (j_compress_ptr cinfo) +{ + rle_source_ptr source; + + /* Create module interface object */ + source = (rle_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(rle_source_struct)); + /* Fill in method ptrs */ + source->pub.start_input = start_input_rle; + source->pub.finish_input = finish_input_rle; + source->pub.get_pixel_rows = load_image; + + return (cjpeg_source_ptr) source; +} + +#endif /* RLE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdswitch.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdswitch.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdswitch.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdswitch.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,424 @@ +/* + * rdswitch.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to process some of cjpeg's more complicated + * command-line switches. Switches processed here are: + * -qtables file Read quantization tables from text file + * -scans file Read scan script from text file + * -quality N[,N,...] Set quality ratings + * -qslots N[,N,...] Set component quantization table selectors + * -sample HxV[,HxV,...] Set component sampling factors + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include /* to declare isdigit(), isspace() */ + + +LOCAL(int) +text_getc (FILE *file) +/* Read next char, skipping over any comments (# to end of line) */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(file); + if (ch == '#') { + do { + ch = getc(file); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(boolean) +read_text_integer (FILE *file, long *result, int *termchar) +/* Read an unsigned decimal integer from a file, store it in result */ +/* Reads one trailing character after the integer; returns it in termchar */ +{ + register int ch; + register long val; + + /* Skip any leading whitespace, detect EOF */ + do { + ch = text_getc(file); + if (ch == EOF) { + *termchar = ch; + return FALSE; + } + } while (isspace(ch)); + + if (! isdigit(ch)) { + *termchar = ch; + return FALSE; + } + + val = ch - '0'; + while ((ch = text_getc(file)) != EOF) { + if (! isdigit(ch)) + break; + val *= 10; + val += ch - '0'; + } + *result = val; + *termchar = ch; + return TRUE; +} + + +#if JPEG_LIB_VERSION < 70 +static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100}; +#endif + +GLOBAL(boolean) +read_quant_tables (j_compress_ptr cinfo, char *filename, + boolean force_baseline) +/* Read a set of quantization tables from the specified file. + * The file is plain ASCII text: decimal numbers with whitespace between. + * Comments preceded by '#' may be included in the file. + * There may be one to NUM_QUANT_TBLS tables in the file, each of 64 values. + * The tables are implicitly numbered 0,1,etc. + * NOTE: does not affect the qslots mapping, which will default to selecting + * table 0 for luminance (or primary) components, 1 for chrominance components. + * You must use -qslots if you want a different component->table mapping. + */ +{ + FILE *fp; + int tblno, i, termchar; + long val; + unsigned int table[DCTSIZE2]; + + if ((fp = fopen(filename, "r")) == NULL) { + fprintf(stderr, "Can't open table file %s\n", filename); + return FALSE; + } + tblno = 0; + + while (read_text_integer(fp, &val, &termchar)) { /* read 1st element of table */ + if (tblno >= NUM_QUANT_TBLS) { + fprintf(stderr, "Too many tables in file %s\n", filename); + fclose(fp); + return FALSE; + } + table[0] = (unsigned int) val; + for (i = 1; i < DCTSIZE2; i++) { + if (! read_text_integer(fp, &val, &termchar)) { + fprintf(stderr, "Invalid table data in file %s\n", filename); + fclose(fp); + return FALSE; + } + table[i] = (unsigned int) val; + } +#if JPEG_LIB_VERSION >= 70 + jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno], + force_baseline); +#else + jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno], + force_baseline); +#endif + tblno++; + } + + if (termchar != EOF) { + fprintf(stderr, "Non-numeric data in file %s\n", filename); + fclose(fp); + return FALSE; + } + + fclose(fp); + return TRUE; +} + + +#ifdef C_MULTISCAN_FILES_SUPPORTED + +LOCAL(boolean) +read_scan_integer (FILE *file, long *result, int *termchar) +/* Variant of read_text_integer that always looks for a non-space termchar; + * this simplifies parsing of punctuation in scan scripts. + */ +{ + register int ch; + + if (! read_text_integer(file, result, termchar)) + return FALSE; + ch = *termchar; + while (ch != EOF && isspace(ch)) + ch = text_getc(file); + if (isdigit(ch)) { /* oops, put it back */ + if (ungetc(ch, file) == EOF) + return FALSE; + ch = ' '; + } else { + /* Any separators other than ';' and ':' are ignored; + * this allows user to insert commas, etc, if desired. + */ + if (ch != EOF && ch != ';' && ch != ':') + ch = ' '; + } + *termchar = ch; + return TRUE; +} + + +GLOBAL(boolean) +read_scan_script (j_compress_ptr cinfo, char *filename) +/* Read a scan script from the specified text file. + * Each entry in the file defines one scan to be emitted. + * Entries are separated by semicolons ';'. + * An entry contains one to four component indexes, + * optionally followed by a colon ':' and four progressive-JPEG parameters. + * The component indexes denote which component(s) are to be transmitted + * in the current scan. The first component has index 0. + * Sequential JPEG is used if the progressive-JPEG parameters are omitted. + * The file is free format text: any whitespace may appear between numbers + * and the ':' and ';' punctuation marks. Also, other punctuation (such + * as commas or dashes) can be placed between numbers if desired. + * Comments preceded by '#' may be included in the file. + * Note: we do very little validity checking here; + * jcmaster.c will validate the script parameters. + */ +{ + FILE *fp; + int scanno, ncomps, termchar; + long val; + jpeg_scan_info *scanptr; +#define MAX_SCANS 100 /* quite arbitrary limit */ + jpeg_scan_info scans[MAX_SCANS]; + + if ((fp = fopen(filename, "r")) == NULL) { + fprintf(stderr, "Can't open scan definition file %s\n", filename); + return FALSE; + } + scanptr = scans; + scanno = 0; + + while (read_scan_integer(fp, &val, &termchar)) { + if (scanno >= MAX_SCANS) { + fprintf(stderr, "Too many scans defined in file %s\n", filename); + fclose(fp); + return FALSE; + } + scanptr->component_index[0] = (int) val; + ncomps = 1; + while (termchar == ' ') { + if (ncomps >= MAX_COMPS_IN_SCAN) { + fprintf(stderr, "Too many components in one scan in file %s\n", + filename); + fclose(fp); + return FALSE; + } + if (! read_scan_integer(fp, &val, &termchar)) + goto bogus; + scanptr->component_index[ncomps] = (int) val; + ncomps++; + } + scanptr->comps_in_scan = ncomps; + if (termchar == ':') { + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Ss = (int) val; + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Se = (int) val; + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Ah = (int) val; + if (! read_scan_integer(fp, &val, &termchar)) + goto bogus; + scanptr->Al = (int) val; + } else { + /* set non-progressive parameters */ + scanptr->Ss = 0; + scanptr->Se = DCTSIZE2-1; + scanptr->Ah = 0; + scanptr->Al = 0; + } + if (termchar != ';' && termchar != EOF) { +bogus: + fprintf(stderr, "Invalid scan entry format in file %s\n", filename); + fclose(fp); + return FALSE; + } + scanptr++, scanno++; + } + + if (termchar != EOF) { + fprintf(stderr, "Non-numeric data in file %s\n", filename); + fclose(fp); + return FALSE; + } + + if (scanno > 0) { + /* Stash completed scan list in cinfo structure. + * NOTE: for cjpeg's use, JPOOL_IMAGE is the right lifetime for this data, + * but if you want to compress multiple images you'd want JPOOL_PERMANENT. + */ + scanptr = (jpeg_scan_info *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + scanno * sizeof(jpeg_scan_info)); + MEMCOPY(scanptr, scans, scanno * sizeof(jpeg_scan_info)); + cinfo->scan_info = scanptr; + cinfo->num_scans = scanno; + } + + fclose(fp); + return TRUE; +} + +#endif /* C_MULTISCAN_FILES_SUPPORTED */ + + +#if JPEG_LIB_VERSION < 70 +/* These are the sample quantization tables given in JPEG spec section K.1. + * The spec says that the values given produce "good" quality, and + * when divided by 2, "very good" quality. + */ +static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 +}; +static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 +}; + + +LOCAL(void) +jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) +{ + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, + q_scale_factor[0], force_baseline); + jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, + q_scale_factor[1], force_baseline); +} +#endif + + +GLOBAL(boolean) +set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline) +/* Process a quality-ratings parameter string, of the form + * N[,N,...] + * If there are more q-table slots than parameters, the last value is replicated. + */ +{ + int val = 75; /* default value */ + int tblno; + char ch; + + for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { + if (*arg) { + ch = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c", &val, &ch) < 1) + return FALSE; + if (ch != ',') /* syntax check */ + return FALSE; + /* Convert user 0-100 rating to percentage scaling */ +#if JPEG_LIB_VERSION >= 70 + cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); +#else + q_scale_factor[tblno] = jpeg_quality_scaling(val); +#endif + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining factors to last value */ +#if JPEG_LIB_VERSION >= 70 + cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); +#else + q_scale_factor[tblno] = jpeg_quality_scaling(val); +#endif + } + } + jpeg_default_qtables(cinfo, force_baseline); + return TRUE; +} + + +GLOBAL(boolean) +set_quant_slots (j_compress_ptr cinfo, char *arg) +/* Process a quantization-table-selectors parameter string, of the form + * N[,N,...] + * If there are more components than parameters, the last value is replicated. + */ +{ + int val = 0; /* default table # */ + int ci; + char ch; + + for (ci = 0; ci < MAX_COMPONENTS; ci++) { + if (*arg) { + ch = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c", &val, &ch) < 1) + return FALSE; + if (ch != ',') /* syntax check */ + return FALSE; + if (val < 0 || val >= NUM_QUANT_TBLS) { + fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n", + NUM_QUANT_TBLS-1); + return FALSE; + } + cinfo->comp_info[ci].quant_tbl_no = val; + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining components to last table */ + cinfo->comp_info[ci].quant_tbl_no = val; + } + } + return TRUE; +} + + +GLOBAL(boolean) +set_sample_factors (j_compress_ptr cinfo, char *arg) +/* Process a sample-factors parameter string, of the form + * HxV[,HxV,...] + * If there are more components than parameters, "1x1" is assumed for the rest. + */ +{ + int ci, val1, val2; + char ch1, ch2; + + for (ci = 0; ci < MAX_COMPONENTS; ci++) { + if (*arg) { + ch2 = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c%d%c", &val1, &ch1, &val2, &ch2) < 3) + return FALSE; + if ((ch1 != 'x' && ch1 != 'X') || ch2 != ',') /* syntax check */ + return FALSE; + if (val1 <= 0 || val1 > 4 || val2 <= 0 || val2 > 4) { + fprintf(stderr, "JPEG sampling factors must be 1..4\n"); + return FALSE; + } + cinfo->comp_info[ci].h_samp_factor = val1; + cinfo->comp_info[ci].v_samp_factor = val2; + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining components to 1x1 sampling */ + cinfo->comp_info[ci].h_samp_factor = 1; + cinfo->comp_info[ci].v_samp_factor = 1; + } + } + return TRUE; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdtarga.c glmark2-2021.02/android/jni/src/libjpeg-turbo/rdtarga.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/rdtarga.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/rdtarga.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,503 @@ +/* + * rdtarga.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Targa format. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed Targa format). + * + * Based on code contributed by Lee Daniel Crocker. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef TARGA_SUPPORTED + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct _tga_source_struct *tga_source_ptr; + +typedef struct _tga_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + j_compress_ptr cinfo; /* back link saves passing separate parm */ + + JSAMPARRAY colormap; /* Targa colormap (converted to my format) */ + + jvirt_sarray_ptr whole_image; /* Needed if funny input row order */ + JDIMENSION current_row; /* Current logical row number to read */ + + /* Pointer to routine to extract next Targa pixel from input file */ + void (*read_pixel) (tga_source_ptr sinfo); + + /* Result of read_pixel is delivered here: */ + U_CHAR tga_pixel[4]; + + int pixel_size; /* Bytes per Targa pixel (1 to 4) */ + + /* State info for reading RLE-coded pixels; both counts must be init to 0 */ + int block_count; /* # of pixels remaining in RLE block */ + int dup_pixel_count; /* # of times to duplicate previous pixel */ + + /* This saves the correct pixel-row-expansion method for preload_image */ + JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); +} tga_source_struct; + + +/* For expanding 5-bit pixel values to 8-bit with best rounding */ + +static const UINT8 c5to8bits[32] = { + 0, 8, 16, 25, 33, 41, 49, 58, + 66, 74, 82, 90, 99, 107, 115, 123, + 132, 140, 148, 156, 165, 173, 181, 189, + 197, 206, 214, 222, 230, 239, 247, 255 +}; + + + +LOCAL(int) +read_byte (tga_source_ptr sinfo) +/* Read next byte from Targa file */ +{ + register FILE *infile = sinfo->pub.input_file; + register int c; + + if ((c = getc(infile)) == EOF) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + return c; +} + + +LOCAL(void) +read_colormap (tga_source_ptr sinfo, int cmaplen, int mapentrysize) +/* Read the colormap from a Targa file */ +{ + int i; + + /* Presently only handles 24-bit BGR format */ + if (mapentrysize != 24) + ERREXIT(sinfo->cinfo, JERR_TGA_BADCMAP); + + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + } +} + + +/* + * read_pixel methods: get a single pixel from Targa file into tga_pixel[] + */ + +METHODDEF(void) +read_non_rle_pixel (tga_source_ptr sinfo) +/* Read one Targa pixel from the input file; no RLE expansion */ +{ + register FILE *infile = sinfo->pub.input_file; + register int i; + + for (i = 0; i < sinfo->pixel_size; i++) { + sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + } +} + + +METHODDEF(void) +read_rle_pixel (tga_source_ptr sinfo) +/* Read one Targa pixel from the input file, expanding RLE data as needed */ +{ + register FILE *infile = sinfo->pub.input_file; + register int i; + + /* Duplicate previously read pixel? */ + if (sinfo->dup_pixel_count > 0) { + sinfo->dup_pixel_count--; + return; + } + + /* Time to read RLE block header? */ + if (--sinfo->block_count < 0) { /* decrement pixels remaining in block */ + i = read_byte(sinfo); + if (i & 0x80) { /* Start of duplicate-pixel block? */ + sinfo->dup_pixel_count = i & 0x7F; /* number of dups after this one */ + sinfo->block_count = 0; /* then read new block header */ + } else { + sinfo->block_count = i & 0x7F; /* number of pixels after this one */ + } + } + + /* Read next pixel */ + for (i = 0; i < sinfo->pixel_size; i++) { + sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + } +} + + +/* + * Read one row of pixels. + * + * We provide several different versions depending on input file format. + */ + + +METHODDEF(JDIMENSION) +get_8bit_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit grayscale pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + } + return 1; +} + +METHODDEF(JDIMENSION) +get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit colormap indexes */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register int t; + register JSAMPROW ptr; + register JDIMENSION col; + register JSAMPARRAY colormap = source->colormap; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + t = UCH(source->tga_pixel[0]); + *ptr++ = colormap[0][t]; + *ptr++ = colormap[1][t]; + *ptr++ = colormap[2][t]; + } + return 1; +} + +METHODDEF(JDIMENSION) +get_16bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 16-bit pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register int t; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + t = UCH(source->tga_pixel[0]); + t += UCH(source->tga_pixel[1]) << 8; + /* We expand 5 bit data to 8 bit sample width. + * The format of the 16-bit (LSB first) input word is + * xRRRRRGGGGGBBBBB + */ + ptr[2] = (JSAMPLE) c5to8bits[t & 0x1F]; + t >>= 5; + ptr[1] = (JSAMPLE) c5to8bits[t & 0x1F]; + t >>= 5; + ptr[0] = (JSAMPLE) c5to8bits[t & 0x1F]; + ptr += 3; + } + return 1; +} + +METHODDEF(JDIMENSION) +get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 24-bit pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[2]); /* change BGR to RGB order */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[1]); + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + } + return 1; +} + +/* + * Targa also defines a 32-bit pixel format with order B,G,R,A. + * We presently ignore the attribute byte, so the code for reading + * these pixels is identical to the 24-bit routine above. + * This works because the actual pixel length is only known to read_pixel. + */ + +#define get_32bit_row get_24bit_row + + +/* + * This method is for re-reading the input data in standard top-down + * row order. The entire image has already been read into whole_image + * with proper conversion of pixel format, but it's in a funny row order. + */ + +METHODDEF(JDIMENSION) +get_memory_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + JDIMENSION source_row; + + /* Compute row of source that maps to current_row of normal order */ + /* For now, assume image is bottom-up and not interlaced. */ + /* NEEDS WORK to support interlaced images! */ + source_row = cinfo->image_height - source->current_row - 1; + + /* Fetch that row from virtual array */ + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source_row, (JDIMENSION) 1, FALSE); + + source->current_row++; + return 1; +} + + +/* + * This method loads the image into whole_image during the first call on + * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call + * get_memory_row on subsequent calls. + */ + +METHODDEF(JDIMENSION) +preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + JDIMENSION row; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Read the data into a virtual array in input-file row order. */ + for (row = 0; row < cinfo->image_height; row++) { + if (progress != NULL) { + progress->pub.pass_counter = (long) row; + progress->pub.pass_limit = (long) cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, row, (JDIMENSION) 1, TRUE); + (*source->get_pixel_rows) (cinfo, sinfo); + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Set up to read from the virtual array in unscrambled order */ + source->pub.get_pixel_rows = get_memory_row; + source->current_row = 0; + /* And read the first row */ + return get_memory_row(cinfo, sinfo); +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + U_CHAR targaheader[18]; + int idlen, cmaptype, subtype, flags, interlace_type, components; + unsigned int width, height, maplen; + boolean is_bottom_up; + +#define GET_2B(offset) ((unsigned int) UCH(targaheader[offset]) + \ + (((unsigned int) UCH(targaheader[offset+1])) << 8)) + + if (! ReadOK(source->pub.input_file, targaheader, 18)) + ERREXIT(cinfo, JERR_INPUT_EOF); + + /* Pretend "15-bit" pixels are 16-bit --- we ignore attribute bit anyway */ + if (targaheader[16] == 15) + targaheader[16] = 16; + + idlen = UCH(targaheader[0]); + cmaptype = UCH(targaheader[1]); + subtype = UCH(targaheader[2]); + maplen = GET_2B(5); + width = GET_2B(12); + height = GET_2B(14); + source->pixel_size = UCH(targaheader[16]) >> 3; + flags = UCH(targaheader[17]); /* Image Descriptor byte */ + + is_bottom_up = ((flags & 0x20) == 0); /* bit 5 set => top-down */ + interlace_type = flags >> 6; /* bits 6/7 are interlace code */ + + if (cmaptype > 1 || /* cmaptype must be 0 or 1 */ + source->pixel_size < 1 || source->pixel_size > 4 || + (UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */ + interlace_type != 0 || /* currently don't allow interlaced image */ + width == 0 || height == 0) /* image width/height must be non-zero */ + ERREXIT(cinfo, JERR_TGA_BADPARMS); + + if (subtype > 8) { + /* It's an RLE-coded file */ + source->read_pixel = read_rle_pixel; + source->block_count = source->dup_pixel_count = 0; + subtype -= 8; + } else { + /* Non-RLE file */ + source->read_pixel = read_non_rle_pixel; + } + + /* Now should have subtype 1, 2, or 3 */ + components = 3; /* until proven different */ + cinfo->in_color_space = JCS_RGB; + + switch (subtype) { + case 1: /* Colormapped image */ + if (source->pixel_size == 1 && cmaptype == 1) + source->get_pixel_rows = get_8bit_row; + else + ERREXIT(cinfo, JERR_TGA_BADPARMS); + TRACEMS2(cinfo, 1, JTRC_TGA_MAPPED, width, height); + break; + case 2: /* RGB image */ + switch (source->pixel_size) { + case 2: + source->get_pixel_rows = get_16bit_row; + break; + case 3: + source->get_pixel_rows = get_24bit_row; + break; + case 4: + source->get_pixel_rows = get_32bit_row; + break; + default: + ERREXIT(cinfo, JERR_TGA_BADPARMS); + break; + } + TRACEMS2(cinfo, 1, JTRC_TGA, width, height); + break; + case 3: /* Grayscale image */ + components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + if (source->pixel_size == 1) + source->get_pixel_rows = get_8bit_gray_row; + else + ERREXIT(cinfo, JERR_TGA_BADPARMS); + TRACEMS2(cinfo, 1, JTRC_TGA_GRAY, width, height); + break; + default: + ERREXIT(cinfo, JERR_TGA_BADPARMS); + break; + } + + if (is_bottom_up) { + /* Create a virtual array to buffer the upside-down image. */ + source->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) width * components, (JDIMENSION) height, (JDIMENSION) 1); + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + /* source->pub.buffer will point to the virtual array. */ + source->pub.buffer_height = 1; /* in case anyone looks at it */ + source->pub.get_pixel_rows = preload_image; + } else { + /* Don't need a virtual array, but do need a one-row input buffer. */ + source->whole_image = NULL; + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) width * components, (JDIMENSION) 1); + source->pub.buffer_height = 1; + source->pub.get_pixel_rows = source->get_pixel_rows; + } + + while (idlen--) /* Throw away ID field */ + (void) read_byte(source); + + if (maplen > 0) { + if (maplen > 256 || GET_2B(3) != 0) + ERREXIT(cinfo, JERR_TGA_BADCMAP); + /* Allocate space to store the colormap */ + source->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, (JDIMENSION) maplen, (JDIMENSION) 3); + /* and read it from the file */ + read_colormap(source, (int) maplen, UCH(targaheader[7])); + } else { + if (cmaptype) /* but you promised a cmap! */ + ERREXIT(cinfo, JERR_TGA_BADPARMS); + source->colormap = NULL; + } + + cinfo->input_components = components; + cinfo->data_precision = 8; + cinfo->image_width = width; + cinfo->image_height = height; +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for Targa format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_targa (j_compress_ptr cinfo) +{ + tga_source_ptr source; + + /* Create module interface object */ + source = (tga_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(tga_source_struct)); + source->cinfo = cinfo; /* make back link for subroutines */ + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_tga; + source->pub.finish_input = finish_input_tga; + + return (cjpeg_source_ptr) source; +} + +#endif /* TARGA_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README glmark2-2021.02/android/jni/src/libjpeg-turbo/README --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/README 1970-01-01 08:00:00.000000000 +0800 @@ -1,290 +0,0 @@ -libjpeg-turbo note: This file contains portions of the libjpeg v6b and v8 -README files, with additional wordsmithing by The libjpeg-turbo Project. -It is included only for reference, as some parts of it may not apply to -libjpeg-turbo. Please see README-turbo.txt for information specific to -libjpeg-turbo. - - -The Independent JPEG Group's JPEG software -========================================== - -This distribution contains a release of the Independent JPEG Group's free JPEG -software. You are welcome to redistribute this software and to use it for any -purpose, subject to the conditions under LEGAL ISSUES, below. - -This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, -Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, -Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, -and other members of the Independent JPEG Group. - -IJG is not affiliated with the official ISO JPEG standards committee. - - -DOCUMENTATION ROADMAP -===================== - -This file contains the following sections: - -OVERVIEW General description of JPEG and the IJG software. -LEGAL ISSUES Copyright, lack of warranty, terms of distribution. -REFERENCES Where to learn more about JPEG. -ARCHIVE LOCATIONS Where to find newer versions of this software. -FILE FORMAT WARS Software *not* to get. -TO DO Plans for future IJG releases. - -Other documentation files in the distribution are: - -User documentation: - install.txt How to configure and install the IJG software. - usage.txt Usage instructions for cjpeg, djpeg, jpegtran, - rdjpgcom, and wrjpgcom. - *.1 Unix-style man pages for programs (same info as usage.txt). - wizard.txt Advanced usage instructions for JPEG wizards only. - change.log Version-to-version change highlights. -Programmer and internal documentation: - libjpeg.txt How to use the JPEG library in your own programs. - example.c Sample code for calling the JPEG library. - structure.txt Overview of the JPEG library's internal structure. - filelist.txt Road map of IJG files. - coderules.txt Coding style rules --- please read if you contribute code. - -Please read at least the files install.txt and usage.txt. Some information -can also be found in the JPEG FAQ (Frequently Asked Questions) article. See -ARCHIVE LOCATIONS below to find out where to obtain the FAQ article. - -If you want to understand how the JPEG code works, we suggest reading one or -more of the REFERENCES, then looking at the documentation files (in roughly -the order listed) before diving into the code. - - -OVERVIEW -======== - -This package contains C software to implement JPEG image encoding, decoding, -and transcoding. JPEG (pronounced "jay-peg") is a standardized compression -method for full-color and gray-scale images. JPEG's strong suit is compressing -photographic images or other types of images that have smooth color and -brightness transitions between neighboring pixels. Images with sharp lines or -other abrupt features may not compress well with JPEG, and a higher JPEG -quality may have to be used to avoid visible compression artifacts with such -images. - -JPEG is lossy, meaning that the output pixels are not necessarily identical to -the input pixels. However, on photographic content and other "smooth" images, -very good compression ratios can be obtained with no visible compression -artifacts, and extremely high compression ratios are possible if you are -willing to sacrifice image quality (by reducing the "quality" setting in the -compressor.) - -This software implements JPEG baseline, extended-sequential, and progressive -compression processes. Provision is made for supporting all variants of these -processes, although some uncommon parameter settings aren't implemented yet. -We have made no provision for supporting the hierarchical or lossless -processes defined in the standard. - -We provide a set of library routines for reading and writing JPEG image files, -plus two sample applications "cjpeg" and "djpeg", which use the library to -perform conversion between JPEG and some other popular image file formats. -The library is intended to be reused in other applications. - -In order to support file conversion and viewing software, we have included -considerable functionality beyond the bare JPEG coding/decoding capability; -for example, the color quantization modules are not strictly part of JPEG -decoding, but they are essential for output to colormapped file formats or -colormapped displays. These extra functions can be compiled out of the -library if not required for a particular application. - -We have also included "jpegtran", a utility for lossless transcoding between -different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple -applications for inserting and extracting textual comments in JFIF files. - -The emphasis in designing this software has been on achieving portability and -flexibility, while also making it fast enough to be useful. In particular, -the software is not intended to be read as a tutorial on JPEG. (See the -REFERENCES section for introductory material.) Rather, it is intended to -be reliable, portable, industrial-strength code. We do not claim to have -achieved that goal in every aspect of the software, but we strive for it. - -We welcome the use of this software as a component of commercial products. -No royalty is required, but we do ask for an acknowledgement in product -documentation, as described under LEGAL ISSUES. - - -LEGAL ISSUES -============ - -In plain English: - -1. We don't promise that this software works. (But if you find any bugs, - please let us know!) -2. You can use this software for whatever you want. You don't have to pay us. -3. You may not pretend that you wrote this software. If you use it in a - program, you must acknowledge somewhere in your documentation that - you've used the IJG code. - -In legalese: - -The authors make NO WARRANTY or representation, either express or implied, -with respect to this software, its quality, accuracy, merchantability, or -fitness for a particular purpose. This software is provided "AS IS", and you, -its user, assume the entire risk as to its quality and accuracy. - -This software is copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding. -All Rights Reserved except as specified below. - -Permission is hereby granted to use, copy, modify, and distribute this -software (or portions thereof) for any purpose, without fee, subject to these -conditions: -(1) If any part of the source code for this software is distributed, then this -README file must be included, with this copyright and no-warranty notice -unaltered; and any additions, deletions, or changes to the original files -must be clearly indicated in accompanying documentation. -(2) If only executable code is distributed, then the accompanying -documentation must state that "this software is based in part on the work of -the Independent JPEG Group". -(3) Permission for use of this software is granted only if the user accepts -full responsibility for any undesirable consequences; the authors accept -NO LIABILITY for damages of any kind. - -These conditions apply to any software derived from or based on the IJG code, -not just to the unmodified library. If you use our work, you ought to -acknowledge us. - -Permission is NOT granted for the use of any IJG author's name or company name -in advertising or publicity relating to this software or products derived from -it. This software may be referred to only as "the Independent JPEG Group's -software". - -We specifically permit and encourage the use of this software as the basis of -commercial products, provided that all warranty or liability claims are -assumed by the product vendor. - - -ansi2knr.c is included in this distribution by permission of L. Peter Deutsch, -sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA. -ansi2knr.c is NOT covered by the above copyright and conditions, but instead -by the usual distribution terms of the Free Software Foundation; principally, -that you must include source code if you redistribute it. (See the file -ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part -of any program generated from the IJG code, this does not limit you more than -the foregoing paragraphs do. - -The Unix configuration script "configure" was produced with GNU Autoconf. -It is copyright by the Free Software Foundation but is freely distributable. -The same holds for its supporting scripts (config.guess, config.sub, -ltmain.sh). Another support script, install-sh, is copyright by X Consortium -but is also freely distributable. - -The IJG distribution formerly included code to read and write GIF files. -To avoid entanglement with the Unisys LZW patent, GIF reading support has -been removed altogether, and the GIF writer has been simplified to produce -"uncompressed GIFs". This technique does not use the LZW algorithm; the -resulting GIF files are larger than usual, but are readable by all standard -GIF decoders. - -We are required to state that - "The Graphics Interchange Format(c) is the Copyright property of - CompuServe Incorporated. GIF(sm) is a Service Mark property of - CompuServe Incorporated." - - -REFERENCES -========== - -We recommend reading one or more of these references before trying to -understand the innards of the JPEG software. - -The best short technical introduction to the JPEG compression algorithm is - Wallace, Gregory K. "The JPEG Still Picture Compression Standard", - Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. -(Adjacent articles in that issue discuss MPEG motion picture compression, -applications of JPEG, and related topics.) If you don't have the CACM issue -handy, a PostScript file containing a revised version of Wallace's article is -available at http://www.ijg.org/files/wallace.ps.gz. The file (actually -a preprint for an article that appeared in IEEE Trans. Consumer Electronics) -omits the sample images that appeared in CACM, but it includes corrections -and some added material. Note: the Wallace article is copyright ACM and IEEE, -and it may not be used for commercial purposes. - -A somewhat less technical, more leisurely introduction to JPEG can be found in -"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by -M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides -good explanations and example C code for a multitude of compression methods -including JPEG. It is an excellent source if you are comfortable reading C -code but don't know much about data compression in general. The book's JPEG -sample code is far from industrial-strength, but when you are ready to look -at a full implementation, you've got one here... - -The best currently available description of JPEG is the textbook "JPEG Still -Image Data Compression Standard" by William B. Pennebaker and Joan L. -Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. -Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG -standards (DIS 10918-1 and draft DIS 10918-2). - -The original JPEG standard is divided into two parts, Part 1 being the actual -specification, while Part 2 covers compliance testing methods. Part 1 is -titled "Digital Compression and Coding of Continuous-tone Still Images, -Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS -10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of -Continuous-tone Still Images, Part 2: Compliance testing" and has document -numbers ISO/IEC IS 10918-2, ITU-T T.83. - -The JPEG standard does not specify all details of an interchangeable file -format. For the omitted details we follow the "JFIF" conventions, revision -1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report -and thus received a formal publication status. It is available as a free -download in PDF format from -http://www.ecma-international.org/publications/techreports/E-TR-098.htm. -A PostScript version of the JFIF document is available at -http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at -http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. - -The TIFF 6.0 file format specification can be obtained by FTP from -ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme -found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. -IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). -Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 -(Compression tag 7). Copies of this Note can be obtained from -http://www.ijg.org/files/. It is expected that the next revision -of the TIFF spec will replace the 6.0 JPEG design with the Note's design. -Although IJG's own code does not support TIFF/JPEG, the free libtiff library -uses our library to implement TIFF/JPEG per the Note. - - -ARCHIVE LOCATIONS -================= - -The "official" archive site for this software is www.ijg.org. -The most recent released version can always be found there in -directory "files". This particular version will be archived as -http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible -"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip. - -The JPEG FAQ (Frequently Asked Questions) article is a source of some -general information about JPEG. -It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ -and other news.answers archive sites, including the official news.answers -archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. -If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu -with body - send usenet/news.answers/jpeg-faq/part1 - send usenet/news.answers/jpeg-faq/part2 - - -FILE FORMAT WARS -================ - -The ISO JPEG standards committee actually promotes different formats like -"JPEG 2000" or "JPEG XR", which are incompatible with original DCT-based -JPEG. IJG therefore does not support these formats (see REFERENCES). Indeed, -one of the original reasons for developing this free software was to help -force convergence on common, interoperable format standards for JPEG files. -Don't use an incompatible file format! -(In any case, our decoder will remain capable of reading existing JPEG -image files indefinitely.) - - -TO DO -===== - -Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README.ijg glmark2-2021.02/android/jni/src/libjpeg-turbo/README.ijg --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README.ijg 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/README.ijg 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,279 @@ +libjpeg-turbo note: This file has been modified by The libjpeg-turbo Project +to include only information relevant to libjpeg-turbo, to wordsmith certain +sections, and to remove impolitic language that existed in the libjpeg v8 +README. It is included only for reference. Please see README.md for +information specific to libjpeg-turbo. + + +The Independent JPEG Group's JPEG software +========================================== + +This distribution contains a release of the Independent JPEG Group's free JPEG +software. You are welcome to redistribute this software and to use it for any +purpose, subject to the conditions under LEGAL ISSUES, below. + +This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, +Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, +Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, +and other members of the Independent JPEG Group. + +IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee +(also known as JPEG, together with ITU-T SG16). + + +DOCUMENTATION ROADMAP +===================== + +This file contains the following sections: + +OVERVIEW General description of JPEG and the IJG software. +LEGAL ISSUES Copyright, lack of warranty, terms of distribution. +REFERENCES Where to learn more about JPEG. +ARCHIVE LOCATIONS Where to find newer versions of this software. +FILE FORMAT WARS Software *not* to get. +TO DO Plans for future IJG releases. + +Other documentation files in the distribution are: + +User documentation: + usage.txt Usage instructions for cjpeg, djpeg, jpegtran, + rdjpgcom, and wrjpgcom. + *.1 Unix-style man pages for programs (same info as usage.txt). + wizard.txt Advanced usage instructions for JPEG wizards only. + change.log Version-to-version change highlights. +Programmer and internal documentation: + libjpeg.txt How to use the JPEG library in your own programs. + example.c Sample code for calling the JPEG library. + structure.txt Overview of the JPEG library's internal structure. + coderules.txt Coding style rules --- please read if you contribute code. + +Please read at least usage.txt. Some information can also be found in the JPEG +FAQ (Frequently Asked Questions) article. See ARCHIVE LOCATIONS below to find +out where to obtain the FAQ article. + +If you want to understand how the JPEG code works, we suggest reading one or +more of the REFERENCES, then looking at the documentation files (in roughly +the order listed) before diving into the code. + + +OVERVIEW +======== + +This package contains C software to implement JPEG image encoding, decoding, +and transcoding. JPEG (pronounced "jay-peg") is a standardized compression +method for full-color and grayscale images. JPEG's strong suit is compressing +photographic images or other types of images that have smooth color and +brightness transitions between neighboring pixels. Images with sharp lines or +other abrupt features may not compress well with JPEG, and a higher JPEG +quality may have to be used to avoid visible compression artifacts with such +images. + +JPEG is lossy, meaning that the output pixels are not necessarily identical to +the input pixels. However, on photographic content and other "smooth" images, +very good compression ratios can be obtained with no visible compression +artifacts, and extremely high compression ratios are possible if you are +willing to sacrifice image quality (by reducing the "quality" setting in the +compressor.) + +This software implements JPEG baseline, extended-sequential, and progressive +compression processes. Provision is made for supporting all variants of these +processes, although some uncommon parameter settings aren't implemented yet. +We have made no provision for supporting the hierarchical or lossless +processes defined in the standard. + +We provide a set of library routines for reading and writing JPEG image files, +plus two sample applications "cjpeg" and "djpeg", which use the library to +perform conversion between JPEG and some other popular image file formats. +The library is intended to be reused in other applications. + +In order to support file conversion and viewing software, we have included +considerable functionality beyond the bare JPEG coding/decoding capability; +for example, the color quantization modules are not strictly part of JPEG +decoding, but they are essential for output to colormapped file formats or +colormapped displays. These extra functions can be compiled out of the +library if not required for a particular application. + +We have also included "jpegtran", a utility for lossless transcoding between +different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple +applications for inserting and extracting textual comments in JFIF files. + +The emphasis in designing this software has been on achieving portability and +flexibility, while also making it fast enough to be useful. In particular, +the software is not intended to be read as a tutorial on JPEG. (See the +REFERENCES section for introductory material.) Rather, it is intended to +be reliable, portable, industrial-strength code. We do not claim to have +achieved that goal in every aspect of the software, but we strive for it. + +We welcome the use of this software as a component of commercial products. +No royalty is required, but we do ask for an acknowledgement in product +documentation, as described under LEGAL ISSUES. + + +LEGAL ISSUES +============ + +In plain English: + +1. We don't promise that this software works. (But if you find any bugs, + please let us know!) +2. You can use this software for whatever you want. You don't have to pay us. +3. You may not pretend that you wrote this software. If you use it in a + program, you must acknowledge somewhere in your documentation that + you've used the IJG code. + +In legalese: + +The authors make NO WARRANTY or representation, either express or implied, +with respect to this software, its quality, accuracy, merchantability, or +fitness for a particular purpose. This software is provided "AS IS", and you, +its user, assume the entire risk as to its quality and accuracy. + +This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding. +All Rights Reserved except as specified below. + +Permission is hereby granted to use, copy, modify, and distribute this +software (or portions thereof) for any purpose, without fee, subject to these +conditions: +(1) If any part of the source code for this software is distributed, then this +README file must be included, with this copyright and no-warranty notice +unaltered; and any additions, deletions, or changes to the original files +must be clearly indicated in accompanying documentation. +(2) If only executable code is distributed, then the accompanying +documentation must state that "this software is based in part on the work of +the Independent JPEG Group". +(3) Permission for use of this software is granted only if the user accepts +full responsibility for any undesirable consequences; the authors accept +NO LIABILITY for damages of any kind. + +These conditions apply to any software derived from or based on the IJG code, +not just to the unmodified library. If you use our work, you ought to +acknowledge us. + +Permission is NOT granted for the use of any IJG author's name or company name +in advertising or publicity relating to this software or products derived from +it. This software may be referred to only as "the Independent JPEG Group's +software". + +We specifically permit and encourage the use of this software as the basis of +commercial products, provided that all warranty or liability claims are +assumed by the product vendor. + + +The Unix configuration script "configure" was produced with GNU Autoconf. +It is copyright by the Free Software Foundation but is freely distributable. +The same holds for its supporting scripts (config.guess, config.sub, +ltmain.sh). Another support script, install-sh, is copyright by X Consortium +but is also freely distributable. + +The IJG distribution formerly included code to read and write GIF files. +To avoid entanglement with the Unisys LZW patent (now expired), GIF reading +support has been removed altogether, and the GIF writer has been simplified +to produce "uncompressed GIFs". This technique does not use the LZW +algorithm; the resulting GIF files are larger than usual, but are readable +by all standard GIF decoders. + +We are required to state that + "The Graphics Interchange Format(c) is the Copyright property of + CompuServe Incorporated. GIF(sm) is a Service Mark property of + CompuServe Incorporated." + + +REFERENCES +========== + +We recommend reading one or more of these references before trying to +understand the innards of the JPEG software. + +The best short technical introduction to the JPEG compression algorithm is + Wallace, Gregory K. "The JPEG Still Picture Compression Standard", + Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. +(Adjacent articles in that issue discuss MPEG motion picture compression, +applications of JPEG, and related topics.) If you don't have the CACM issue +handy, a PDF file containing a revised version of Wallace's article is +available at http://www.ijg.org/files/Wallace.JPEG.pdf. The file (actually +a preprint for an article that appeared in IEEE Trans. Consumer Electronics) +omits the sample images that appeared in CACM, but it includes corrections +and some added material. Note: the Wallace article is copyright ACM and IEEE, +and it may not be used for commercial purposes. + +A somewhat less technical, more leisurely introduction to JPEG can be found in +"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by +M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides +good explanations and example C code for a multitude of compression methods +including JPEG. It is an excellent source if you are comfortable reading C +code but don't know much about data compression in general. The book's JPEG +sample code is far from industrial-strength, but when you are ready to look +at a full implementation, you've got one here... + +The best currently available description of JPEG is the textbook "JPEG Still +Image Data Compression Standard" by William B. Pennebaker and Joan L. +Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. +Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG +standards (DIS 10918-1 and draft DIS 10918-2). + +The original JPEG standard is divided into two parts, Part 1 being the actual +specification, while Part 2 covers compliance testing methods. Part 1 is +titled "Digital Compression and Coding of Continuous-tone Still Images, +Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS +10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of +Continuous-tone Still Images, Part 2: Compliance testing" and has document +numbers ISO/IEC IS 10918-2, ITU-T T.83. + +The JPEG standard does not specify all details of an interchangeable file +format. For the omitted details we follow the "JFIF" conventions, revision +1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report +and thus received a formal publication status. It is available as a free +download in PDF format from +http://www.ecma-international.org/publications/techreports/E-TR-098.htm. +A PostScript version of the JFIF document is available at +http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at +http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. + +The TIFF 6.0 file format specification can be obtained by FTP from +ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme +found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. +IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). +Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 +(Compression tag 7). Copies of this Note can be obtained from +http://www.ijg.org/files/. It is expected that the next revision +of the TIFF spec will replace the 6.0 JPEG design with the Note's design. +Although IJG's own code does not support TIFF/JPEG, the free libtiff library +uses our library to implement TIFF/JPEG per the Note. + + +ARCHIVE LOCATIONS +================= + +The "official" archive site for this software is www.ijg.org. +The most recent released version can always be found there in +directory "files". + +The JPEG FAQ (Frequently Asked Questions) article is a source of some +general information about JPEG. +It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ +and other news.answers archive sites, including the official news.answers +archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. +If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu +with body + send usenet/news.answers/jpeg-faq/part1 + send usenet/news.answers/jpeg-faq/part2 + + +FILE FORMAT WARS +================ + +The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together +with ITU-T SG16) currently promotes different formats containing the name +"JPEG" which are incompatible with original DCT-based JPEG. IJG therefore does +not support these formats (see REFERENCES). Indeed, one of the original +reasons for developing this free software was to help force convergence on +common, interoperable format standards for JPEG files. +Don't use an incompatible file format! +(In any case, our decoder will remain capable of reading existing JPEG +image files indefinitely.) + + +TO DO +===== + +Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README.md glmark2-2021.02/android/jni/src/libjpeg-turbo/README.md --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/README.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,335 @@ +Background +========== + +libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, +NEON, AltiVec) to accelerate baseline JPEG compression and decompression on +x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is +generally 2-6x as fast as libjpeg, all else being equal. On other types of +systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by +virtue of its highly-optimized Huffman coding routines. In many cases, the +performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. + +libjpeg-turbo implements both the traditional libjpeg API as well as the less +powerful but more straightforward TurboJPEG API. libjpeg-turbo also features +colorspace extensions that allow it to compress from/decompress to 32-bit and +big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java +interface. + +libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated +derivative of libjpeg v6b developed by Miyasaka Masaru. The TigerVNC and +VirtualGL projects made numerous enhancements to the codec in 2009, and in +early 2010, libjpeg-turbo spun off into an independent project, with the goal +of making high-speed JPEG compression/decompression technology available to a +broader range of users and developers. + + +License +======= + +libjpeg-turbo is covered by three compatible BSD-style open source licenses. +Refer to [LICENSE.md](LICENSE.md) for a roll-up of license terms. + + +Using libjpeg-turbo +=================== + +libjpeg-turbo includes two APIs that can be used to compress and decompress +JPEG images: + +- **TurboJPEG API** + This API provides an easy-to-use interface for compressing and decompressing + JPEG images in memory. It also provides some functionality that would not be + straightforward to achieve using the underlying libjpeg API, such as + generating planar YUV images and performing multiple simultaneous lossless + transforms on an image. The Java interface for libjpeg-turbo is written on + top of the TurboJPEG API. + +- **libjpeg API** + This is the de facto industry-standard API for compressing and decompressing + JPEG images. It is more difficult to use than the TurboJPEG API but also + more powerful. The libjpeg API implementation in libjpeg-turbo is both + API/ABI-compatible and mathematically compatible with libjpeg v6b. It can + also optionally be configured to be API/ABI-compatible with libjpeg v7 and v8 + (see below.) + +There is no significant performance advantage to either API when both are used +to perform similar operations. + +Colorspace Extensions +--------------------- + +libjpeg-turbo includes extensions that allow JPEG images to be compressed +directly from (and decompressed directly to) buffers that use BGR, BGRX, +RGBX, XBGR, and XRGB pixel ordering. This is implemented with ten new +colorspace constants: + + JCS_EXT_RGB /* red/green/blue */ + JCS_EXT_RGBX /* red/green/blue/x */ + JCS_EXT_BGR /* blue/green/red */ + JCS_EXT_BGRX /* blue/green/red/x */ + JCS_EXT_XBGR /* x/blue/green/red */ + JCS_EXT_XRGB /* x/red/green/blue */ + JCS_EXT_RGBA /* red/green/blue/alpha */ + JCS_EXT_BGRA /* blue/green/red/alpha */ + JCS_EXT_ABGR /* alpha/blue/green/red */ + JCS_EXT_ARGB /* alpha/red/green/blue */ + +Setting `cinfo.in_color_space` (compression) or `cinfo.out_color_space` +(decompression) to one of these values will cause libjpeg-turbo to read the +red, green, and blue values from (or write them to) the appropriate position in +the pixel when compressing from/decompressing to an RGB buffer. + +Your application can check for the existence of these extensions at compile +time with: + + #ifdef JCS_EXTENSIONS + +At run time, attempting to use these extensions with a libjpeg implementation +that does not support them will result in a "Bogus input colorspace" error. +Applications can trap this error in order to test whether run-time support is +available for the colorspace extensions. + +When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the +X byte is undefined, and in order to ensure the best performance, libjpeg-turbo +can set that byte to whatever value it wishes. If an application expects the X +byte to be used as an alpha channel, then it should specify `JCS_EXT_RGBA`, +`JCS_EXT_BGRA`, `JCS_EXT_ABGR`, or `JCS_EXT_ARGB`. When these colorspace +constants are used, the X byte is guaranteed to be 0xFF, which is interpreted +as opaque. + +Your application can check for the existence of the alpha channel colorspace +extensions at compile time with: + + #ifdef JCS_ALPHA_EXTENSIONS + +[jcstest.c](jcstest.c), located in the libjpeg-turbo source tree, demonstrates +how to check for the existence of the colorspace extensions at compile time and +run time. + +libjpeg v7 and v8 API/ABI Emulation +----------------------------------- + +With libjpeg v7 and v8, new features were added that necessitated extending the +compression and decompression structures. Unfortunately, due to the exposed +nature of those structures, extending them also necessitated breaking backward +ABI compatibility with previous libjpeg releases. Thus, programs that were +built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is +based on the libjpeg v6b code base. Although libjpeg v7 and v8 are not +as widely used as v6b, enough programs (including a few Linux distros) made +the switch that there was a demand to emulate the libjpeg v7 and v8 ABIs +in libjpeg-turbo. It should be noted, however, that this feature was added +primarily so that applications that had already been compiled to use libjpeg +v7+ could take advantage of accelerated baseline JPEG encoding/decoding +without recompiling. libjpeg-turbo does not claim to support all of the +libjpeg v7+ features, nor to produce identical output to libjpeg v7+ in all +cases (see below.) + +By passing an argument of `--with-jpeg7` or `--with-jpeg8` to `configure`, or +an argument of `-DWITH_JPEG7=1` or `-DWITH_JPEG8=1` to `cmake`, you can build a +version of libjpeg-turbo that emulates the libjpeg v7 or v8 ABI, so that +programs that are built against libjpeg v7 or v8 can be run with libjpeg-turbo. +The following section describes which libjpeg v7+ features are supported and +which aren't. + +### Support for libjpeg v7 and v8 Features + +#### Fully supported + +- **libjpeg: IDCT scaling extensions in decompressor** + libjpeg-turbo supports IDCT scaling with scaling factors of 1/8, 1/4, 3/8, + 1/2, 5/8, 3/4, 7/8, 9/8, 5/4, 11/8, 3/2, 13/8, 7/4, 15/8, and 2/1 (only 1/4 + and 1/2 are SIMD-accelerated.) + +- **libjpeg: Arithmetic coding** + +- **libjpeg: In-memory source and destination managers** + See notes below. + +- **cjpeg: Separate quality settings for luminance and chrominance** + Note that the libpjeg v7+ API was extended to accommodate this feature only + for convenience purposes. It has always been possible to implement this + feature with libjpeg v6b (see rdswitch.c for an example.) + +- **cjpeg: 32-bit BMP support** + +- **cjpeg: `-rgb` option** + +- **jpegtran: Lossless cropping** + +- **jpegtran: `-perfect` option** + +- **jpegtran: Forcing width/height when performing lossless crop** + +- **rdjpgcom: `-raw` option** + +- **rdjpgcom: Locale awareness** + + +#### Not supported + +NOTE: As of this writing, extensive research has been conducted into the +usefulness of DCT scaling as a means of data reduction and SmartScale as a +means of quality improvement. The reader is invited to peruse the research at +http://www.libjpeg-turbo.org/About/SmartScale and draw his/her own conclusions, +but it is the general belief of our project that these features have not +demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo. + +- **libjpeg: DCT scaling in compressor** + `cinfo.scale_num` and `cinfo.scale_denom` are silently ignored. + There is no technical reason why DCT scaling could not be supported when + emulating the libjpeg v7+ API/ABI, but without the SmartScale extension (see + below), only scaling factors of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and + 8/9 would be available, which is of limited usefulness. + +- **libjpeg: SmartScale** + `cinfo.block_size` is silently ignored. + SmartScale is an extension to the JPEG format that allows for DCT block + sizes other than 8x8. Providing support for this new format would be + feasible (particularly without full acceleration.) However, until/unless + the format becomes either an official industry standard or, at minimum, an + accepted solution in the community, we are hesitant to implement it, as + there is no sense of whether or how it might change in the future. It is + our belief that SmartScale has not demonstrated sufficient usefulness as a + lossless format nor as a means of quality enhancement, and thus our primary + interest in providing this feature would be as a means of supporting + additional DCT scaling factors. + +- **libjpeg: Fancy downsampling in compressor** + `cinfo.do_fancy_downsampling` is silently ignored. + This requires the DCT scaling feature, which is not supported. + +- **jpegtran: Scaling** + This requires both the DCT scaling and SmartScale features, which are not + supported. + +- **Lossless RGB JPEG files** + This requires the SmartScale feature, which is not supported. + +### What About libjpeg v9? + +libjpeg v9 introduced yet another field to the JPEG compression structure +(`color_transform`), thus making the ABI backward incompatible with that of +libjpeg v8. This new field was introduced solely for the purpose of supporting +lossless SmartScale encoding. Furthermore, there was actually no reason to +extend the API in this manner, as the color transform could have just as easily +been activated by way of a new JPEG colorspace constant, thus preserving +backward ABI compatibility. + +Our research (see link above) has shown that lossless SmartScale does not +generally accomplish anything that can't already be accomplished better with +existing, standard lossless formats. Therefore, at this time it is our belief +that there is not sufficient technical justification for software projects to +upgrade from libjpeg v8 to libjpeg v9, and thus there is not sufficient +echnical justification for us to emulate the libjpeg v9 ABI. + +In-Memory Source/Destination Managers +------------------------------------- + +By default, libjpeg-turbo 1.3 and later includes the `jpeg_mem_src()` and +`jpeg_mem_dest()` functions, even when not emulating the libjpeg v8 API/ABI. +Previously, it was necessary to build libjpeg-turbo from source with libjpeg v8 +API/ABI emulation in order to use the in-memory source/destination managers, +but several projects requested that those functions be included when emulating +the libjpeg v6b API/ABI as well. This allows the use of those functions by +programs that need them, without breaking ABI compatibility for programs that +don't, and it allows those functions to be provided in the "official" +libjpeg-turbo binaries. + +Those who are concerned about maintaining strict conformance with the libjpeg +v6b or v7 API can pass an argument of `--without-mem-srcdst` to `configure` or +an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to building +libjpeg-turbo. This will restore the pre-1.3 behavior, in which +`jpeg_mem_src()` and `jpeg_mem_dest()` are only included when emulating the +libjpeg v8 API/ABI. + +On Un*x systems, including the in-memory source/destination managers changes +the dynamic library version from 62.0.0 to 62.1.0 if using libjpeg v6b API/ABI +emulation and from 7.0.0 to 7.1.0 if using libjpeg v7 API/ABI emulation. + +Note that, on most Un*x systems, the dynamic linker will not look for a +function in a library until that function is actually used. Thus, if a program +is built against libjpeg-turbo 1.3+ and uses `jpeg_mem_src()` or +`jpeg_mem_dest()`, that program will not fail if run against an older version +of libjpeg-turbo or against libjpeg v7- until the program actually tries to +call `jpeg_mem_src()` or `jpeg_mem_dest()`. Such is not the case on Windows. +If a program is built against the libjpeg-turbo 1.3+ DLL and uses +`jpeg_mem_src()` or `jpeg_mem_dest()`, then it must use the libjpeg-turbo 1.3+ +DLL at run time. + +Both cjpeg and djpeg have been extended to allow testing the in-memory +source/destination manager functions. See their respective man pages for more +details. + + +Mathematical Compatibility +========================== + +For the most part, libjpeg-turbo should produce identical output to libjpeg +v6b. The one exception to this is when using the floating point DCT/IDCT, in +which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the +following reasons: + +- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so + slightly more accurate than the implementation in libjpeg v6b, but not by + any amount perceptible to human vision (generally in the range of 0.01 to + 0.08 dB gain in PNSR.) + +- When not using the SIMD extensions, libjpeg-turbo uses the more accurate + (and slightly faster) floating point IDCT algorithm introduced in libjpeg + v8a as opposed to the algorithm used in libjpeg v6b. It should be noted, + however, that this algorithm basically brings the accuracy of the floating + point IDCT in line with the accuracy of the slow integer IDCT. The floating + point DCT/IDCT algorithms are mainly a legacy feature, and they do not + produce significantly more accuracy than the slow integer algorithms (to put + numbers on this, the typical difference in PNSR between the two algorithms + is less than 0.10 dB, whereas changing the quality level by 1 in the upper + range of the quality scale is typically more like a 1.0 dB difference.) + +- If the floating point algorithms in libjpeg-turbo are not implemented using + SIMD instructions on a particular platform, then the accuracy of the + floating point DCT/IDCT can depend on the compiler settings. + +While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood it is +still using the same algorithms as libjpeg v6b, so there are several specific +cases in which libjpeg-turbo cannot be expected to produce the same output as +libjpeg v8: + +- When decompressing using scaling factors of 1/2 and 1/4, because libjpeg v8 + implements those scaling algorithms differently than libjpeg v6b does, and + libjpeg-turbo's SIMD extensions are based on the libjpeg v6b behavior. + +- When using chrominance subsampling, because libjpeg v8 implements this + with its DCT/IDCT scaling algorithms rather than with a separate + downsampling/upsampling algorithm. In our testing, the subsampled/upsampled + output of libjpeg v8 is less accurate than that of libjpeg v6b for this + reason. + +- When decompressing using a scaling factor > 1 and merged (AKA "non-fancy" or + "non-smooth") chrominance upsampling, because libjpeg v8 does not support + merged upsampling with scaling factors > 1. + + +Performance Pitfalls +==================== + +Restart Markers +--------------- + +The optimized Huffman decoder in libjpeg-turbo does not handle restart markers +in a way that makes the rest of the libjpeg infrastructure happy, so it is +necessary to use the slow Huffman decoder when decompressing a JPEG image that +has restart markers. This can cause the decompression performance to drop by +as much as 20%, but the performance will still be much greater than that of +libjpeg. Many consumer packages, such as PhotoShop, use restart markers when +generating JPEG images, so images generated by those programs will experience +this issue. + +Fast Integer Forward DCT at High Quality Levels +----------------------------------------------- + +The algorithm used by the SIMD-accelerated quantization function cannot produce +correct results whenever the fast integer forward DCT is used along with a JPEG +quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization +function in those cases. This causes performance to drop by as much as 40%. +It is therefore strongly advised that you use the slow integer forward DCT +whenever encoding images with a JPEG quality of 98 or higher. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README-turbo.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/README-turbo.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/README-turbo.txt 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/README-turbo.txt 1970-01-01 08:00:00.000000000 +0800 @@ -1,361 +0,0 @@ -******************************************************************************* -** Background -******************************************************************************* - -libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, -SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, -x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as -fast as the unmodified version of libjpeg, all else being equal. - -libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but -the TigerVNC and VirtualGL projects made numerous enhancements to the codec in -2009, including improved support for Mac OS X, 64-bit support, support for -32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman -encoding/decoding, and various bug fixes. The goal was to produce a fully -open-source codec that could replace the partially closed-source TurboJPEG/IPP -codec used by VirtualGL and TurboVNC. libjpeg-turbo generally achieves 80-120% -of the performance of TurboJPEG/IPP. It is faster in some areas but slower in -others. - -In early 2010, libjpeg-turbo spun off into its own independent project, with -the goal of making high-speed JPEG compression/decompression technology -available to a broader range of users and developers. - - -******************************************************************************* -** License -******************************************************************************* - -Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by -libjpeg (see README.) The TurboJPEG/OSS wrapper (both C and Java versions) and -associated test programs bear a similar license, which is reproduced below: - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -- Neither the name of the libjpeg-turbo Project nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - - -******************************************************************************* -** Using libjpeg-turbo -******************************************************************************* - -libjpeg-turbo includes two APIs that can be used to compress and decompress -JPEG images: - - TurboJPEG API: This API provides an easy-to-use interface for compressing - and decompressing JPEG images in memory. It also provides some functionality - that would not be straightforward to achieve using the underlying libjpeg - API, such as generating planar YUV images and performing multiple - simultaneous lossless transforms on an image. The Java interface for - libjpeg-turbo is written on top of the TurboJPEG API. - - libjpeg API: This is the de facto industry-standard API for compressing and - decompressing JPEG images. It is more difficult to use than the TurboJPEG - API but also more powerful. libjpeg-turbo is both API/ABI-compatible and - mathematically compatible with libjpeg v6b. It can also optionally be - configured to be API/ABI-compatible with libjpeg v7 and v8 (see below.) - - -============================= -Replacing libjpeg at Run Time -============================= - -If a Unix application is dynamically linked with libjpeg, then you can replace -libjpeg with libjpeg-turbo at run time by manipulating LD_LIBRARY_PATH. -For instance: - - [Using libjpeg] - > time cjpeg vgl_5674_0098.jpg - real 0m0.392s - user 0m0.074s - sys 0m0.020s - - [Using libjpeg-turbo] - > export LD_LIBRARY_PATH=/opt/libjpeg-turbo/{lib}:$LD_LIBRARY_PATH - > time cjpeg vgl_5674_0098.jpg - real 0m0.109s - user 0m0.029s - sys 0m0.010s - -NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and -architecture. - -System administrators can also replace the libjpeg sym links in /usr/{lib} with -links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}. -This will effectively accelerate every application that uses the libjpeg -dynamic library on the system. - -The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL -(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with -libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH -environment variable can be modified such that this directory is searched -before any others that might contain a libjpeg DLL. However, if a libjpeg -DLL exists in an application's install directory, then Windows will load this -DLL first whenever the application is launched. Thus, if an application ships -with jpeg62.dll, jpeg7.dll, or jpeg8.dll, then back up the application's -version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the -application's install directory to accelerate it. - -The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll). -msvcr90.dll ships with more recent versions of Windows, but users of older -Windows releases can obtain it from the Visual C++ 2008 Redistributable -Package, which is available as a free download from Microsoft's web site. - -NOTE: Features of libjpeg that require passing a C run-time structure, such -as a file handle, from an application to libjpeg will probably not work with -the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++, unless the application is also built to use the Visual C++ 2008 C -run-time DLL. In particular, this affects jpeg_stdio_dest() and -jpeg_stdio_src(). - -Mac applications typically embed their own copies of the libjpeg dylib inside -the (hidden) application bundle, so it is not possible to globally replace -libjpeg on OS X systems. If an application uses a shared library version of -libjpeg, then it may be possible to replace the application's version of it. -This would generally involve copying libjpeg.*.dylib from libjpeg-turbo into -the appropriate place in the application bundle and using install_name_tool to -repoint the dylib to the new directory. This requires an advanced knowledge of -OS X and would not survive an upgrade or a re-install of the application. -Thus, it is not recommended for most users. - -======================= -Replacing TurboJPEG/IPP -======================= - -libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by -VirtualGL 2.1.x and TurboVNC 0.6 (and prior.) libjpeg-turbo contains a wrapper -library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo -instead of the closed-source Intel Performance Primitives. You can replace the -TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order -to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec -at run time. Note that the 64-bit libjpeg-turbo packages contain only 64-bit -binaries, whereas the TurboJPEG/IPP 64-bit packages contained both 64-bit and -32-bit binaries. Thus, to replace a TurboJPEG/IPP 64-bit package, install -both the 64-bit and 32-bit versions of libjpeg-turbo. - -You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with -the libjpeg-turbo SDK instead of TurboJPEG/IPP. It should work identically. -libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which -are used to build VirtualGL 2.2 and TurboVNC 1.0 and later. - -======================================== -Using libjpeg-turbo in Your Own Programs -======================================== - -For the most part, libjpeg-turbo should work identically to libjpeg, so in -most cases, an application can be built against libjpeg and then run against -libjpeg-turbo. On Unix systems (including Cygwin), you can build against -libjpeg-turbo instead of libjpeg by setting - - CPATH=/opt/libjpeg-turbo/include - and - LIBRARY_PATH=/opt/libjpeg-turbo/{lib} - -({lib} = lib32 or lib64, depending on whether you are building a 32-bit or a -64-bit application.) - -If using MinGW, then set - - CPATH=/c/libjpeg-turbo-gcc[64]/include - and - LIBRARY_PATH=/c/libjpeg-turbo-gcc[64]/lib - -Building against libjpeg-turbo is useful, for instance, if you want to build an -application that leverages the libjpeg-turbo colorspace extensions (see below.) -On Linux and Solaris systems, you would still need to manipulate -LD_LIBRARY_PATH or create appropriate sym links to use libjpeg-turbo at run -time. On such systems, you can pass -R /opt/libjpeg-turbo/{lib} to the linker -to force the use of libjpeg-turbo at run time rather than libjpeg (also useful -if you want to leverage the colorspace extensions), or you can link against the -libjpeg-turbo static library. - -To force a Linux, Solaris, or MinGW application to link against the static -version of libjpeg-turbo, you can use the following linker options: - - -Wl,-Bstatic -ljpeg -Wl,-Bdynamic - -On OS X, simply add /opt/libjpeg-turbo/lib/libjpeg.a to the linker command -line (this also works on Linux and Solaris.) - -To build Visual C++ applications using libjpeg-turbo, add -c:\libjpeg-turbo[64]\include to the system or user INCLUDE environment -variable and c:\libjpeg-turbo[64]\lib to the system or user LIB environment -variable, and then link against either jpeg.lib (to use the DLL version of -libjpeg-turbo) or jpeg-static.lib (to use the static version of libjpeg-turbo.) - -===================== -Colorspace Extensions -===================== - -libjpeg-turbo includes extensions that allow JPEG images to be compressed -directly from (and decompressed directly to) buffers that use BGR, BGRX, -RGBX, XBGR, and XRGB pixel ordering. This is implemented with ten new -colorspace constants: - - JCS_EXT_RGB /* red/green/blue */ - JCS_EXT_RGBX /* red/green/blue/x */ - JCS_EXT_BGR /* blue/green/red */ - JCS_EXT_BGRX /* blue/green/red/x */ - JCS_EXT_XBGR /* x/blue/green/red */ - JCS_EXT_XRGB /* x/red/green/blue */ - JCS_EXT_RGBA /* red/green/blue/alpha */ - JCS_EXT_BGRA /* blue/green/red/alpha */ - JCS_EXT_ABGR /* alpha/blue/green/red */ - JCS_EXT_ARGB /* alpha/red/green/blue */ - -Setting cinfo.in_color_space (compression) or cinfo.out_color_space -(decompression) to one of these values will cause libjpeg-turbo to read the -red, green, and blue values from (or write them to) the appropriate position in -the pixel when compressing from/decompressing to an RGB buffer. - -Your application can check for the existence of these extensions at compile -time with: - - #ifdef JCS_EXTENSIONS - -At run time, attempting to use these extensions with a version of libjpeg -that doesn't support them will result in a "Bogus input colorspace" error. - -When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the -X byte is undefined, and in order to ensure the best performance, libjpeg-turbo -can set that byte to whatever value it wishes. If an application expects the X -byte to be used as an alpha channel, then it should specify JCS_EXT_RGBA, -JCS_EXT_BGRA, JCS_EXT_ABGR, or JCS_EXT_ARGB. When these colorspace constants -are used, the X byte is guaranteed to be 0xFF, which is interpreted as opaque. - -Your application can check for the existence of the alpha channel colorspace -extensions at compile time with: - - #ifdef JCS_ALPHA_EXTENSIONS - -jcstest.c, located in the libjpeg-turbo source tree, demonstrates how to check -for the existence of the colorspace extensions at compile time and run time. - -================================= -libjpeg v7 and v8 API/ABI support -================================= - -With libjpeg v7 and v8, new features were added that necessitated extending the -compression and decompression structures. Unfortunately, due to the exposed -nature of those structures, extending them also necessitated breaking backward -ABI compatibility with previous libjpeg releases. Thus, programs that are -built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is -based on the libjpeg v6b code base. Although libjpeg v7 and v8 are still not -as widely used as v6b, enough programs (including a few Linux distros) have -made the switch that it was desirable to provide support for the libjpeg v7/v8 -API/ABI in libjpeg-turbo. Although libjpeg-turbo can now be configured as a -drop-in replacement for libjpeg v7 or v8, it should be noted that not all of -the features in libjpeg v7 and v8 are supported (see below.) - -By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an -argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version -of libjpeg-turbo that emulates the libjpeg v7 or v8 API/ABI, so that programs -that are built against libjpeg v7 or v8 can be run with libjpeg-turbo. The -following section describes which libjpeg v7+ features are supported and which -aren't. - -libjpeg v7 and v8 Features: ---------------------------- - -Fully supported: - --- cjpeg: Separate quality settings for luminance and chrominance - Note that the libpjeg v7+ API was extended to accommodate this feature only - for convenience purposes. It has always been possible to implement this - feature with libjpeg v6b (see rdswitch.c for an example.) - --- cjpeg: 32-bit BMP support - --- jpegtran: lossless cropping - --- jpegtran: -perfect option - --- rdjpgcom: -raw option - --- rdjpgcom: locale awareness - - -Fully supported when using libjpeg v7/v8 emulation: - --- libjpeg: In-memory source and destination managers - - -Not supported: - --- libjpeg: DCT scaling in compressor - cinfo.scale_num and cinfo.scale_denom are silently ignored. - There is no technical reason why DCT scaling cannot be supported, but - without the SmartScale extension (see below), it would only be able to - down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9, - which is of limited usefulness. - --- libjpeg: SmartScale - cinfo.block_size is silently ignored. - SmartScale is an extension to the JPEG format that allows for DCT block - sizes other than 8x8. It would be difficult to support this feature while - retaining backward compatibility with libjpeg v6b. - --- libjpeg: IDCT scaling extensions in decompressor - libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4, - and 1/8 (same as libjpeg v6b.) - --- libjpeg: Fancy downsampling in compressor - cinfo.do_fancy_downsampling is silently ignored. - This requires the DCT scaling feature, which is not supported. - --- jpegtran: Scaling - This requires both the DCT scaling and SmartScale features, which are not - supported. - --- Lossless RGB JPEG files - This requires the SmartScale feature, which is not supported. - - -******************************************************************************* -** Performance pitfalls -******************************************************************************* - -=============== -Restart Markers -=============== - -The optimized Huffman decoder in libjpeg-turbo does not handle restart markers -in a way that makes the rest of the libjpeg infrastructure happy, so it is -necessary to use the slow Huffman decoder when decompressing a JPEG image that -has restart markers. This can cause the decompression performance to drop by -as much as 20%, but the performance will still be much greater than that of -libjpeg. Many consumer packages, such as PhotoShop, use restart markers when -generating JPEG images, so images generated by those programs will experience -this issue. - -=============================================== -Fast Integer Forward DCT at High Quality Levels -=============================================== - -The algorithm used by the SIMD-accelerated quantization function cannot produce -correct results whenever the fast integer forward DCT is used along with a JPEG -quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization -function in those cases. This causes performance to drop by as much as 40%. -It is therefore strongly advised that you use the slow integer forward DCT -whenever encoding images with a JPEG quality of 98 or higher. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/CMakeLists.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/CMakeLists.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/CMakeLists.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,80 @@ +if(NOT DEFINED NASM) + set(NASM nasm CACHE FILEPATH "Path to NASM/YASM executable") +endif() + +if(SIMD_X86_64) + set(NAFLAGS -fwin64 -DWIN64 -D__x86_64__) +else() + if(BORLAND) + set(NAFLAGS -fobj -DOBJ32) + else() + set(NAFLAGS -fwin32 -DWIN32) + endif() +endif() +set(NAFLAGS ${NAFLAGS} -I${CMAKE_SOURCE_DIR}/win/ -I${CMAKE_CURRENT_SOURCE_DIR}/) + +# This only works if building from the command line. There is currently no way +# to set a variable's value based on the build type when using the MSVC IDE. +if(CMAKE_BUILD_TYPE STREQUAL "Debug" + OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + set(NAFLAGS ${NAFLAGS} -g) +endif() + +if(SIMD_X86_64) + set(SIMD_BASENAMES jfdctflt-sse-64 jccolor-sse2-64 jcgray-sse2-64 + jchuff-sse2-64 jcsample-sse2-64 jdcolor-sse2-64 jdmerge-sse2-64 + jdsample-sse2-64 jfdctfst-sse2-64 jfdctint-sse2-64 jidctflt-sse2-64 + jidctfst-sse2-64 jidctint-sse2-64 jidctred-sse2-64 jquantf-sse2-64 + jquanti-sse2-64) + message(STATUS "Building x86_64 SIMD extensions") +else() + set(SIMD_BASENAMES jsimdcpu jfdctflt-3dn jidctflt-3dn jquant-3dn jccolor-mmx + jcgray-mmx jcsample-mmx jdcolor-mmx jdmerge-mmx jdsample-mmx jfdctfst-mmx + jfdctint-mmx jidctfst-mmx jidctint-mmx jidctred-mmx jquant-mmx jfdctflt-sse + jidctflt-sse jquant-sse jccolor-sse2 jcgray-sse2 jchuff-sse2 jcsample-sse2 + jdcolor-sse2 jdmerge-sse2 jdsample-sse2 jfdctfst-sse2 jfdctint-sse2 + jidctflt-sse2 jidctfst-sse2 jidctint-sse2 jidctred-sse2 jquantf-sse2 + jquanti-sse2) + message(STATUS "Building i386 SIMD extensions") +endif() + +if(MSVC_IDE) + set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}") +else() + set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR}) +endif() + +file(GLOB INC_FILES *.inc) + +foreach(file ${SIMD_BASENAMES}) + set(DEPFILE "") + set(SIMD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/${file}.asm) + if(${file} MATCHES jccolor) + set(DEPFILE ${file}) + string(REGEX REPLACE "jccolor" "jccolext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jcgray) + set(DEPFILE ${file}) + string(REGEX REPLACE "jcgray" "jcgryext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jdcolor) + set(DEPFILE ${file}) + string(REGEX REPLACE "jdcolor" "jdcolext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jdmerge) + set(DEPFILE ${file}) + string(REGEX REPLACE "jdmerge" "jdmrgext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + set(SIMD_OBJ ${OBJDIR}/${file}.obj) + add_custom_command(OUTPUT ${SIMD_OBJ} + DEPENDS ${SIMD_SRC} ${DEPFILE} ${INC_FILES} + COMMAND ${NASM} ${NAFLAGS} ${SIMD_SRC} -o${SIMD_OBJ}) + set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ}) +endforeach() + +set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE) +add_custom_target(simd DEPENDS ${SIMD_OBJS}) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,267 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * Copyright (C) 2014, Jay Foad. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jccolor-altivec.c */ + + +void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + JSAMPROW inptr, outptr0, outptr1, outptr2; + int pitch = img_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + rgbg0, rgbg1, rgbg2, rgbg3, y, cb, cr; +#if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 + __vector unsigned char rgb3 = {0}; +#endif +#if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 + __vector unsigned char rgb4 = {0}; +#endif + __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; + __vector unsigned short yl, yh, crl, crh, cbl, cbh; + __vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3; + + /* Constants */ + __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) }, + pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) }, + pw_mf016_mf033 = { __4X2(-F_0_168, -F_0_331) }, + pw_mf008_mf041 = { __4X2(-F_0_081, -F_0_418) }; + __vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }, + pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16, + outptr0 += 16, outptr1 += 16, outptr2 += 16) { + +#if __BIG_ENDIAN__ + /* Load 16 pixels == 48 or 64 bytes */ + offset = (size_t)inptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overread. Since there is no way to + * read a partial AltiVec register, overread would occur on the last + * chunk of the last image row if the right edge is not on a 16-byte + * boundary. It could also occur on other rows if the bytes per row + * is low enough. Since we can't determine whether we're on the last + * image row, we have to assume every row is the last. + */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (bytes > 16) + rgb1 = vec_ld(16, inptr); + if (bytes > 32) + rgb2 = vec_ld(32, inptr); + if (bytes > 48) + rgb3 = vec_ld(48, inptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + rgb4 = vec_ld(64, inptr); +#endif + unaligned_shift_index = vec_lvsl(0, inptr); + rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index); + rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index); + rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = VEC_LD(0, tmpbuf); + rgb1 = VEC_LD(16, tmpbuf); + rgb2 = VEC_LD(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = VEC_LD(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = VEC_LD(0, inptr); + if (num_cols > 16) + rgb1 = VEC_LD(16, inptr); + if (num_cols > 32) + rgb2 = VEC_LD(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = VEC_LD(48, inptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + +#if RGB_PIXELSIZE == 3 + /* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0); + rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1); + rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2); + rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3); +#else + /* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX); + rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX); + rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX); + rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX); +#endif + + /* rg0 = R0 G0 R1 G1 R2 G2 R3 G3 + * bg0 = B0 G0 B1 G1 B2 G2 B3 G3 + * ... + * + * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0); + bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0); + rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1); + bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1); + rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2); + bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2); + rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3); + bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3); + + /* (Original) + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + * + * (This implementation) + * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + */ + + /* Calculate Y values */ + + y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf); + y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf); + y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf); + y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf); + y0 = vec_msums(bg0, pw_f0114_f0250, y0); + y1 = vec_msums(bg1, pw_f0114_f0250, y1); + y2 = vec_msums(bg2, pw_f0114_f0250, y2); + y3 = vec_msums(bg3, pw_f0114_f0250, y3); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); + vec_st(y, 0, outptr0); + + /* Calculate Cb values */ + cb0 = vec_msums(rg0, pw_mf016_mf033, pd_onehalfm1_cj); + cb1 = vec_msums(rg1, pw_mf016_mf033, pd_onehalfm1_cj); + cb2 = vec_msums(rg2, pw_mf016_mf033, pd_onehalfm1_cj); + cb3 = vec_msums(rg3, pw_mf016_mf033, pd_onehalfm1_cj); + cb0 = (__vector int)vec_msum((__vector unsigned short)bg0, pw_f050_f000, + (__vector unsigned int)cb0); + cb1 = (__vector int)vec_msum((__vector unsigned short)bg1, pw_f050_f000, + (__vector unsigned int)cb1); + cb2 = (__vector int)vec_msum((__vector unsigned short)bg2, pw_f050_f000, + (__vector unsigned int)cb2); + cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000, + (__vector unsigned int)cb3); + cbl = vec_perm((__vector unsigned short)cb0, + (__vector unsigned short)cb1, shift_pack_index); + cbh = vec_perm((__vector unsigned short)cb2, + (__vector unsigned short)cb3, shift_pack_index); + cb = vec_pack(cbl, cbh); + vec_st(cb, 0, outptr1); + + /* Calculate Cr values */ + cr0 = vec_msums(bg0, pw_mf008_mf041, pd_onehalfm1_cj); + cr1 = vec_msums(bg1, pw_mf008_mf041, pd_onehalfm1_cj); + cr2 = vec_msums(bg2, pw_mf008_mf041, pd_onehalfm1_cj); + cr3 = vec_msums(bg3, pw_mf008_mf041, pd_onehalfm1_cj); + cr0 = (__vector int)vec_msum((__vector unsigned short)rg0, pw_f050_f000, + (__vector unsigned int)cr0); + cr1 = (__vector int)vec_msum((__vector unsigned short)rg1, pw_f050_f000, + (__vector unsigned int)cr1); + cr2 = (__vector int)vec_msum((__vector unsigned short)rg2, pw_f050_f000, + (__vector unsigned int)cr2); + cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000, + (__vector unsigned int)cr3); + crl = vec_perm((__vector unsigned short)cr0, + (__vector unsigned short)cr1, shift_pack_index); + crh = vec_perm((__vector unsigned short)cr2, + (__vector unsigned short)cr3, shift_pack_index); + cr = vec_pack(crl, crh); + vec_st(cr, 0, outptr2); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,477 @@ +; +; jccolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax,eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx,edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd mmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA,mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_ycc_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmF,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH,mmH + + movq mmC,mmA + punpcklbw mmA,mmH ; mmA=(00 02 04 06) + punpckhbw mmC,mmH ; mmC=(10 12 14 16) + + movq mmB,mmE + punpcklbw mmE,mmH ; mmE=(20 22 24 26) + punpckhbw mmB,mmH ; mmB=(01 03 05 07) + + movq mmF,mmD + punpcklbw mmD,mmH ; mmD=(11 13 15 17) + punpckhbw mmF,mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF,mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmD,mmA + movq mmC,mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB,mmA + punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG,mmD + punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE,mmA + punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH,mmB + punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF,mmF + + movq mmC,mmA + punpcklbw mmA,mmF ; mmA=(00 02 04 06) + punpckhbw mmC,mmF ; mmC=(10 12 14 16) + + movq mmD,mmB + punpcklbw mmB,mmF ; mmB=(01 03 05 07) + punpckhbw mmD,mmF ; mmD=(11 13 15 17) + + movq mmG,mmE + punpcklbw mmE,mmF ; mmE=(20 22 24 26) + punpckhbw mmG,mmF ; mmG=(30 32 34 36) + + punpcklbw mmF,mmH + punpckhbw mmH,mmH + psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movq MMWORD [wk(0)], mm0 ; wk(0)=RE + movq MMWORD [wk(1)], mm1 ; wk(1)=RO + movq MMWORD [wk(2)], mm4 ; wk(2)=BE + movq MMWORD [wk(3)], mm5 ; wk(3)=BO + + movq mm6,mm1 + punpcklwd mm1,mm3 + punpckhwd mm6,mm3 + movq mm7,mm1 + movq mm4,mm6 + pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor mm1,mm1 + pxor mm6,mm6 + punpcklwd mm1,mm5 ; mm1=BOL + punpckhwd mm6,mm5 ; mm6=BOH + psrld mm1,1 ; mm1=BOL*FIX(0.500) + psrld mm6,1 ; mm6=BOH*FIX(0.500) + + movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ] + + paddd mm7,mm1 + paddd mm4,mm6 + paddd mm7,mm5 + paddd mm4,mm5 + psrld mm7,SCALEBITS ; mm7=CbOL + psrld mm4,SCALEBITS ; mm4=CbOH + packssdw mm7,mm4 ; mm7=CbO + + movq mm1, MMWORD [wk(2)] ; mm1=BE + + movq mm6,mm0 + punpcklwd mm0,mm2 + punpckhwd mm6,mm2 + movq mm5,mm0 + movq mm4,mm6 + pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor mm0,mm0 + pxor mm6,mm6 + punpcklwd mm0,mm1 ; mm0=BEL + punpckhwd mm6,mm1 ; mm6=BEH + psrld mm0,1 ; mm0=BEL*FIX(0.500) + psrld mm6,1 ; mm6=BEH*FIX(0.500) + + movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm5,mm0 + paddd mm4,mm6 + paddd mm5,mm1 + paddd mm4,mm1 + psrld mm5,SCALEBITS ; mm5=CbEL + psrld mm4,SCALEBITS ; mm4=CbEH + packssdw mm5,mm4 ; mm5=CbE + + psllw mm7,BYTE_BIT + por mm5,mm7 ; mm5=Cb + movq MMWORD [ebx], mm5 ; Save Cb + + movq mm0, MMWORD [wk(3)] ; mm0=BO + movq mm6, MMWORD [wk(2)] ; mm6=BE + movq mm1, MMWORD [wk(1)] ; mm1=RO + + movq mm4,mm0 + punpcklwd mm0,mm3 + punpckhwd mm4,mm3 + movq mm7,mm0 + movq mm5,mm4 + pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, MMWORD [wk(4)] + paddd mm4, MMWORD [wk(5)] + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL + psrld mm4,SCALEBITS ; mm4=YOH + packssdw mm0,mm4 ; mm0=YO + + pxor mm3,mm3 + pxor mm4,mm4 + punpcklwd mm3,mm1 ; mm3=ROL + punpckhwd mm4,mm1 ; mm4=ROH + psrld mm3,1 ; mm3=ROL*FIX(0.500) + psrld mm4,1 ; mm4=ROH*FIX(0.500) + + movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm7,mm3 + paddd mm5,mm4 + paddd mm7,mm1 + paddd mm5,mm1 + psrld mm7,SCALEBITS ; mm7=CrOL + psrld mm5,SCALEBITS ; mm5=CrOH + packssdw mm7,mm5 ; mm7=CrO + + movq mm3, MMWORD [wk(0)] ; mm3=RE + + movq mm4,mm6 + punpcklwd mm6,mm2 + punpckhwd mm4,mm2 + movq mm1,mm6 + movq mm5,mm4 + pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(6)] + paddd mm4, MMWORD [wk(7)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL + psrld mm4,SCALEBITS ; mm4=YEH + packssdw mm6,mm4 ; mm6=YE + + psllw mm0,BYTE_BIT + por mm6,mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + pxor mm2,mm2 + pxor mm4,mm4 + punpcklwd mm2,mm3 ; mm2=REL + punpckhwd mm4,mm3 ; mm4=REH + psrld mm2,1 ; mm2=REL*FIX(0.500) + psrld mm4,1 ; mm4=REH*FIX(0.500) + + movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ] + + paddd mm1,mm2 + paddd mm5,mm4 + paddd mm1,mm0 + paddd mm5,mm0 + psrld mm1,SCALEBITS ; mm1=CrEL + psrld mm5,SCALEBITS ; mm5=CrEH + packssdw mm1,mm5 ; mm1=CrE + + psllw mm7,BYTE_BIT + por mm1,mm7 ; mm1=Cr + movq MMWORD [edx], mm1 ; Save Cr + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + add ebx, byte SIZEOF_MMWORD ; outptr1 + add edx, byte SIZEOF_MMWORD ; outptr2 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,485 @@ +; +; jccolext.asm - colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +; r10 = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13 = JDIMENSION output_row +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 + + align 16 + + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d + test rcx,rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rdx + push rbx + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + mov rbx, JSAMPROW [rbx] ; outptr1 + mov rdx, JSAMPROW [rdx] ; outptr2 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax,rdx +.column_ld4: + movd xmmA,eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + movdqa xmm7,xmm1 + movdqa xmm4,xmm6 + pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1,xmm1 + pxor xmm6,xmm6 + punpcklwd xmm1,xmm5 ; xmm1=BOL + punpckhwd xmm6,xmm5 ; xmm6=BOH + psrld xmm1,1 ; xmm1=BOL*FIX(0.500) + psrld xmm6,1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm1 + paddd xmm4,xmm6 + paddd xmm7,xmm5 + paddd xmm4,xmm5 + psrld xmm7,SCALEBITS ; xmm7=CbOL + psrld xmm4,SCALEBITS ; xmm4=CbOH + packssdw xmm7,xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + movdqa xmm5,xmm0 + movdqa xmm4,xmm6 + pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0,xmm0 + pxor xmm6,xmm6 + punpcklwd xmm0,xmm1 ; xmm0=BEL + punpckhwd xmm6,xmm1 ; xmm6=BEH + psrld xmm0,1 ; xmm0=BEL*FIX(0.500) + psrld xmm6,1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5,xmm0 + paddd xmm4,xmm6 + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrld xmm5,SCALEBITS ; xmm5=CbEL + psrld xmm4,SCALEBITS ; xmm4=CbEH + packssdw xmm5,xmm4 ; xmm5=CbE + + psllw xmm7,BYTE_BIT + por xmm5,xmm7 ; xmm5=Cb + movdqa XMMWORD [rbx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + movdqa xmm7,xmm0 + movdqa xmm5,xmm4 + pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + pxor xmm3,xmm3 + pxor xmm4,xmm4 + punpcklwd xmm3,xmm1 ; xmm3=ROL + punpckhwd xmm4,xmm1 ; xmm4=ROH + psrld xmm3,1 ; xmm3=ROL*FIX(0.500) + psrld xmm4,1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm3 + paddd xmm5,xmm4 + paddd xmm7,xmm1 + paddd xmm5,xmm1 + psrld xmm7,SCALEBITS ; xmm7=CrOL + psrld xmm5,SCALEBITS ; xmm5=CrOH + packssdw xmm7,xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y + + pxor xmm2,xmm2 + pxor xmm4,xmm4 + punpcklwd xmm2,xmm3 ; xmm2=REL + punpckhwd xmm4,xmm3 ; xmm4=REH + psrld xmm2,1 ; xmm2=REL*FIX(0.500) + psrld xmm4,1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1,xmm2 + paddd xmm5,xmm4 + paddd xmm1,xmm0 + paddd xmm5,xmm0 + psrld xmm1,SCALEBITS ; xmm1=CrEL + psrld xmm5,SCALEBITS ; xmm5=CrEH + packssdw xmm1,xmm5 ; xmm1=CrE + + psllw xmm7,BYTE_BIT + por xmm1,xmm7 ; xmm1=Cr + movdqa XMMWORD [rdx], xmm1 ; Save Cr + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + add rbx, byte SIZEOF_XMMWORD ; outptr1 + add rdx, byte SIZEOF_XMMWORD ; outptr2 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + pop rbx + pop rdx + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,503 @@ +; +; jccolext.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd xmmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + movdqa xmm7,xmm1 + movdqa xmm4,xmm6 + pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1,xmm1 + pxor xmm6,xmm6 + punpcklwd xmm1,xmm5 ; xmm1=BOL + punpckhwd xmm6,xmm5 ; xmm6=BOH + psrld xmm1,1 ; xmm1=BOL*FIX(0.500) + psrld xmm6,1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm1 + paddd xmm4,xmm6 + paddd xmm7,xmm5 + paddd xmm4,xmm5 + psrld xmm7,SCALEBITS ; xmm7=CbOL + psrld xmm4,SCALEBITS ; xmm4=CbOH + packssdw xmm7,xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + movdqa xmm5,xmm0 + movdqa xmm4,xmm6 + pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0,xmm0 + pxor xmm6,xmm6 + punpcklwd xmm0,xmm1 ; xmm0=BEL + punpckhwd xmm6,xmm1 ; xmm6=BEH + psrld xmm0,1 ; xmm0=BEL*FIX(0.500) + psrld xmm6,1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5,xmm0 + paddd xmm4,xmm6 + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrld xmm5,SCALEBITS ; xmm5=CbEL + psrld xmm4,SCALEBITS ; xmm4=CbEH + packssdw xmm5,xmm4 ; xmm5=CbE + + psllw xmm7,BYTE_BIT + por xmm5,xmm7 ; xmm5=Cb + movdqa XMMWORD [ebx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + movdqa xmm7,xmm0 + movdqa xmm5,xmm4 + pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + pxor xmm3,xmm3 + pxor xmm4,xmm4 + punpcklwd xmm3,xmm1 ; xmm3=ROL + punpckhwd xmm4,xmm1 ; xmm4=ROH + psrld xmm3,1 ; xmm3=ROL*FIX(0.500) + psrld xmm4,1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm3 + paddd xmm5,xmm4 + paddd xmm7,xmm1 + paddd xmm5,xmm1 + psrld xmm7,SCALEBITS ; xmm7=CrOL + psrld xmm5,SCALEBITS ; xmm5=CrOH + packssdw xmm7,xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + pxor xmm2,xmm2 + pxor xmm4,xmm4 + punpcklwd xmm2,xmm3 ; xmm2=REL + punpckhwd xmm4,xmm3 ; xmm4=REH + psrld xmm2,1 ; xmm2=REL*FIX(0.500) + psrld xmm4,1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1,xmm2 + paddd xmm5,xmm4 + paddd xmm1,xmm0 + paddd xmm5,xmm0 + psrld xmm1,SCALEBITS ; xmm1=CrEL + psrld xmm5,SCALEBITS ; xmm5=CrEH + packssdw xmm1,xmm5 ; xmm1=CrE + + psllw xmm7,BYTE_BIT + por xmm1,xmm7 ; xmm1=Cr + movdqa XMMWORD [edx], xmm1 ; Save Cr + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + add ebx, byte SIZEOF_XMMWORD ; outptr1 + add edx, byte SIZEOF_XMMWORD ; outptr2 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,104 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* RGB --> YCC CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_081 5329 /* FIX(0.08131) */ +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_168 11059 /* FIX(0.16874) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_331 21709 /* FIX(0.33126) */ +#define F_0_418 27439 /* FIX(0.41869) */ +#define F_0_500 32768 /* FIX(0.50000) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} +#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} +#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} +#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgbx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} +#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} +#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} +#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} +#define jsimd_rgb_ycc_convert_altivec jsimd_extbgrx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extxbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extxrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,123 @@ +; +; jccolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE + +EXTN(jconst_rgb_ycc_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jccolor.asm - colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jccolor-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jccolor-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jccolor.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,99 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* RGB --> GRAYSCALE CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} +#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} +#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} +#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_gray_convert_altivec jsimd_extrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} +#define jsimd_rgb_gray_convert_altivec jsimd_extrgbx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} +#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} +#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} +#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} +#define jsimd_rgb_gray_convert_altivec jsimd_extbgrx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extxbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extxrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,116 @@ +; +; jcgray.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2011 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_mmx) PRIVATE + +EXTN(jconst_rgb_gray_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,113 @@ +; +; jcgray.asm - grayscale colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgray-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgray-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,113 @@ +; +; jcgray.asm - grayscale colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,227 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * Copyright (C) 2014, Jay Foad. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jcgray-altivec.c */ + + +void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, + JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + JSAMPROW inptr, outptr; + int pitch = img_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; +#endif + + __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + rgbg0, rgbg1, rgbg2, rgbg3, y; +#if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 + __vector unsigned char rgb3 = {0}; +#endif +#if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 + __vector unsigned char rgb4 = {0}; +#endif + __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; + __vector unsigned short yl, yh; + __vector int y0, y1, y2, y3; + + /* Constants */ + __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) }, + pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr = output_buf[0][output_row]; + output_row++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16, + outptr += 16) { + +#if __BIG_ENDIAN__ + /* Load 16 pixels == 48 or 64 bytes */ + offset = (size_t)inptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overread. Since there is no way to + * read a partial AltiVec register, overread would occur on the last + * chunk of the last image row if the right edge is not on a 16-byte + * boundary. It could also occur on other rows if the bytes per row + * is low enough. Since we can't determine whether we're on the last + * image row, we have to assume every row is the last. + */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (bytes > 16) + rgb1 = vec_ld(16, inptr); + if (bytes > 32) + rgb2 = vec_ld(32, inptr); + if (bytes > 48) + rgb3 = vec_ld(48, inptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + rgb4 = vec_ld(64, inptr); +#endif + unaligned_shift_index = vec_lvsl(0, inptr); + rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index); + rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index); + rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index); +#endif + } + } else { + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (num_cols > 16) + rgb1 = vec_ld(16, inptr); + if (num_cols > 32) + rgb2 = vec_ld(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = vec_ld(48, inptr); +#endif + } + } +#else + /* Little endian */ + rgb0 = vec_vsx_ld(0, inptr); + if (num_cols > 16) + rgb1 = vec_vsx_ld(16, inptr); + if (num_cols > 32) + rgb2 = vec_vsx_ld(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = vec_vsx_ld(48, inptr); +#endif +#endif + +#if RGB_PIXELSIZE == 3 + /* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0); + rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1); + rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2); + rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3); +#else + /* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX); + rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX); + rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX); + rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX); +#endif + + /* rg0 = R0 G0 R1 G1 R2 G2 R3 G3 + * bg0 = B0 G0 B1 G1 B2 G2 B3 G3 + * ... + * + * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0); + bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0); + rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1); + bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1); + rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2); + bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2); + rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3); + bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3); + + /* (Original) + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * + * (This implementation) + * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + */ + + /* Calculate Y values */ + + y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf); + y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf); + y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf); + y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf); + y0 = vec_msums(bg0, pw_f0114_f0250, y0); + y1 = vec_msums(bg1, pw_f0114_f0250, y1); + y2 = vec_msums(bg2, pw_f0114_f0250, y2); + y3 = vec_msums(bg3, pw_f0114_f0250, y3); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); + vec_st(y, 0, outptr); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,357 @@ +; +; jcgryext.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2011 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_rgb_gray_convert_mmx) PRIVATE + +EXTN(jsimd_rgb_gray_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax,eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx,edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd mmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA,mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_gray_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmF,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH,mmH + + movq mmC,mmA + punpcklbw mmA,mmH ; mmA=(00 02 04 06) + punpckhbw mmC,mmH ; mmC=(10 12 14 16) + + movq mmB,mmE + punpcklbw mmE,mmH ; mmE=(20 22 24 26) + punpckhbw mmB,mmH ; mmB=(01 03 05 07) + + movq mmF,mmD + punpcklbw mmD,mmH ; mmD=(11 13 15 17) + punpckhbw mmF,mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF,mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmD,mmA + movq mmC,mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB,mmA + punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG,mmD + punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE,mmA + punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH,mmB + punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF,mmF + + movq mmC,mmA + punpcklbw mmA,mmF ; mmA=(00 02 04 06) + punpckhbw mmC,mmF ; mmC=(10 12 14 16) + + movq mmD,mmB + punpcklbw mmB,mmF ; mmB=(01 03 05 07) + punpckhbw mmD,mmF ; mmD=(11 13 15 17) + + movq mmG,mmE + punpcklbw mmE,mmF ; mmE=(20 22 24 26) + punpckhbw mmG,mmF ; mmG=(30 32 34 36) + + punpcklbw mmF,mmH + punpckhbw mmH,mmH + psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movq mm6,mm1 + punpcklwd mm1,mm3 + punpckhwd mm6,mm3 + pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm7, mm6 ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm6,mm0 + punpcklwd mm0,mm2 + punpckhwd mm6,mm2 + pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movq MMWORD [wk(0)], mm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(1)], mm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movq mm0, mm5 ; mm0=BO + movq mm6, mm4 ; mm6=BE + + movq mm4,mm0 + punpcklwd mm0,mm3 + punpckhwd mm4,mm3 + pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, mm1 + paddd mm4, mm7 + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL + psrld mm4,SCALEBITS ; mm4=YOH + packssdw mm0,mm4 ; mm0=YO + + movq mm4,mm6 + punpcklwd mm6,mm2 + punpckhwd mm4,mm2 + pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(0)] + paddd mm4, MMWORD [wk(1)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL + psrld mm4,SCALEBITS ; mm4=YEH + packssdw mm6,mm4 ; mm6=YE + + psllw mm0,BYTE_BIT + por mm6,mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,364 @@ +; +; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +; r10 = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13 = JDIMENSION output_row +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_gray_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d + test rcx,rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax,rdx +.column_ld4: + movd xmmA,eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 + paddd xmm4, xmm7 + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcgryext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,383 @@ +; +; jcgryext.asm - grayscale colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_gray_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd xmmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 + paddd xmm4, xmm7 + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jchuff-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jchuff-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jchuff-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jchuff-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,361 @@ +; +; jchuff-sse2-64.asm - Huffman entropy encoding (64-bit SSE2) +; +; Copyright 2009-2011, 2014-2016 D. R. Commander. +; Copyright 2015 Matthieu Darbois +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_huff_encode_one_block) PRIVATE + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it. This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 + sub put_bits, 8 ; put_bits -= 8; + mov rdx, put_buffer + mov ecx, put_bits + shr rdx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [buffer], dl ; *buffer++ = c; + add buffer, 1 + cmp dl, 0xFF ; need to stuff a zero byte? + jne %%.EMIT_BYTE_END + mov byte [buffer], 0 ; *buffer++ = 0; + add buffer, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 +%endmacro + +%macro CHECKBUF31 0 + cmp put_bits, 32 ; if (put_bits > 31) { + jl %%.CHECKBUF31_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF31_END: +%endmacro + +%macro CHECKBUF47 0 + cmp put_bits, 48 ; if (put_bits > 47) { + jl %%.CHECKBUF47_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF47_END: +%endmacro + +%macro EMIT_BITS 2 + CHECKBUF47 + mov ecx, %2 + PUT_BITS %1 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) + pxor xmm8, xmm8 ; __m128i neg = _mm_setzero_si128(); + pxor xmm9, xmm9 ; __m128i neg = _mm_setzero_si128(); + pxor xmm10, xmm10 ; __m128i neg = _mm_setzero_si128(); + pxor xmm11, xmm11 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [r12 + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [r12 + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [r12 + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [r12 + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [r12 + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [r12 + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [r12 + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [r12 + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; + pinsrw %34, word [r12 + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [r12 + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [r12 + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [r12 + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [r12 + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [r12 + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [r12 + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; + pinsrw %37, word [r12 + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [r12 + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [r12 + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [r12 + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [r12 + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [r12 + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [r12 + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [r12 + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [r12 + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [r12 + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [r12 + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [r12 + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [r12 + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [r12 + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [r12 + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [r12 + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [r12 + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ebx, 7 ; xmm_shadow[31] = block[jno31]; +%endif + pcmpgtw xmm8, %34 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm9, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm10, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm11, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm8 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm9 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm10 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm11 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm8 ; x1 = _mm_xor_si128(x1, neg); + pxor %35, xmm9 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm10 ; x1 = _mm_xor_si128(x1, neg); + pxor %37, xmm11 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm8, %34 ; neg = _mm_xor_si128(neg, x1); + pxor xmm9, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm10, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm11, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); + movdqa XMMWORD [t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [t2 + %1 * SIZEOF_WORD], xmm8 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [t2 + (%1 + 8) * SIZEOF_WORD], xmm9 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [t2 + (%1 + 16) * SIZEOF_WORD], xmm10 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [t2 + (%1 + 24) * SIZEOF_WORD], xmm11 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients. +; +; GLOBAL(JOCTET*) +; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; + +; r10 = working_state *state +; r11 = JOCTET *buffer +; r12 = JCOEFPTR block +; r13 = int last_dc_val +; r14 = c_derived_tbl *dctbl +; r15 = c_derived_tbl *actbl + +%define t1 rbp-(DCTSIZE2*SIZEOF_WORD) +%define t2 t1-(DCTSIZE2*SIZEOF_WORD) +%define put_buffer r8 +%define put_bits r9d +%define buffer rax + + align 16 + global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE + +EXTN(jsimd_huff_encode_one_block_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [t2] + collect_args +%ifdef WIN64 + movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8 + movaps XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9 + movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10 + movaps XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11 + sub rsp, 4*SIZEOF_XMMWORD +%endif + push rbx + + mov buffer, r11 ; r11 is now sratch + + mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits; + push r10 ; r10 is now scratch + + ; Encode the DC coefficient difference per section F.1.2.1 + movsx edi, word [r12] ; temp = temp2 = block[0] - last_dc_val; + sub edi, r13d ; r13 is not used anymore + mov ebx, edi + + ; This is a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog. + mov esi, edi + sar esi, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor edi, esi ; temp ^= temp3; + sub edi, esi ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add ebx, esi ; temp2 += temp3; + + ; Find the number of bits needed for the magnitude of the coefficient + lea r11, [rel jpeg_nbits_table] + movzx rdi, byte [r11 + rdi] ; nbits = JPEG_NBITS(temp); + ; Emit the Huffman-coded symbol for the number of bits + mov r11d, INT [r14 + rdi * 4] ; code = dctbl->ehufco[nbits]; + movzx esi, byte [r14 + rdi + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS r11, esi ; EMIT_BITS(code, size) + + ; Mask off any extra bits in code + mov esi, 1 + mov ecx, edi + shl esi, cl + dec esi + and ebx, esi ; temp2 &= (((JLONG) 1)<ehufco[0xf0]; + movzx r14d, byte [r15 + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + lea rsi, [t1] +.BLOOP: + bsf r12, r11 ; r = __builtin_ctzl(index); + jz .ELOOP + mov rcx, r12 + lea rsi, [rsi+r12*2] ; k += r; + shr r11, cl ; index >>= r; + movzx rdi, word [rsi] ; temp = t1[k]; + lea rbx, [rel jpeg_nbits_table] + movzx rdi, byte [rbx + rdi] ; nbits = JPEG_NBITS(temp); +.BRLOOP: + cmp r12, 16 ; while (r > 15) { + jl .ERLOOP + EMIT_BITS r13, r14d ; EMIT_BITS(code_0xf0, size_0xf0) + sub r12, 16 ; r -= 16; + jmp .BRLOOP +.ERLOOP: + ; Emit Huffman symbol for run length / number of bits + CHECKBUF31 ; uses rcx, rdx + + shl r12, 4 ; temp3 = (r << 4) + nbits; + add r12, rdi + mov ebx, INT [r15 + r12 * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [r15 + r12 + 1024] ; size = actbl->ehufsi[temp3]; + PUT_BITS rbx + + ;EMIT_CODE(code, size) + + movsx ebx, word [rsi-DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov rcx, rdi + mov rdx, 1 + shl rdx, cl + dec rdx + and rbx, rdx ; temp2 &= (((JLONG) 1)<>= 1; + add rsi, 2 ; ++k; + jmp .BLOOP +.ELOOP: + ; If the last coef(s) were zero, emit an end-of-block code + lea rdi, [t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp rdi, rsi ; if (r > 0) { + je .EFN + mov ebx, INT [r15] ; code = actbl->ehufco[0]; + movzx r12d, byte [r15 + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS rbx, r12d +.EFN: + pop r10 + ; Save put_buffer & put_bits + mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits; + + pop rbx +%ifdef WIN64 + movaps xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD] + movaps xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD] + movaps xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD] + movaps xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD] + add rsp, 4*SIZEOF_XMMWORD +%endif + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jchuff-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jchuff-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jchuff-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jchuff-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,427 @@ +; +; jchuff-sse2.asm - Huffman entropy encoding (SSE2) +; +; Copyright 2009-2011, 2014-2016 D. R. Commander. +; Copyright 2015 Matthieu Darbois +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_huff_encode_one_block) PRIVATE + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it. This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 + sub put_bits, 8 ; put_bits -= 8; + mov edx, put_buffer + mov ecx, put_bits + shr edx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [eax], dl ; *buffer++ = c; + add eax, 1 + cmp dl, 0xFF ; need to stuff a zero byte? + jne %%.EMIT_BYTE_END + mov byte [eax], 0 ; *buffer++ = 0; + add eax, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 +%endmacro + +%macro CHECKBUF15 0 + cmp put_bits, 16 ; if (put_bits > 31) { + jl %%.CHECKBUF15_END + mov eax, POINTER [esp+buffer] + EMIT_BYTE + EMIT_BYTE + mov POINTER [esp+buffer], eax +%%.CHECKBUF15_END: +%endmacro + +%macro EMIT_BITS 1 + PUT_BITS %1 + CHECKBUF15 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) + pxor xmm4, xmm4 ; __m128i neg = _mm_setzero_si128(); + pxor xmm5, xmm5 ; __m128i neg = _mm_setzero_si128(); + pxor xmm6, xmm6 ; __m128i neg = _mm_setzero_si128(); + pxor xmm7, xmm7 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [esi + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [esi + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [esi + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [esi + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [esi + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [esi + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [esi + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [esi + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; + pinsrw %34, word [esi + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [esi + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [esi + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [esi + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [esi + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [esi + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [esi + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; + pinsrw %37, word [esi + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [esi + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [esi + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [esi + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [esi + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [esi + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [esi + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [esi + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [esi + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [esi + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [esi + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [esi + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [esi + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [esi + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [esi + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [esi + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [esi + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ecx, 7 ; xmm_shadow[31] = block[jno31]; +%endif + pcmpgtw xmm4, %34 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm5, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm6, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm7, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm4 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm5 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm6 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm7 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm4 ; x1 = _mm_xor_si128(x1, neg); + pxor %35, xmm5 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm6 ; x1 = _mm_xor_si128(x1, neg); + pxor %37, xmm7 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm4, %34 ; neg = _mm_xor_si128(neg, x1); + pxor xmm5, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm6, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm7, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [esp + t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); + movdqa XMMWORD [esp + t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [esp + t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [esp + t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [esp + t2 + %1 * SIZEOF_WORD], xmm4 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [esp + t2 + (%1 + 8) * SIZEOF_WORD], xmm5 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [esp + t2 + (%1 + 16) * SIZEOF_WORD], xmm6 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [esp + t2 + (%1 + 24) * SIZEOF_WORD], xmm7 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients. +; +; GLOBAL(JOCTET*) +; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; + +; eax + 8 = working_state *state +; eax + 12 = JOCTET *buffer +; eax + 16 = JCOEFPTR block +; eax + 20 = int last_dc_val +; eax + 24 = c_derived_tbl *dctbl +; eax + 28 = c_derived_tbl *actbl + +%define pad 6*SIZEOF_DWORD ; Align to 16 bytes +%define t1 pad +%define t2 t1+(DCTSIZE2*SIZEOF_WORD) +%define block t2+(DCTSIZE2*SIZEOF_WORD) +%define actbl block+SIZEOF_DWORD +%define buffer actbl+SIZEOF_DWORD +%define temp buffer+SIZEOF_DWORD +%define temp2 temp+SIZEOF_DWORD +%define temp3 temp2+SIZEOF_DWORD +%define temp4 temp3+SIZEOF_DWORD +%define temp5 temp4+SIZEOF_DWORD +%define gotptr temp5+SIZEOF_DWORD ; void *gotptr +%define put_buffer ebx +%define put_bits edi + + align 16 + global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE + +EXTN(jsimd_huff_encode_one_block_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + sub esp, temp5+9*SIZEOF_DWORD-pad + push ebx + push ecx +; push edx ; need not be preserved + push esi + push edi + push ebp + + mov esi, POINTER [eax+8] ; (working_state *state) + mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits; + push esi ; esi is now scratch + + get_GOT edx ; get GOT address + movpic POINTER [esp+gotptr], edx ; save GOT address + + mov ecx, POINTER [eax+28] + mov edx, POINTER [eax+16] + mov esi, POINTER [eax+12] + mov POINTER [esp+actbl], ecx + mov POINTER [esp+block], edx + mov POINTER [esp+buffer], esi + + ; Encode the DC coefficient difference per section F.1.2.1 + mov esi, POINTER [esp+block] ; block + movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val; + sub ecx, DWORD [eax+20] + mov esi, ecx + + ; This is a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog. + mov edx, ecx + sar edx, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor ecx, edx ; temp ^= temp3; + sub ecx, edx ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add esi, edx ; temp2 += temp3; + mov DWORD [esp+temp], esi ; backup temp2 in temp + + ; Find the number of bits needed for the magnitude of the coefficient + movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp) + movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], edx ; backup nbits in temp2 + + ; Emit the Huffman-coded symbol for the number of bits + mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore + mov eax, INT [ebp + edx * 4] ; code = dctbl->ehufco[nbits]; + movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS eax ; EMIT_BITS(code, size) + + mov ecx, DWORD [esp+temp2] ; restore nbits + + ; Mask off any extra bits in code + mov eax, 1 + shl eax, cl + dec eax + and eax, DWORD [esp+temp] ; temp2 &= (((JLONG) 1)<>= r; + mov DWORD [esp+temp3], edx +.BRLOOP: + cmp ecx, 16 ; while (r > 15) { + jl .ERLOOP + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP +.ERLOOP: + movsx eax, word [esi] ; temp = t1[k]; + movpic edx, POINTER [esp+gotptr] ; load GOT address (edx) + movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; + + jmp .BLOOP +.ELOOP: + movdqa xmm0, XMMWORD [esp + t1 + 32 * SIZEOF_WORD] ; __m128i tmp0 = _mm_loadu_si128((__m128i *)(t1 + 0)); + movdqa xmm1, XMMWORD [esp + t1 + 40 * SIZEOF_WORD] ; __m128i tmp1 = _mm_loadu_si128((__m128i *)(t1 + 8)); + movdqa xmm2, XMMWORD [esp + t1 + 48 * SIZEOF_WORD] ; __m128i tmp2 = _mm_loadu_si128((__m128i *)(t1 + 16)); + movdqa xmm3, XMMWORD [esp + t1 + 56 * SIZEOF_WORD] ; __m128i tmp3 = _mm_loadu_si128((__m128i *)(t1 + 24)); + pcmpeqw xmm0, xmm7 ; tmp0 = _mm_cmpeq_epi16(tmp0, zero); + pcmpeqw xmm1, xmm7 ; tmp1 = _mm_cmpeq_epi16(tmp1, zero); + pcmpeqw xmm2, xmm7 ; tmp2 = _mm_cmpeq_epi16(tmp2, zero); + pcmpeqw xmm3, xmm7 ; tmp3 = _mm_cmpeq_epi16(tmp3, zero); + packsswb xmm0, xmm1 ; tmp0 = _mm_packs_epi16(tmp0, tmp1); + packsswb xmm2, xmm3 ; tmp2 = _mm_packs_epi16(tmp2, tmp3); + pmovmskb edx, xmm0 ; index = ((uint64_t)_mm_movemask_epi8(tmp0)) << 0; + pmovmskb ecx, xmm2 ; index = ((uint64_t)_mm_movemask_epi8(tmp2)) << 16; + shl ecx, 16 + or edx, ecx + not edx ; index = ~index; + + lea eax, [esp + t1 + (DCTSIZE2/2) * 2] + sub eax, esi + shr eax, 1 + bsf ecx, edx ; r = __builtin_ctzl(index); + jz .ELOOP2 + shr edx, cl ; index >>= r; + add ecx, eax + lea esi, [esi+ecx*2] ; k += r; + mov DWORD [esp+temp3], edx + jmp .BRLOOP2 +.BLOOP2: + bsf ecx, edx ; r = __builtin_ctzl(index); + jz .ELOOP2 + lea esi, [esi+ecx*2] ; k += r; + shr edx, cl ; index >>= r; + mov DWORD [esp+temp3], edx +.BRLOOP2: + cmp ecx, 16 ; while (r > 15) { + jl .ERLOOP2 + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP2 +.ERLOOP2: + movsx eax, word [esi] ; temp = t1[k]; + bsr eax, eax ; nbits = 32 - __builtin_clz(temp); + inc eax + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; + + jmp .BLOOP2 +.ELOOP2: + ; If the last coef(s) were zero, emit an end-of-block code + lea edx, [esp + t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp edx, esi ; if (r > 0) { + je .EFN + mov eax, INT [ebp] ; code = actbl->ehufco[0]; + movzx ecx, byte [ebp + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS eax +.EFN: + mov eax, [esp+buffer] + pop esi + ; Save put_buffer & put_bits + mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits; + + pop ebp + pop edi + pop esi +; pop edx ; need not be preserved + pop ecx + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcolsamp.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcolsamp.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcolsamp.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcolsamp.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,105 @@ +; +; jcolsamp.inc - private declarations for color conversion & up/downsampling +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; [TAB8] + +; -------------------------------------------------------------------------- + +; pseudo-resisters to make ordering of RGB configurable +; +%if RGB_RED == 0 +%define mmA mm0 +%define mmB mm1 +%define xmmA xmm0 +%define xmmB xmm1 +%elif RGB_GREEN == 0 +%define mmA mm2 +%define mmB mm3 +%define xmmA xmm2 +%define xmmB xmm3 +%elif RGB_BLUE == 0 +%define mmA mm4 +%define mmB mm5 +%define xmmA xmm4 +%define xmmB xmm5 +%else +%define mmA mm6 +%define mmB mm7 +%define xmmA xmm6 +%define xmmB xmm7 +%endif + +%if RGB_RED == 1 +%define mmC mm0 +%define mmD mm1 +%define xmmC xmm0 +%define xmmD xmm1 +%elif RGB_GREEN == 1 +%define mmC mm2 +%define mmD mm3 +%define xmmC xmm2 +%define xmmD xmm3 +%elif RGB_BLUE == 1 +%define mmC mm4 +%define mmD mm5 +%define xmmC xmm4 +%define xmmD xmm5 +%else +%define mmC mm6 +%define mmD mm7 +%define xmmC xmm6 +%define xmmD xmm7 +%endif + +%if RGB_RED == 2 +%define mmE mm0 +%define mmF mm1 +%define xmmE xmm0 +%define xmmF xmm1 +%elif RGB_GREEN == 2 +%define mmE mm2 +%define mmF mm3 +%define xmmE xmm2 +%define xmmF xmm3 +%elif RGB_BLUE == 2 +%define mmE mm4 +%define mmF mm5 +%define xmmE xmm4 +%define xmmF xmm5 +%else +%define mmE mm6 +%define mmF mm7 +%define xmmE xmm6 +%define xmmF xmm7 +%endif + +%if RGB_RED == 3 +%define mmG mm0 +%define mmH mm1 +%define xmmG xmm0 +%define xmmH xmm1 +%elif RGB_GREEN == 3 +%define mmG mm2 +%define mmH mm3 +%define xmmG xmm2 +%define xmmH xmm3 +%elif RGB_BLUE == 3 +%define mmG mm4 +%define mmH mm5 +%define xmmG xmm4 +%define xmmH xmm5 +%else +%define mmG mm6 +%define mmH mm7 +%define xmmG xmm6 +%define xmmH xmm7 +%endif + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,158 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* CHROMA DOWNSAMPLING */ + +#include "jsimd_altivec.h" +#include "jcsample.h" + + +void +jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + int outrow, outcol; + JDIMENSION output_cols = width_blocks * DCTSIZE; + JSAMPROW inptr, outptr; + + __vector unsigned char this0, next0, out; + __vector unsigned short this0e, this0o, next0e, next0o, outl, outh; + + /* Constants */ + __vector unsigned short pw_bias = { __4X2(0, 1) }, + pw_one = { __8X(1) }; + __vector unsigned char even_odd_index = + {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15}, + pb_zero = { __16X(0) }; + + expand_right_edge(input_data, max_v_samp_factor, image_width, + output_cols * 2); + + for (outrow = 0; outrow < v_samp_factor; outrow++) { + outptr = output_data[outrow]; + inptr = input_data[outrow]; + + for (outcol = output_cols; outcol > 0; + outcol -= 16, inptr += 32, outptr += 16) { + + this0 = vec_ld(0, inptr); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)VEC_UNPACKHU(this0); + this0o = (__vector unsigned short)VEC_UNPACKLU(this0); + outl = vec_add(this0e, this0o); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_one); + + if (outcol > 8) { + next0 = vec_ld(16, inptr); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)VEC_UNPACKHU(next0); + next0o = (__vector unsigned short)VEC_UNPACKLU(next0); + outh = vec_add(next0e, next0o); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_one); + } else + outh = vec_splat_u16(0); + + out = vec_pack(outl, outh); + vec_st(out, 0, outptr); + } + } +} + + +void +jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + int inrow, outrow, outcol; + JDIMENSION output_cols = width_blocks * DCTSIZE; + JSAMPROW inptr0, inptr1, outptr; + + __vector unsigned char this0, next0, this1, next1, out; + __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o, + next1e, next1o, out0l, out0h, out1l, out1h, outl, outh; + + /* Constants */ + __vector unsigned short pw_bias = { __4X2(1, 2) }, + pw_two = { __8X(2) }; + __vector unsigned char even_odd_index = + { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, + pb_zero = { __16X(0) }; + + expand_right_edge(input_data, max_v_samp_factor, image_width, + output_cols * 2); + + for (inrow = 0, outrow = 0; outrow < v_samp_factor; + inrow += 2, outrow++) { + + inptr0 = input_data[inrow]; + inptr1 = input_data[inrow + 1]; + outptr = output_data[outrow]; + + for (outcol = output_cols; outcol > 0; + outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) { + + this0 = vec_ld(0, inptr0); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)VEC_UNPACKHU(this0); + this0o = (__vector unsigned short)VEC_UNPACKLU(this0); + out0l = vec_add(this0e, this0o); + + this1 = vec_ld(0, inptr1); + this1 = vec_perm(this1, this1, even_odd_index); + this1e = (__vector unsigned short)VEC_UNPACKHU(this1); + this1o = (__vector unsigned short)VEC_UNPACKLU(this1); + out1l = vec_add(this1e, this1o); + + outl = vec_add(out0l, out1l); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_two); + + if (outcol > 8) { + next0 = vec_ld(16, inptr0); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)VEC_UNPACKHU(next0); + next0o = (__vector unsigned short)VEC_UNPACKLU(next0); + out0h = vec_add(next0e, next0o); + + next1 = vec_ld(16, inptr1); + next1 = vec_perm(next1, next1, even_odd_index); + next1e = (__vector unsigned short)VEC_UNPACKHU(next1); + next1o = (__vector unsigned short)VEC_UNPACKLU(next1); + out1h = vec_add(next1e, next1o); + + outh = vec_add(out0h, out1h); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_two); + } else + outh = vec_splat_u16(0); + + out = vec_pack(outl, outh); + vec_st(out, 0, outptr); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample.h glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +/* + * jcsample.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +LOCAL(void) +expand_right_edge (JSAMPARRAY image_data, int num_rows, + JDIMENSION input_cols, JDIMENSION output_cols) +{ + register JSAMPROW ptr; + register JSAMPLE pixval; + register int count; + int row; + int numcols = (int) (output_cols - input_cols); + + if (numcols > 0) { + for (row = 0; row < num_rows; row++) { + ptr = image_data[row] + input_cols; + pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + for (count = numcols; count > 0; count--) + *ptr++ = pixval; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,324 @@ +; +; jcsample.asm - downsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_downsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v1_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00010000 ; bias pattern + movd mm7,edx + pcmpeqw mm6,mm6 + punpckldq mm7,mm7 ; mm7={0, 1, 0, 1} + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm1, MMWORD [esi+1*SIZEOF_MMWORD] + movq mm2,mm0 + movq mm3,mm1 + + pand mm0,mm6 + psrlw mm2,BYTE_BIT + pand mm1,mm6 + psrlw mm3,BYTE_BIT + + paddw mm0,mm2 + paddw mm1,mm3 + paddw mm0,mm7 + paddw mm1,mm7 + psrlw mm0,1 + psrlw mm1,1 + + packuswb mm0,mm1 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 1*SIZEOF_MMWORD ; outptr + sub ecx, byte SIZEOF_MMWORD ; outcol + jnz short .columnloop + + pop esi + pop edi + pop ecx + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_downsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + movd mm7,edx + pcmpeqw mm6,mm6 + punpckldq mm7,mm7 ; mm7={1, 2, 1, 2} + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [edx+0*SIZEOF_MMWORD] + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2, MMWORD [edx+1*SIZEOF_MMWORD] + movq mm3, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm4,mm0 + movq mm5,mm1 + pand mm0,mm6 + psrlw mm4,BYTE_BIT + pand mm1,mm6 + psrlw mm5,BYTE_BIT + paddw mm0,mm4 + paddw mm1,mm5 + + movq mm4,mm2 + movq mm5,mm3 + pand mm2,mm6 + psrlw mm4,BYTE_BIT + pand mm3,mm6 + psrlw mm5,BYTE_BIT + paddw mm2,mm4 + paddw mm3,mm5 + + paddw mm0,mm1 + paddw mm2,mm3 + paddw mm0,mm7 + paddw mm2,mm7 + psrlw mm0,2 + psrlw mm2,2 + + packuswb mm0,mm2 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + + add edx, byte 2*SIZEOF_MMWORD ; inptr0 + add esi, byte 2*SIZEOF_MMWORD ; inptr1 + add edi, byte 1*SIZEOF_MMWORD ; outptr + sub ecx, byte SIZEOF_MMWORD ; outcol + jnz near .columnloop + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,330 @@ +; +; jcsample.asm - downsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +; r10 = JDIMENSION image_width +; r11 = int max_v_samp_factor +; r12 = JDIMENSION v_samp_factor +; r13 = JDIMENSION width_blocks +; r14 = JSAMPARRAY input_data +; r15 = JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_downsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov ecx, r13d + shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) + jz near .return + + mov edx, r10d + + ; -- expand_right_edge + + push rcx + shl rcx,1 ; output_cols * 2 + sub rcx,rdx + jle short .expand_end + + mov rax, r11 + test rax,rax + jle short .expand_end + + cld + mov rsi, r14 ; input_data +.expandloop: + push rax + push rcx + + mov rdi, JSAMPROW [rsi] + add rdi,rdx + mov al, JSAMPLE [rdi-1] + + rep stosb + + pop rcx + pop rax + + add rsi, byte SIZEOF_JSAMPROW + dec rax + jg short .expandloop + +.expand_end: + pop rcx ; output_cols + + ; -- h2v1_downsample + + mov eax, r12d ; rowctr + test eax,eax + jle near .return + + mov rdx, 0x00010000 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov rsi, r14 ; input_data + mov rdi, r15 ; output_data +.rowloop: + push rcx + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + +.columnloop_r8: + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + pxor xmm1,xmm1 + mov rcx, SIZEOF_XMMWORD + jmp short .downsample + +.columnloop: + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm2,xmm0 + movdqa xmm3,xmm1 + + pand xmm0,xmm6 + psrlw xmm2,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm3,BYTE_BIT + + paddw xmm0,xmm2 + paddw xmm1,xmm3 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + psrlw xmm0,1 + psrlw xmm1,1 + + packuswb xmm0,xmm1 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + + sub rcx, byte SIZEOF_XMMWORD ; outcol + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rdi, byte 1*SIZEOF_XMMWORD ; outptr + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + test rcx,rcx + jnz short .columnloop_r8 + + pop rsi + pop rdi + pop rcx + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rax ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +; r10 = JDIMENSION image_width +; r11 = int max_v_samp_factor +; r12 = JDIMENSION v_samp_factor +; r13 = JDIMENSION width_blocks +; r14 = JSAMPARRAY input_data +; r15 = JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_downsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov ecx, r13d + shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) + jz near .return + + mov edx, r10d + + ; -- expand_right_edge + + push rcx + shl rcx,1 ; output_cols * 2 + sub rcx,rdx + jle short .expand_end + + mov rax, r11 + test rax,rax + jle short .expand_end + + cld + mov rsi, r14 ; input_data +.expandloop: + push rax + push rcx + + mov rdi, JSAMPROW [rsi] + add rdi,rdx + mov al, JSAMPLE [rdi-1] + + rep stosb + + pop rcx + pop rax + + add rsi, byte SIZEOF_JSAMPROW + dec rax + jg short .expandloop + +.expand_end: + pop rcx ; output_cols + + ; -- h2v2_downsample + + mov eax, r12d ; rowctr + test rax,rax + jle near .return + + mov rdx, 0x00020001 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov rsi, r14 ; input_data + mov rdi, r15 ; output_data +.rowloop: + push rcx + push rdi + push rsi + + mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1 + mov rdi, JSAMPROW [rdi] ; outptr + + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + +.columnloop_r8: + movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov rcx, SIZEOF_XMMWORD + jmp short .downsample + +.columnloop: + movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + pand xmm0,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm0,xmm4 + paddw xmm1,xmm5 + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + pand xmm2,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm3,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm2,xmm4 + paddw xmm3,xmm5 + + paddw xmm0,xmm1 + paddw xmm2,xmm3 + paddw xmm0,xmm7 + paddw xmm2,xmm7 + psrlw xmm0,2 + psrlw xmm2,2 + + packuswb xmm0,xmm2 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + + sub rcx, byte SIZEOF_XMMWORD ; outcol + add rdx, byte 2*SIZEOF_XMMWORD ; inptr0 + add rsi, byte 2*SIZEOF_XMMWORD ; inptr1 + add rdi, byte 1*SIZEOF_XMMWORD ; outptr + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .columnloop_r8 + + pop rsi + pop rdi + pop rcx + + add rsi, byte 2*SIZEOF_JSAMPROW ; input_data + add rdi, byte 1*SIZEOF_JSAMPROW ; output_data + dec rax ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jcsample-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jcsample-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,351 @@ +; +; jcsample.asm - downsampling (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_downsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v1_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00010000 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + alignx 16,7 + +.columnloop_r8: + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + pxor xmm1,xmm1 + mov ecx, SIZEOF_XMMWORD + jmp short .downsample + alignx 16,7 + +.columnloop: + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm2,xmm0 + movdqa xmm3,xmm1 + + pand xmm0,xmm6 + psrlw xmm2,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm3,BYTE_BIT + + paddw xmm0,xmm2 + paddw xmm1,xmm3 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + psrlw xmm0,1 + psrlw xmm1,1 + + packuswb xmm0,xmm1 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + + sub ecx, byte SIZEOF_XMMWORD ; outcol + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add edi, byte 1*SIZEOF_XMMWORD ; outptr + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + test ecx,ecx + jnz short .columnloop_r8 + + pop esi + pop edi + pop ecx + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_downsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + alignx 16,7 + +.columnloop_r8: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov ecx, SIZEOF_XMMWORD + jmp short .downsample + alignx 16,7 + +.columnloop: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + pand xmm0,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm0,xmm4 + paddw xmm1,xmm5 + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + pand xmm2,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm3,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm2,xmm4 + paddw xmm3,xmm5 + + paddw xmm0,xmm1 + paddw xmm2,xmm3 + paddw xmm0,xmm7 + paddw xmm2,xmm7 + psrlw xmm0,2 + psrlw xmm2,2 + + packuswb xmm0,xmm2 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + + sub ecx, byte SIZEOF_XMMWORD ; outcol + add edx, byte 2*SIZEOF_XMMWORD ; inptr0 + add esi, byte 2*SIZEOF_XMMWORD ; inptr1 + add edi, byte 1*SIZEOF_XMMWORD ; outptr + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .columnloop_r8 + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,274 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jdcolor-altivec.c */ + + +void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, + JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + JSAMPROW outptr, inptr0, inptr1, inptr2; + int pitch = out_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1, rgb2, rgbx0, rgbx1, rgbx2, rgbx3, + y, cb, cr; +#if __BIG_ENDIAN__ + __vector unsigned char edgel, edgeh, edges, out0, out1, out2, out3; +#if RGB_PIXELSIZE == 4 + __vector unsigned char out4; +#endif +#endif +#if RGB_PIXELSIZE == 4 + __vector unsigned char rgb3; +#endif + __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh, + crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w; + __vector int g0, g1, g2, g3; + + /* Constants + * NOTE: The >> 1 is to compensate for the fact that vec_madds() returns 17 + * high-order bits, not 16. + */ + __vector short pw_f0402 = { __8X(F_0_402 >> 1) }, + pw_mf0228 = { __8X(-F_0_228 >> 1) }, + pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) }, + pw_one = { __8X(1) }, pw_255 = { __8X(255) }, + pw_cj = { __8X(CENTERJSAMPLE) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, outptr += RGB_PIXELSIZE * 16, + inptr0 += 16, inptr1 += 16, inptr2 += 16) { + + y = vec_ld(0, inptr0); + /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + yl = (__vector signed short)VEC_UNPACKHU(y); + yh = (__vector signed short)VEC_UNPACKLU(y); + + cb = vec_ld(0, inptr1); + cbl = (__vector signed short)VEC_UNPACKHU(cb); + cbh = (__vector signed short)VEC_UNPACKLU(cb); + cbl = vec_sub(cbl, pw_cj); + cbh = vec_sub(cbh, pw_cj); + + cr = vec_ld(0, inptr2); + crl = (__vector signed short)VEC_UNPACKHU(cr); + crh = (__vector signed short)VEC_UNPACKLU(cr); + crl = vec_sub(crl, pw_cj); + crh = vec_sub(crh, pw_cj); + + /* (Original) + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * (This implementation) + * R = Y + 0.40200 * Cr + Cr + * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + * B = Y - 0.22800 * Cb + Cb + Cb + */ + bl = vec_add(cbl, cbl); + bh = vec_add(cbh, cbh); + bl = vec_madds(bl, pw_mf0228, pw_one); + bh = vec_madds(bh, pw_mf0228, pw_one); + bl = vec_sra(bl, (__vector unsigned short)pw_one); + bh = vec_sra(bh, (__vector unsigned short)pw_one); + bl = vec_add(bl, cbl); + bh = vec_add(bh, cbh); + bl = vec_add(bl, cbl); + bh = vec_add(bh, cbh); + bl = vec_add(bl, yl); + bh = vec_add(bh, yh); + + rl = vec_add(crl, crl); + rh = vec_add(crh, crh); + rl = vec_madds(rl, pw_f0402, pw_one); + rh = vec_madds(rh, pw_f0402, pw_one); + rl = vec_sra(rl, (__vector unsigned short)pw_one); + rh = vec_sra(rh, (__vector unsigned short)pw_one); + rl = vec_add(rl, crl); + rh = vec_add(rh, crh); + rl = vec_add(rl, yl); + rh = vec_add(rh, yh); + + g0w = vec_mergeh(cbl, crl); + g1w = vec_mergel(cbl, crl); + g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf); + g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf); + g2w = vec_mergeh(cbh, crh); + g3w = vec_mergel(cbh, crh); + g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf); + g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + gl = vec_perm((__vector short)g0, (__vector short)g1, shift_pack_index); + gh = vec_perm((__vector short)g2, (__vector short)g3, shift_pack_index); + gl = vec_sub(gl, crl); + gh = vec_sub(gh, crh); + gl = vec_add(gl, yl); + gh = vec_add(gh, yh); + + rg0 = vec_mergeh(rl, gl); + bx0 = vec_mergeh(bl, pw_255); + rg1 = vec_mergel(rl, gl); + bx1 = vec_mergel(bl, pw_255); + rg2 = vec_mergeh(rh, gh); + bx2 = vec_mergeh(bh, pw_255); + rg3 = vec_mergel(rh, gh); + bx3 = vec_mergel(bh, pw_255); + + rgbx0 = vec_packsu(rg0, bx0); + rgbx1 = vec_packsu(rg1, bx1); + rgbx2 = vec_packsu(rg2, bx2); + rgbx3 = vec_packsu(rg3, bx3); + +#if RGB_PIXELSIZE == 3 + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + */ + rgb0 = vec_perm(rgbx0, rgbx1, (__vector unsigned char)RGB_INDEX0); + rgb1 = vec_perm(rgbx1, rgbx2, (__vector unsigned char)RGB_INDEX1); + rgb2 = vec_perm(rgbx2, rgbx3, (__vector unsigned char)RGB_INDEX2); +#else + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + */ + rgb0 = vec_perm(rgbx0, rgbx0, (__vector unsigned char)RGB_INDEX); + rgb1 = vec_perm(rgbx1, rgbx1, (__vector unsigned char)RGB_INDEX); + rgb2 = vec_perm(rgbx2, rgbx2, (__vector unsigned char)RGB_INDEX); + rgb3 = vec_perm(rgbx3, rgbx3, (__vector unsigned char)RGB_INDEX); +#endif + +#if __BIG_ENDIAN__ + offset = (size_t)outptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overwrite. Since there is no way to + * write a partial AltiVec register, overwrite would occur on the + * last chunk of the last image row if the right edge is not on a + * 16-byte boundary. It could also occur on other rows if the bytes + * per row is low enough. Since we can't determine whether we're on + * the last image row, we have to assume every row is the last. + */ + vec_st(rgb0, 0, tmpbuf); + vec_st(rgb1, 16, tmpbuf); + vec_st(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + vec_st(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + unaligned_shift_index = vec_lvsl(0, outptr); + edgel = vec_ld(0, outptr); + edgeh = vec_ld(min(num_cols - 1, RGB_PIXELSIZE * 16), outptr); + edges = vec_perm(edgeh, edgel, unaligned_shift_index); + unaligned_shift_index = vec_lvsr(0, outptr); + out0 = vec_perm(edges, rgb0, unaligned_shift_index); + out1 = vec_perm(rgb0, rgb1, unaligned_shift_index); + out2 = vec_perm(rgb1, rgb2, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + out3 = vec_perm(rgb2, rgb3, unaligned_shift_index); + out4 = vec_perm(rgb3, edges, unaligned_shift_index); +#else + out3 = vec_perm(rgb2, edges, unaligned_shift_index); +#endif + vec_st(out0, 0, outptr); + if (bytes > 16) + vec_st(out1, 16, outptr); + if (bytes > 32) + vec_st(out2, 32, outptr); + if (bytes > 48) + vec_st(out3, 48, outptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + vec_st(out4, 64, outptr); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + VEC_ST(rgb0, 0, tmpbuf); + VEC_ST(rgb1, 16, tmpbuf); + VEC_ST(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + VEC_ST(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + VEC_ST(rgb0, 0, outptr); + if (num_cols > 16) + VEC_ST(rgb1, 16, outptr); + if (num_cols > 32) + VEC_ST(rgb2, 32, outptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + VEC_ST(rgb3, 48, outptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,405 @@ +; +; jdcolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +%define out_width(b) (b)+8 ; JDIMENSION out_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define input_row(b) (b)+16 ; JDIMENSION input_row +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16,7 +.columnloop: + + movq mm5, MMWORD [ebx] ; mm5=Cb(01234567) + movq mm1, MMWORD [edx] ; mm1=Cr(01234567) + + pcmpeqw mm4,mm4 + pcmpeqw mm7,mm7 + psrlw mm4,BYTE_BIT + psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..} + + pand mm4,mm5 ; mm4=Cb(0246)=CbE + psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO + pand mm0,mm1 ; mm0=Cr(0246)=CrE + psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO + + paddw mm4,mm7 + paddw mm5,mm7 + paddw mm0,mm7 + paddw mm1,mm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm2,mm4 ; mm2=CbE + movq mm3,mm5 ; mm3=CbO + paddw mm4,mm4 ; mm4=2*CbE + paddw mm5,mm5 ; mm5=2*CbO + movq mm6,mm0 ; mm6=CrE + movq mm7,mm1 ; mm7=CrO + paddw mm0,mm0 ; mm0=2*CrE + paddw mm1,mm1 ; mm1=2*CrO + + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800)) + pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200)) + pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200)) + + paddw mm4,[GOTOFF(eax,PW_ONE)] + paddw mm5,[GOTOFF(eax,PW_ONE)] + psraw mm4,1 ; mm4=(CbE * -FIX(0.22800)) + psraw mm5,1 ; mm5=(CbO * -FIX(0.22800)) + paddw mm0,[GOTOFF(eax,PW_ONE)] + paddw mm1,[GOTOFF(eax,PW_ONE)] + psraw mm0,1 ; mm0=(CrE * FIX(0.40200)) + psraw mm1,1 ; mm1=(CrO * FIX(0.40200)) + + paddw mm4,mm2 + paddw mm5,mm3 + paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E + paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O + paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E + paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O + + movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E + movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O + + movq mm4,mm2 + movq mm5,mm3 + punpcklwd mm2,mm6 + punpckhwd mm4,mm6 + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm3,mm7 + punpckhwd mm5,mm7 + pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm4,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm4,SCALEBITS + paddd mm3,[GOTOFF(eax,PD_ONEHALF)] + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] + psrad mm3,SCALEBITS + psrad mm5,SCALEBITS + + packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movq mm5, MMWORD [esi] ; mm5=Y(01234567) + + pcmpeqw mm4,mm4 + psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..} + pand mm4,mm5 ; mm4=Y(0246)=YE + psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO + + paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) + paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA + movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA,mmC + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmE + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA,DWORD_BIT + movd eax,mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr eax,WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .nextrow + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 + jb short .nextrow + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16,7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,441 @@ +; +; jdcolext.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2009, 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +; r10 = JDIMENSION out_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION input_row +; r13 = JSAMPARRAY output_buf +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d ; num_cols + test rcx,rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rdi, r13 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rax + push rdi + push rdx + push rbx + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr0 + mov rbx, JSAMPROW [rbx] ; inptr1 + mov rdx, JSAMPROW [rdx] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr +.columnloop: + + movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [rdx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + pcmpeqw xmm7,xmm7 + psrlw xmm4,BYTE_BIT + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4,xmm7 + paddw xmm5,xmm7 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2,xmm4 ; xmm2=CbE + movdqa xmm3,xmm5 ; xmm3=CbO + paddw xmm4,xmm4 ; xmm4=2*CbE + paddw xmm5,xmm5 ; xmm5=2*CbO + movdqa xmm6,xmm0 ; xmm6=CrE + movdqa xmm7,xmm1 ; xmm7=CrO + paddw xmm0,xmm0 ; xmm0=2*CrE + paddw xmm1,xmm1 ; xmm1=2*CrO + + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] + psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0,[rel PW_ONE] + paddw xmm1,[rel PW_ONE] + psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4,xmm2 + paddw xmm5,xmm3 + paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + punpcklwd xmm2,xmm6 + punpckhwd xmm4,xmm6 + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm4,[rel PW_MF0344_F0285] + punpcklwd xmm3,xmm7 + punpckhwd xmm5,xmm7 + pmaddwd xmm3,[rel PW_MF0344_F0285] + pmaddwd xmm5,[rel PW_MF0344_F0285] + + paddd xmm2,[rel PD_ONEHALF] + paddd xmm4,[rel PD_ONEHALF] + psrad xmm2,SCALEBITS + psrad xmm4,SCALEBITS + paddd xmm3,[rel PD_ONEHALF] + paddd xmm5,[rel PD_ONEHALF] + psrad xmm3,SCALEBITS + psrad xmm5,SCALEBITS + + packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [rsi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.nextrow: + pop rcx + pop rsi + pop rbx + pop rdx + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + add rdi, byte SIZEOF_JSAMPROW ; output_buf + dec rax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,460 @@ +; +; jdcolext.asm - colorspace conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +%define out_width(b) (b)+8 ; JDIMENSION out_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define input_row(b) (b)+16 ; JDIMENSION input_row +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16,7 +.columnloop: + + movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + pcmpeqw xmm7,xmm7 + psrlw xmm4,BYTE_BIT + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4,xmm7 + paddw xmm5,xmm7 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2,xmm4 ; xmm2=CbE + movdqa xmm3,xmm5 ; xmm3=CbO + paddw xmm4,xmm4 ; xmm4=2*CbE + paddw xmm5,xmm5 ; xmm5=2*CbO + movdqa xmm6,xmm0 ; xmm6=CrE + movdqa xmm7,xmm1 ; xmm7=CrO + paddw xmm0,xmm0 ; xmm0=2*CrE + paddw xmm1,xmm1 ; xmm1=2*CrO + + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4,[GOTOFF(eax,PW_ONE)] + paddw xmm5,[GOTOFF(eax,PW_ONE)] + psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0,[GOTOFF(eax,PW_ONE)] + paddw xmm1,[GOTOFF(eax,PW_ONE)] + psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4,xmm2 + paddw xmm5,xmm3 + paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + punpcklwd xmm2,xmm6 + punpckhwd xmm4,xmm6 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm3,xmm7 + punpckhwd xmm5,xmm7 + pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm4,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm4,SCALEBITS + paddd xmm3,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm3,SCALEBITS + psrad xmm5,SCALEBITS + + packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16,7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,96 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* YCC --> RGB CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} +#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} +#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgbx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} +#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} +#define RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgrx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE + +EXTN(jconst_ycc_rgb_convert_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx +%include "jdcolext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdcolor-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdct.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdct.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdct.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdct.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +; +; jdct.inc - private declarations for forward & reverse DCT subsystems +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; [TAB8] + +; Each IDCT routine is responsible for range-limiting its results and +; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could +; be quite far out of range if the input data is corrupt, so a bulletproof +; range-limiting step is required. We use a mask-and-table-lookup method +; to do the combined operations quickly. +; +%define RANGE_MASK (MAXJSAMPLE * 4 + 3) ; 2 bits wider than legal samples + +%define ROW(n,b,s) ((b)+(n)*(s)) +%define COL(n,b,s) ((b)+(n)*(s)*DCTSIZE) + +%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD) +%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD) +%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD) + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,108 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* MERGED YCC --> RGB CONVERSION AND UPSAMPLING */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} +#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} +#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgb_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgbx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgbx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} +#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} +#define RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgrx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgrx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxrgb_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_mmx) PRIVATE + +EXTN(jconst_merged_upsample_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_sse2) PRIVATE + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmerge-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_sse2) PRIVATE + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,323 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jdmerge-altivec.c */ + + +void jsimd_h2v1_merged_upsample_altivec (JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + JSAMPROW outptr, inptr0, inptr1, inptr2; + int pitch = output_width * RGB_PIXELSIZE, num_cols, yloop; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1, rgb2, rgbx0, rgbx1, rgbx2, rgbx3, + y, cb, cr; +#if __BIG_ENDIAN__ + __vector unsigned char edgel, edgeh, edges, out0, out1, out2, out3; +#if RGB_PIXELSIZE == 4 + __vector unsigned char out4; +#endif +#endif +#if RGB_PIXELSIZE == 4 + __vector unsigned char rgb3; +#endif + __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, ye, yo, cbl, cbh, + crl, crh, r_yl, r_yh, g_yl, g_yh, b_yl, b_yh, g_y0w, g_y1w, g_y2w, g_y3w, + rl, rh, gl, gh, bl, bh, re, ro, ge, go, be, bo; + __vector int g_y0, g_y1, g_y2, g_y3; + + /* Constants + * NOTE: The >> 1 is to compensate for the fact that vec_madds() returns 17 + * high-order bits, not 16. + */ + __vector short pw_f0402 = { __8X(F_0_402 >> 1) }, + pw_mf0228 = { __8X(-F_0_228 >> 1) }, + pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) }, + pw_one = { __8X(1) }, pw_255 = { __8X(255) }, + pw_cj = { __8X(CENTERJSAMPLE) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}, + even_index = {0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30}, + odd_index = {0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}, + even_index = {16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0}, + odd_index = {17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0}; +#endif + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + for (num_cols = pitch; num_cols > 0; inptr1 += 16, inptr2 += 16) { + + cb = vec_ld(0, inptr1); + /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + cbl = (__vector signed short)VEC_UNPACKHU(cb); + cbh = (__vector signed short)VEC_UNPACKLU(cb); + cbl = vec_sub(cbl, pw_cj); + cbh = vec_sub(cbh, pw_cj); + + cr = vec_ld(0, inptr2); + crl = (__vector signed short)VEC_UNPACKHU(cr); + crh = (__vector signed short)VEC_UNPACKLU(cr); + crl = vec_sub(crl, pw_cj); + crh = vec_sub(crh, pw_cj); + + /* (Original) + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * (This implementation) + * R = Y + 0.40200 * Cr + Cr + * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + * B = Y - 0.22800 * Cb + Cb + Cb + */ + b_yl = vec_add(cbl, cbl); + b_yh = vec_add(cbh, cbh); + b_yl = vec_madds(b_yl, pw_mf0228, pw_one); + b_yh = vec_madds(b_yh, pw_mf0228, pw_one); + b_yl = vec_sra(b_yl, (__vector unsigned short)pw_one); + b_yh = vec_sra(b_yh, (__vector unsigned short)pw_one); + b_yl = vec_add(b_yl, cbl); + b_yh = vec_add(b_yh, cbh); + b_yl = vec_add(b_yl, cbl); + b_yh = vec_add(b_yh, cbh); + + r_yl = vec_add(crl, crl); + r_yh = vec_add(crh, crh); + r_yl = vec_madds(r_yl, pw_f0402, pw_one); + r_yh = vec_madds(r_yh, pw_f0402, pw_one); + r_yl = vec_sra(r_yl, (__vector unsigned short)pw_one); + r_yh = vec_sra(r_yh, (__vector unsigned short)pw_one); + r_yl = vec_add(r_yl, crl); + r_yh = vec_add(r_yh, crh); + + g_y0w = vec_mergeh(cbl, crl); + g_y1w = vec_mergel(cbl, crl); + g_y0 = vec_msums(g_y0w, pw_mf0344_f0285, pd_onehalf); + g_y1 = vec_msums(g_y1w, pw_mf0344_f0285, pd_onehalf); + g_y2w = vec_mergeh(cbh, crh); + g_y3w = vec_mergel(cbh, crh); + g_y2 = vec_msums(g_y2w, pw_mf0344_f0285, pd_onehalf); + g_y3 = vec_msums(g_y3w, pw_mf0344_f0285, pd_onehalf); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + g_yl = vec_perm((__vector short)g_y0, (__vector short)g_y1, + shift_pack_index); + g_yh = vec_perm((__vector short)g_y2, (__vector short)g_y3, + shift_pack_index); + g_yl = vec_sub(g_yl, crl); + g_yh = vec_sub(g_yh, crh); + + for (yloop = 0; yloop < 2 && num_cols > 0; yloop++, + num_cols -= RGB_PIXELSIZE * 16, + outptr += RGB_PIXELSIZE * 16, inptr0 += 16) { + + y = vec_ld(0, inptr0); + ye = (__vector signed short)vec_perm(pb_zero, y, even_index); + yo = (__vector signed short)vec_perm(pb_zero, y, odd_index); + + if (yloop == 0) { + be = vec_add(b_yl, ye); + bo = vec_add(b_yl, yo); + re = vec_add(r_yl, ye); + ro = vec_add(r_yl, yo); + ge = vec_add(g_yl, ye); + go = vec_add(g_yl, yo); + } else { + be = vec_add(b_yh, ye); + bo = vec_add(b_yh, yo); + re = vec_add(r_yh, ye); + ro = vec_add(r_yh, yo); + ge = vec_add(g_yh, ye); + go = vec_add(g_yh, yo); + } + + rl = vec_mergeh(re, ro); + rh = vec_mergel(re, ro); + gl = vec_mergeh(ge, go); + gh = vec_mergel(ge, go); + bl = vec_mergeh(be, bo); + bh = vec_mergel(be, bo); + + rg0 = vec_mergeh(rl, gl); + bx0 = vec_mergeh(bl, pw_255); + rg1 = vec_mergel(rl, gl); + bx1 = vec_mergel(bl, pw_255); + rg2 = vec_mergeh(rh, gh); + bx2 = vec_mergeh(bh, pw_255); + rg3 = vec_mergel(rh, gh); + bx3 = vec_mergel(bh, pw_255); + + rgbx0 = vec_packsu(rg0, bx0); + rgbx1 = vec_packsu(rg1, bx1); + rgbx2 = vec_packsu(rg2, bx2); + rgbx3 = vec_packsu(rg3, bx3); + +#if RGB_PIXELSIZE == 3 + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + */ + rgb0 = vec_perm(rgbx0, rgbx1, (__vector unsigned char)RGB_INDEX0); + rgb1 = vec_perm(rgbx1, rgbx2, (__vector unsigned char)RGB_INDEX1); + rgb2 = vec_perm(rgbx2, rgbx3, (__vector unsigned char)RGB_INDEX2); +#else + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + */ + rgb0 = vec_perm(rgbx0, rgbx0, (__vector unsigned char)RGB_INDEX); + rgb1 = vec_perm(rgbx1, rgbx1, (__vector unsigned char)RGB_INDEX); + rgb2 = vec_perm(rgbx2, rgbx2, (__vector unsigned char)RGB_INDEX); + rgb3 = vec_perm(rgbx3, rgbx3, (__vector unsigned char)RGB_INDEX); +#endif + +#if __BIG_ENDIAN__ + offset = (size_t)outptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overwrite. Since there is no way to + * write a partial AltiVec register, overwrite would occur on the + * last chunk of the last image row if the right edge is not on a + * 16-byte boundary. It could also occur on other rows if the bytes + * per row is low enough. Since we can't determine whether we're on + * the last image row, we have to assume every row is the last. + */ + vec_st(rgb0, 0, tmpbuf); + vec_st(rgb1, 16, tmpbuf); + vec_st(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + vec_st(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + unaligned_shift_index = vec_lvsl(0, outptr); + edgel = vec_ld(0, outptr); + edgeh = vec_ld(min(num_cols - 1, RGB_PIXELSIZE * 16), outptr); + edges = vec_perm(edgeh, edgel, unaligned_shift_index); + unaligned_shift_index = vec_lvsr(0, outptr); + out0 = vec_perm(edges, rgb0, unaligned_shift_index); + out1 = vec_perm(rgb0, rgb1, unaligned_shift_index); + out2 = vec_perm(rgb1, rgb2, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + out3 = vec_perm(rgb2, rgb3, unaligned_shift_index); + out4 = vec_perm(rgb3, edges, unaligned_shift_index); +#else + out3 = vec_perm(rgb2, edges, unaligned_shift_index); +#endif + vec_st(out0, 0, outptr); + if (bytes > 16) + vec_st(out1, 16, outptr); + if (bytes > 32) + vec_st(out2, 32, outptr); + if (bytes > 48) + vec_st(out3, 48, outptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + vec_st(out4, 64, outptr); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + VEC_ST(rgb0, 0, tmpbuf); + VEC_ST(rgb1, 16, tmpbuf); + VEC_ST(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + VEC_ST(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + VEC_ST(rgb0, 0, outptr); + if (num_cols > 16) + VEC_ST(rgb1, 16, outptr); + if (num_cols > 32) + VEC_ST(rgb2, 32, outptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + VEC_ST(rgb3, 48, outptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + } + } +} + + +void jsimd_h2v2_merged_upsample_altivec (JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + JSAMPROW inptr, outptr; + + inptr = input_buf[0][in_row_group_ctr]; + outptr = output_buf[0]; + + input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2]; + jsimd_h2v1_merged_upsample_altivec(output_width, input_buf, in_row_group_ctr, + output_buf); + + input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2 + 1]; + output_buf[0] = output_buf[1]; + jsimd_h2v1_merged_upsample_altivec(output_width, input_buf, in_row_group_ctr, + output_buf); + + input_buf[0][in_row_group_ctr] = inptr; + output_buf[0] = outptr; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,464 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_mmx (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [output_width(eax)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movq mm6, MMWORD [ebx] ; mm6=Cb(01234567) + movq mm7, MMWORD [edx] ; mm7=Cr(01234567) + + pxor mm1,mm1 ; mm1=(all 0's) + pcmpeqw mm3,mm3 + psllw mm3,7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80} + + movq mm4,mm6 + punpckhbw mm6,mm1 ; mm6=Cb(4567)=CbH + punpcklbw mm4,mm1 ; mm4=Cb(0123)=CbL + movq mm0,mm7 + punpckhbw mm7,mm1 ; mm7=Cr(4567)=CrH + punpcklbw mm0,mm1 ; mm0=Cr(0123)=CrL + + paddw mm6,mm3 + paddw mm4,mm3 + paddw mm7,mm3 + paddw mm0,mm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm5,mm6 ; mm5=CbH + movq mm2,mm4 ; mm2=CbL + paddw mm6,mm6 ; mm6=2*CbH + paddw mm4,mm4 ; mm4=2*CbL + movq mm1,mm7 ; mm1=CrH + movq mm3,mm0 ; mm3=CrL + paddw mm7,mm7 ; mm7=2*CrH + paddw mm0,mm0 ; mm0=2*CrL + + pmulhw mm6,[GOTOFF(eax,PW_MF0228)] ; mm6=(2*CbH * -FIX(0.22800)) + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbL * -FIX(0.22800)) + pmulhw mm7,[GOTOFF(eax,PW_F0402)] ; mm7=(2*CrH * FIX(0.40200)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrL * FIX(0.40200)) + + paddw mm6,[GOTOFF(eax,PW_ONE)] + paddw mm4,[GOTOFF(eax,PW_ONE)] + psraw mm6,1 ; mm6=(CbH * -FIX(0.22800)) + psraw mm4,1 ; mm4=(CbL * -FIX(0.22800)) + paddw mm7,[GOTOFF(eax,PW_ONE)] + paddw mm0,[GOTOFF(eax,PW_ONE)] + psraw mm7,1 ; mm7=(CrH * FIX(0.40200)) + psraw mm0,1 ; mm0=(CrL * FIX(0.40200)) + + paddw mm6,mm5 + paddw mm4,mm2 + paddw mm6,mm5 ; mm6=(CbH * FIX(1.77200))=(B-Y)H + paddw mm4,mm2 ; mm4=(CbL * FIX(1.77200))=(B-Y)L + paddw mm7,mm1 ; mm7=(CrH * FIX(1.40200))=(R-Y)H + paddw mm0,mm3 ; mm0=(CrL * FIX(1.40200))=(R-Y)L + + movq MMWORD [wk(0)], mm6 ; wk(0)=(B-Y)H + movq MMWORD [wk(1)], mm7 ; wk(1)=(R-Y)H + + movq mm6,mm5 + movq mm7,mm2 + punpcklwd mm5,mm1 + punpckhwd mm6,mm1 + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm2,mm3 + punpckhwd mm7,mm3 + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] + paddd mm6,[GOTOFF(eax,PD_ONEHALF)] + psrad mm5,SCALEBITS + psrad mm6,SCALEBITS + paddd mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm7,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm7,SCALEBITS + + packssdw mm5,mm6 ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw mm2,mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw mm5,mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw mm2,mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H + movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H + movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movq mm7, MMWORD [esi] ; mm7=Y(01234567) + + pcmpeqw mm6,mm6 + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + pand mm6,mm7 ; mm6=Y(0246)=YE + psrlw mm7,BYTE_BIT ; mm7=Y(1357)=YO + + movq mm1,mm0 ; mm1=mm0=(R-Y)(L/H) + movq mm3,mm2 ; mm3=mm2=(G-Y)(L/H) + movq mm5,mm4 ; mm5=mm4=(B-Y)(L/H) + + paddw mm0,mm6 ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm7 ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm6 ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm7 ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4,mm6 ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6) + paddw mm5,mm7 ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA + movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz near .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA,mmC + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmE + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA,DWORD_BIT + movd eax,mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr eax,WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .endcolumn + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 + jb short .endcolumn + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_mmx (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, JDIMENSION [output_width(ebp)] + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + + push edx ; inptr2 + push ebx ; inptr1 + push esi ; inptr00 + mov ebx,esp + + push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; output_width + + call near EXTN(jsimd_h2v1_merged_upsample_mmx) + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi + mov POINTER [ebx-1*SIZEOF_POINTER], edi + + call near EXTN(jsimd_h2v1_merged_upsample_mmx) + + add esp, byte 7*SIZEOF_DWORD + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,538 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2009, 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +; r10 = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d ; col + test rcx,rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + mov rdi, r13 + mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 + mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 + mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr + + pop rcx ; col + +.columnloop: + + movdqa xmm6, XMMWORD [rbx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [rdx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[rel PW_ONE] + paddw xmm4,[rel PW_ONE] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[rel PW_ONE] + paddw xmm0,[rel PW_ONE] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[rel PW_MF0344_F0285] + pmaddwd xmm6,[rel PW_MF0344_F0285] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm7,[rel PW_MF0344_F0285] + + paddd xmm5,[rel PD_ONEHALF] + paddd xmm6,[rel PD_ONEHALF] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[rel PD_ONEHALF] + paddd xmm7,[rel PD_ONEHALF] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + +.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + +.Yloop_1st: + movdqa xmm7, XMMWORD [rsi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +; r10 = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + mov eax, r10d + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + mov rdi, r13 + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr00 + mov rbx,rsp + + push rdi + push rcx + push rax + + %ifdef WIN64 + mov r8, rcx + mov r9, rdi + mov rcx, rax + mov rdx, rbx + %else + mov rdx, rcx + mov rcx, rdi + mov rdi, rax + mov rsi, rbx + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax + pop rcx + pop rdi + pop rsi + pop rbx + pop rdx + + add rdi, byte SIZEOF_JSAMPROW ; outptr1 + add rsi, byte SIZEOF_JSAMPROW ; inptr01 + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr00 + mov rbx,rsp + + push rdi + push rcx + push rax + + %ifdef WIN64 + mov r8, rcx + mov r9, rdi + mov rcx, rax + mov rdx, rbx + %else + mov rdx, rcx + mov rcx, rdi + mov rdi, rax + mov rsi, rbx + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax + pop rcx + pop rdi + pop rsi + pop rbx + pop rdx + + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdmrgext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,519 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [output_width(eax)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[GOTOFF(eax,PW_MF0228)] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[GOTOFF(eax,PW_F0402)] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[GOTOFF(eax,PW_ONE)] + paddw xmm4,[GOTOFF(eax,PW_ONE)] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[GOTOFF(eax,PW_ONE)] + paddw xmm0,[GOTOFF(eax,PW_ONE)] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm6,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm7,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + movd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, POINTER [output_width(ebp)] + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + + push edx ; inptr2 + push ebx ; inptr1 + push esi ; inptr00 + mov ebx,esp + + push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; output_width + + call near EXTN(jsimd_h2v1_merged_upsample_sse2) + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi + mov POINTER [ebx-1*SIZEOF_POINTER], edi + + call near EXTN(jsimd_h2v1_merged_upsample_sse2) + + add esp, byte 7*SIZEOF_DWORD + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,392 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* CHROMA UPSAMPLING */ + +#include "jsimd_altivec.h" + + +void +jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr; + int inrow, incol; + + __vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0, + out; + __vector short this0e, this0o, this0l, this0h, last0l, last0h, + next0l, next0h, outle, outhe, outlo, outho; + + /* Constants */ + __vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) }, + last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}, + last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30}, + next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, + next_index_lastcol = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,15}, +#if __BIG_ENDIAN__ + merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; +#else + merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; +#endif + __vector short pw_one = { __8X(1) }, pw_two = { __8X(2) }; + + for (inrow = 0; inrow < max_v_samp_factor; inrow++) { + inptr = input_data[inrow]; + outptr = output_data[inrow]; + + if (downsampled_width & 15) + inptr[downsampled_width] = inptr[downsampled_width - 1]; + + this0 = vec_ld(0, inptr); + p_last0 = vec_perm(this0, this0, last_index_col0); + last0 = this0; + + for (incol = downsampled_width; incol > 0; + incol -= 16, inptr += 16, outptr += 32) { + + if (downsampled_width - incol > 0) { + p_last0 = vec_perm(last0, this0, last_index); + last0 = this0; + } + + if (incol <= 16) + p_next0 = vec_perm(this0, this0, next_index_lastcol); + else { + next0 = vec_ld(16, inptr); + p_next0 = vec_perm(this0, next0, next_index); + } + + this0e = (__vector short)vec_mule(this0, pb_three); + this0o = (__vector short)vec_mulo(this0, pb_three); + this0l = vec_mergeh(this0e, this0o); + this0h = vec_mergel(this0e, this0o); + + last0l = (__vector short)VEC_UNPACKHU(p_last0); + last0h = (__vector short)VEC_UNPACKLU(p_last0); + last0l = vec_add(last0l, pw_one); + + next0l = (__vector short)VEC_UNPACKHU(p_next0); + next0h = (__vector short)VEC_UNPACKLU(p_next0); + next0l = vec_add(next0l, pw_two); + + outle = vec_add(this0l, last0l); + outlo = vec_add(this0l, next0l); + outle = vec_sr(outle, (__vector unsigned short)pw_two); + outlo = vec_sr(outlo, (__vector unsigned short)pw_two); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr); + + if (incol > 8) { + last0h = vec_add(last0h, pw_one); + next0h = vec_add(next0h, pw_two); + + outhe = vec_add(this0h, last0h); + outho = vec_add(this0h, next0h); + outhe = vec_sr(outhe, (__vector unsigned short)pw_two); + outho = vec_sr(outho, (__vector unsigned short)pw_two); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr); + } + + this0 = next0; + } + } +} + + +void +jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; + int inrow, outrow, incol; + + __vector unsigned char this_1, this0, this1, out; + __vector short this_1l, this_1h, this0l, this0h, this1l, this1h, + lastcolsum_1h, lastcolsum1h, + p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h, + thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h, + nextcolsum_1l = {0}, nextcolsum_1h = {0}, + nextcolsum1l = {0}, nextcolsum1h = {0}, + p_nextcolsum_1l, p_nextcolsum_1h, p_nextcolsum1l, p_nextcolsum1h, + tmpl, tmph, outle, outhe, outlo, outho; + + /* Constants */ + __vector unsigned char pb_zero = { __16X(0) }, + last_index_col0 = {0,1,0,1,2,3,4,5,6,7,8,9,10,11,12,13}, + last_index={14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29}, + next_index = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}, + next_index_lastcol = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15}, +#if __BIG_ENDIAN__ + merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; +#else + merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; +#endif + __vector short pw_zero = { __8X(0) }, pw_three = { __8X(3) }, + pw_seven = { __8X(7) }, pw_eight = { __8X(8) }; + __vector unsigned short pw_four = { __8X(4) }; + + for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + + inptr_1 = input_data[inrow - 1]; + inptr0 = input_data[inrow]; + inptr1 = input_data[inrow + 1]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + + if (downsampled_width & 15) { + inptr_1[downsampled_width] = inptr_1[downsampled_width - 1]; + inptr0[downsampled_width] = inptr0[downsampled_width - 1]; + inptr1[downsampled_width] = inptr1[downsampled_width - 1]; + } + + this0 = vec_ld(0, inptr0); + this0l = (__vector short)VEC_UNPACKHU(this0); + this0h = (__vector short)VEC_UNPACKLU(this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(0, inptr_1); + this_1l = (__vector short)VEC_UNPACKHU(this_1); + this_1h = (__vector short)VEC_UNPACKLU(this_1); + thiscolsum_1l = vec_add(this0l, this_1l); + thiscolsum_1h = vec_add(this0h, this_1h); + lastcolsum_1h = thiscolsum_1h; + p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0); + p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); + + this1 = vec_ld(0, inptr1); + this1l = (__vector short)VEC_UNPACKHU(this1); + this1h = (__vector short)VEC_UNPACKLU(this1); + thiscolsum1l = vec_add(this0l, this1l); + thiscolsum1h = vec_add(this0h, this1h); + lastcolsum1h = thiscolsum1h; + p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0); + p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); + + for (incol = downsampled_width; incol > 0; + incol -= 16, inptr_1 += 16, inptr0 += 16, inptr1 += 16, + outptr0 += 32, outptr1 += 32) { + + if (downsampled_width - incol > 0) { + p_lastcolsum_1l = vec_perm(lastcolsum_1h, thiscolsum_1l, last_index); + p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); + p_lastcolsum1l = vec_perm(lastcolsum1h, thiscolsum1l, last_index); + p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); + lastcolsum_1h = thiscolsum_1h; lastcolsum1h = thiscolsum1h; + } + + if (incol <= 16) { + p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); + p_nextcolsum_1h = vec_perm(thiscolsum_1h, thiscolsum_1h, + next_index_lastcol); + p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); + p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h, + next_index_lastcol); + } else { + this0 = vec_ld(16, inptr0); + this0l = (__vector short)VEC_UNPACKHU(this0); + this0h = (__vector short)VEC_UNPACKLU(this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(16, inptr_1); + this_1l = (__vector short)VEC_UNPACKHU(this_1); + this_1h = (__vector short)VEC_UNPACKLU(this_1); + nextcolsum_1l = vec_add(this0l, this_1l); + nextcolsum_1h = vec_add(this0h, this_1h); + p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); + p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index); + + this1 = vec_ld(16, inptr1); + this1l = (__vector short)VEC_UNPACKHU(this1); + this1h = (__vector short)VEC_UNPACKLU(this1); + nextcolsum1l = vec_add(this0l, this1l); + nextcolsum1h = vec_add(this0h, this1h); + p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); + p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index); + } + + /* Process the upper row */ + + tmpl = vec_mladd(thiscolsum_1l, pw_three, pw_zero); + outle = vec_add(tmpl, p_lastcolsum_1l); + outle = vec_add(outle, pw_eight); + outle = vec_sr(outle, pw_four); + + outlo = vec_add(tmpl, p_nextcolsum_1l); + outlo = vec_add(outlo, pw_seven); + outlo = vec_sr(outlo, pw_four); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr0); + + if (incol > 8) { + tmph = vec_mladd(thiscolsum_1h, pw_three, pw_zero); + outhe = vec_add(tmph, p_lastcolsum_1h); + outhe = vec_add(outhe, pw_eight); + outhe = vec_sr(outhe, pw_four); + + outho = vec_add(tmph, p_nextcolsum_1h); + outho = vec_add(outho, pw_seven); + outho = vec_sr(outho, pw_four); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr0); + } + + /* Process the lower row */ + + tmpl = vec_mladd(thiscolsum1l, pw_three, pw_zero); + outle = vec_add(tmpl, p_lastcolsum1l); + outle = vec_add(outle, pw_eight); + outle = vec_sr(outle, pw_four); + + outlo = vec_add(tmpl, p_nextcolsum1l); + outlo = vec_add(outlo, pw_seven); + outlo = vec_sr(outlo, pw_four); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr1); + + if (incol > 8) { + tmph = vec_mladd(thiscolsum1h, pw_three, pw_zero); + outhe = vec_add(tmph, p_lastcolsum1h); + outhe = vec_add(outhe, pw_eight); + outhe = vec_sr(outhe, pw_four); + + outho = vec_add(tmph, p_nextcolsum1h); + outho = vec_add(outho, pw_seven); + outho = vec_sr(outho, pw_four); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr1); + } + + thiscolsum_1l = nextcolsum_1l; thiscolsum_1h = nextcolsum_1h; + thiscolsum1l = nextcolsum1l; thiscolsum1h = nextcolsum1h; + } + } +} + + +/* These are rarely used (mainly just for decompressing YCCK images) */ + +void +jsimd_h2v1_upsample_altivec (int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr; + int inrow, incol; + + __vector unsigned char in, inl, inh; + + for (inrow = 0; inrow < max_v_samp_factor; inrow++) { + inptr = input_data[inrow]; + outptr = output_data[inrow]; + + for (incol = (output_width + 31) & (~31); incol > 0; + incol -= 64, inptr += 32, outptr += 64) { + + in = vec_ld(0, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 0, outptr); + vec_st(inh, 16, outptr); + + if (incol > 32) { + in = vec_ld(16, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 32, outptr); + vec_st(inh, 48, outptr); + } + } + } +} + + +void +jsimd_h2v2_upsample_altivec (int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr0, outptr1; + int inrow, outrow, incol; + + __vector unsigned char in, inl, inh; + + for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + + inptr = input_data[inrow]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + + for (incol = (output_width + 31) & (~31); incol > 0; + incol -= 64, inptr += 32, outptr0 += 64, outptr1 += 64) { + + in = vec_ld(0, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 0, outptr0); + vec_st(inl, 0, outptr1); + + vec_st(inh, 16, outptr0); + vec_st(inh, 16, outptr1); + + if (incol > 32) { + in = vec_ld(16, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 32, outptr0); + vec_st(inl, 32, outptr1); + + vec_st(inh, 48, outptr0); + vec_st(inh, 48, outptr1); + } + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,737 @@ +; +; jdsample.asm - upsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_mmx) PRIVATE + +EXTN(jconst_fancy_upsample_mmx): + +PW_ONE times 4 dw 1 +PW_TWO times 4 dw 2 +PW_THREE times 4 dw 3 +PW_SEVEN times 4 dw 7 +PW_EIGHT times 4 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_mmx (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_mmx): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_MMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor mm0,mm0 ; mm0=(all 0's) + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb mm6,mm6 + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movq mm6, MMWORD [esi+1*SIZEOF_MMWORD] + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + +.upsample: + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2,mm1 + movq mm3,mm1 ; mm1=( 0 1 2 3 4 5 6 7) + psllq mm2,BYTE_BIT ; mm2=( - 0 1 2 3 4 5 6) + psrlq mm3,BYTE_BIT ; mm3=( 1 2 3 4 5 6 7 -) + + por mm2,mm7 ; mm2=(-1 0 1 2 3 4 5 6) + por mm3,mm6 ; mm3=( 1 2 3 4 5 6 7 8) + + movq mm7,mm1 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT ; mm7=( 7 - - - - - - -) + + movq mm4,mm1 + punpcklbw mm1,mm0 ; mm1=( 0 1 2 3) + punpckhbw mm4,mm0 ; mm4=( 4 5 6 7) + movq mm5,mm2 + punpcklbw mm2,mm0 ; mm2=(-1 0 1 2) + punpckhbw mm5,mm0 ; mm5=( 3 4 5 6) + movq mm6,mm3 + punpcklbw mm3,mm0 ; mm3=( 1 2 3 4) + punpckhbw mm6,mm0 ; mm6=( 5 6 7 8) + + pmullw mm1,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm2,[GOTOFF(ebx,PW_ONE)] + paddw mm5,[GOTOFF(ebx,PW_ONE)] + paddw mm3,[GOTOFF(ebx,PW_TWO)] + paddw mm6,[GOTOFF(ebx,PW_TWO)] + + paddw mm2,mm1 + paddw mm5,mm4 + psrlw mm2,2 ; mm2=OutLE=( 0 2 4 6) + psrlw mm5,2 ; mm5=OutHE=( 8 10 12 14) + paddw mm3,mm1 + paddw mm6,mm4 + psrlw mm3,2 ; mm3=OutLO=( 1 3 5 7) + psrlw mm6,2 ; mm6=OutHO=( 9 11 13 15) + + psllw mm3,BYTE_BIT + psllw mm6,BYTE_BIT + por mm2,mm3 ; mm2=OutL=( 0 1 2 3 4 5 6 7) + por mm5,mm6 ; mm5=OutH=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm5 + + sub eax, byte SIZEOF_MMWORD + add esi, byte 1*SIZEOF_MMWORD ; inptr + add edi, byte 2*SIZEOF_MMWORD ; outptr + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_mmx (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_MMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0] + movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0] + movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][0]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][0]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][0]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][0]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][0]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][0]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-2)*BYTE_BIT + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm6 + + pand mm1,mm7 ; mm1=( 0 - - -) + pand mm2,mm7 ; mm2=( 0 - - -) + + movq MMWORD [wk(0)], mm1 + movq MMWORD [wk(1)], mm2 + + poppic ebx + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb mm1,mm1 + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT + movq mm2,mm1 + + pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) + pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + + jmp short .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1] + movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1] + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][1]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][1]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][1]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][1]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][1]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][1]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+2*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+3*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm6 + + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm1=( - - - 0) + psllq mm2,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm2=( - - - 0) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + +.upsample: + ; -- process the upper row + + movq mm7, MMWORD [edx+0*SIZEOF_MMWORD] ; mm7=Int0L=( 0 1 2 3) + movq mm3, MMWORD [edx+1*SIZEOF_MMWORD] ; mm3=Int0H=( 4 5 6 7) + + movq mm0,mm7 + movq mm4,mm3 + psrlq mm0,2*BYTE_BIT ; mm0=( 1 2 3 -) + psllq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( - - - 4) + movq mm5,mm7 + movq mm6,mm3 + psrlq mm5,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm5=( 3 - - -) + psllq mm6,2*BYTE_BIT ; mm6=( - 4 5 6) + + por mm0,mm4 ; mm0=( 1 2 3 4) + por mm5,mm6 ; mm5=( 3 4 5 6) + + movq mm1,mm7 + movq mm2,mm3 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm2,2*BYTE_BIT ; mm2=( 5 6 7 -) + movq mm4,mm3 + psrlq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) + + por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) + por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8) + + movq MMWORD [wk(0)], mm4 + + pmullw mm7,[GOTOFF(ebx,PW_THREE)] + pmullw mm3,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm5,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_SEVEN)] + paddw mm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm7 + paddw mm5,mm3 + psrlw mm1,4 ; mm1=Out0LE=( 0 2 4 6) + psrlw mm5,4 ; mm5=Out0HE=( 8 10 12 14) + paddw mm0,mm7 + paddw mm2,mm3 + psrlw mm0,4 ; mm0=Out0LO=( 1 3 5 7) + psrlw mm2,4 ; mm2=Out0HO=( 9 11 13 15) + + psllw mm0,BYTE_BIT + psllw mm2,BYTE_BIT + por mm1,mm0 ; mm1=Out0L=( 0 1 2 3 4 5 6 7) + por mm5,mm2 ; mm5=Out0H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 + + ; -- process the lower row + + movq mm6, MMWORD [edi+0*SIZEOF_MMWORD] ; mm6=Int1L=( 0 1 2 3) + movq mm4, MMWORD [edi+1*SIZEOF_MMWORD] ; mm4=Int1H=( 4 5 6 7) + + movq mm7,mm6 + movq mm3,mm4 + psrlq mm7,2*BYTE_BIT ; mm7=( 1 2 3 -) + psllq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( - - - 4) + movq mm0,mm6 + movq mm2,mm4 + psrlq mm0,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm0=( 3 - - -) + psllq mm2,2*BYTE_BIT ; mm2=( - 4 5 6) + + por mm7,mm3 ; mm7=( 1 2 3 4) + por mm0,mm2 ; mm0=( 3 4 5 6) + + movq mm1,mm6 + movq mm5,mm4 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm5,2*BYTE_BIT ; mm5=( 5 6 7 -) + movq mm3,mm4 + psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) + + por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) + por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) + + movq MMWORD [wk(1)], mm3 + + pmullw mm6,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_EIGHT)] + paddw mm7,[GOTOFF(ebx,PW_SEVEN)] + paddw mm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm6 + paddw mm0,mm4 + psrlw mm1,4 ; mm1=Out1LE=( 0 2 4 6) + psrlw mm0,4 ; mm0=Out1HE=( 8 10 12 14) + paddw mm7,mm6 + paddw mm5,mm4 + psrlw mm7,4 ; mm7=Out1LO=( 1 3 5 7) + psrlw mm5,4 ; mm5=Out1HO=( 9 11 13 15) + + psllw mm7,BYTE_BIT + psllw mm5,BYTE_BIT + por mm1,mm7 ; mm1=Out1L=( 0 1 2 3 4 5 6 7) + por mm0,mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm0 + + poppic ebx + + sub eax, byte SIZEOF_MMWORD + add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_MMWORD ; inptr0 + add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_MMWORD ; outptr0 + add edi, byte 2*SIZEOF_MMWORD ; outptr1 + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_mmx (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_upsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz short .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 + punpckhbw mm1,mm1 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 4*SIZEOF_MMWORD ; outptr + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_mmx (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 + punpckhbw mm1,mm1 + + movq MMWORD [ebx+0*SIZEOF_MMWORD], mm0 + movq MMWORD [ebx+1*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [ebx+2*SIZEOF_MMWORD], mm2 + movq MMWORD [ebx+3*SIZEOF_MMWORD], mm3 + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add ebx, byte 4*SIZEOF_MMWORD ; outptr0 + add edi, byte 4*SIZEOF_MMWORD ; outptr1 + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,671 @@ +; +; jdsample.asm - upsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_sse2) PRIVATE + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov eax, r11d ; colctr + test rax,rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + test rax, SIZEOF_XMMWORD-1 + jz short .skip + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor xmm0,xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-1) + pand xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + pcmpeqb xmm6,xmm6 + pslldq xmm6,(SIZEOF_XMMWORD-1) + pand xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD] + jmp short .upsample + +.columnloop: + movdqa xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD] + pslldq xmm6,(SIZEOF_XMMWORD-1) + +.upsample: + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm2,xmm1 + movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7,xmm1 + psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm2,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] + paddw xmm3,[rel PW_TWO] + paddw xmm6,[rel PW_TWO] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3,BYTE_BIT + psllw xmm6,BYTE_BIT + por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5 + + sub rax, byte SIZEOF_XMMWORD + add rsi, byte 1*SIZEOF_XMMWORD ; inptr + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop rsi + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov eax, r11d ; colctr + test rax,rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rcx + push rdi + push rsi + + mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + + test rax, SIZEOF_XMMWORD-1 + jz short .skip + push rdx + mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop rdx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-2) + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + ; -- process the last column block + + pcmpeqb xmm1,xmm1 + pslldq xmm1,(SIZEOF_XMMWORD-2) + movdqa xmm2,xmm1 + + pand xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD] + + movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5,xmm7 + movdqa xmm6,xmm3 + psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm7 + movdqa xmm2,xmm3 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4,xmm3 + psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7,[rel PW_THREE] + pmullw xmm3,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm5,[rel PW_EIGHT] + paddw xmm0,[rel PW_SEVEN] + paddw xmm2,[rel PW_SEVEN] + + paddw xmm1,xmm7 + paddw xmm5,xmm3 + psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0,xmm7 + paddw xmm2,xmm3 + psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0,BYTE_BIT + psllw xmm2,BYTE_BIT + por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0,xmm6 + movdqa xmm2,xmm4 + psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3,xmm4 + psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm0,[rel PW_EIGHT] + paddw xmm7,[rel PW_SEVEN] + paddw xmm5,[rel PW_SEVEN] + + paddw xmm1,xmm6 + paddw xmm0,xmm4 + psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7,xmm6 + paddw xmm5,xmm4 + psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7,BYTE_BIT + psllw xmm5,BYTE_BIT + por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0 + + sub rax, byte SIZEOF_XMMWORD + add rcx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add rbx, byte 1*SIZEOF_XMMWORD ; inptr0 + add rsi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add rdx, byte 2*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test rax,rax + jnz near .columnloop_last + + pop rsi + pop rdi + pop rcx + pop rax + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov edx, r11d + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz short .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + mov rax,rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rdi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg short .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + mov edx, r11d + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + mov rax,rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rbx, byte 4*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jdsample-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jdsample-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,729 @@ +; +; jdsample.asm - upsampling (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_sse2) PRIVATE + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor xmm0,xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-1) + pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD] + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb xmm6,xmm6 + pslldq xmm6,(SIZEOF_XMMWORD-1) + pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD] + pslldq xmm6,(SIZEOF_XMMWORD-1) + +.upsample: + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2,xmm1 + movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7,xmm1 + psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm2,[GOTOFF(ebx,PW_ONE)] + paddw xmm5,[GOTOFF(ebx,PW_ONE)] + paddw xmm3,[GOTOFF(ebx,PW_TWO)] + paddw xmm6,[GOTOFF(ebx,PW_TWO)] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3,BYTE_BIT + psllw xmm6,BYTE_BIT + por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5 + + sub eax, byte SIZEOF_XMMWORD + add esi, byte 1*SIZEOF_XMMWORD ; inptr + add edi, byte 2*SIZEOF_XMMWORD ; outptr + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-2) + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + poppic ebx + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb xmm1,xmm1 + pslldq xmm1,(SIZEOF_XMMWORD-2) + movdqa xmm2,xmm1 + + pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD] + + movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5,xmm7 + movdqa xmm6,xmm3 + psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm7 + movdqa xmm2,xmm3 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4,xmm3 + psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7,[GOTOFF(ebx,PW_THREE)] + pmullw xmm3,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm5,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm7 + paddw xmm5,xmm3 + psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0,xmm7 + paddw xmm2,xmm3 + psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0,BYTE_BIT + psllw xmm2,BYTE_BIT + por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0,xmm6 + movdqa xmm2,xmm4 + psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3,xmm4 + psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm7,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm6 + paddw xmm0,xmm4 + psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7,xmm6 + paddw xmm5,xmm4 + psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7,BYTE_BIT + psllw xmm5,BYTE_BIT + por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0 + + poppic ebx + + sub eax, byte SIZEOF_XMMWORD + add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 + add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_XMMWORD ; outptr0 + add edi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_upsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 + and edx, byte -(2*SIZEOF_XMMWORD) + jz short .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add edi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_upsample_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 + and edx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add ebx, byte 4*SIZEOF_XMMWORD ; outptr0 + add edi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-3dn.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,320 @@ +; +; jfdctflt.asm - floating-point FDCT (3DNow!) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_3dnow) PRIVATE + +EXTN(jconst_fdct_float_3dnow): + +PD_0_382 times 2 dd 0.382683432365089771728460 +PD_0_707 times 2 dd 0.707106781186547524400844 +PD_0_541 times 2 dd 0.541196100146196984399723 +PD_1_306 times 2 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_3dnow (FAST_FLOAT *data) +; + +%define data(b) (b)+8 ; FAST_FLOAT *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_3dnow) PRIVATE + +EXTN(jsimd_fdct_float_3dnow): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17) + + movq mm4,mm0 ; transpose coefficients + punpckldq mm0,mm1 ; mm0=(00 10)=data0 + punpckhdq mm4,mm1 ; mm4=(01 11)=data1 + movq mm5,mm2 ; transpose coefficients + punpckldq mm2,mm3 ; mm2=(06 16)=data6 + punpckhdq mm5,mm3 ; mm5=(07 17)=data7 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm2 ; mm4=data1-data6=tmp6 + pfsub mm0,mm5 ; mm0=data0-data7=tmp7 + pfadd mm6,mm2 ; mm6=data1+data6=tmp1 + pfadd mm7,mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4,mm1 ; transpose coefficients + punpckldq mm1,mm3 ; mm1=(02 12)=data2 + punpckhdq mm4,mm3 ; mm4=(03 13)=data3 + movq mm0,mm2 ; transpose coefficients + punpckldq mm2,mm5 ; mm2=(04 14)=data4 + punpckhdq mm0,mm5 ; mm0=(05 15)=data5 + + movq mm3,mm4 + movq mm5,mm1 + pfadd mm4,mm2 ; mm4=data3+data4=tmp3 + pfadd mm1,mm0 ; mm1=data2+data5=tmp2 + pfsub mm3,mm2 ; mm3=data3-data4=tmp4 + pfsub mm5,mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2,mm7 + movq mm0,mm6 + pfsub mm7,mm4 ; mm7=tmp13 + pfsub mm6,mm1 ; mm6=tmp12 + pfadd mm2,mm4 ; mm2=tmp10 + pfadd mm0,mm1 ; mm0=tmp11 + + pfadd mm6,mm7 + pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4,mm2 + movq mm1,mm7 + pfsub mm2,mm0 ; mm2=data4 + pfsub mm7,mm6 ; mm7=data6 + pfadd mm4,mm0 ; mm4=data0 + pfadd mm1,mm6 ; mm1=data2 + + movq MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3,mm5 ; mm3=tmp10 + pfadd mm5,mm0 ; mm5=tmp11 + pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2,mm3 ; mm2=tmp10 + pfsub mm3,mm0 + pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2,mm3 ; mm2=z2 + pfadd mm0,mm3 ; mm0=z4 + + movq mm7,mm6 + pfsub mm6,mm5 ; mm6=z13 + pfadd mm7,mm5 ; mm7=z11 + + movq mm4,mm6 + movq mm1,mm7 + pfsub mm6,mm2 ; mm6=data3 + pfsub mm7,mm0 ; mm7=data7 + pfadd mm4,mm2 ; mm4=data5 + pfadd mm1,mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71) + + movq mm4,mm0 ; transpose coefficients + punpckldq mm0,mm1 ; mm0=(00 01)=data0 + punpckhdq mm4,mm1 ; mm4=(10 11)=data1 + movq mm5,mm2 ; transpose coefficients + punpckldq mm2,mm3 ; mm2=(60 61)=data6 + punpckhdq mm5,mm3 ; mm5=(70 71)=data7 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm2 ; mm4=data1-data6=tmp6 + pfsub mm0,mm5 ; mm0=data0-data7=tmp7 + pfadd mm6,mm2 ; mm6=data1+data6=tmp1 + pfadd mm7,mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4,mm1 ; transpose coefficients + punpckldq mm1,mm3 ; mm1=(20 21)=data2 + punpckhdq mm4,mm3 ; mm4=(30 31)=data3 + movq mm0,mm2 ; transpose coefficients + punpckldq mm2,mm5 ; mm2=(40 41)=data4 + punpckhdq mm0,mm5 ; mm0=(50 51)=data5 + + movq mm3,mm4 + movq mm5,mm1 + pfadd mm4,mm2 ; mm4=data3+data4=tmp3 + pfadd mm1,mm0 ; mm1=data2+data5=tmp2 + pfsub mm3,mm2 ; mm3=data3-data4=tmp4 + pfsub mm5,mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2,mm7 + movq mm0,mm6 + pfsub mm7,mm4 ; mm7=tmp13 + pfsub mm6,mm1 ; mm6=tmp12 + pfadd mm2,mm4 ; mm2=tmp10 + pfadd mm0,mm1 ; mm0=tmp11 + + pfadd mm6,mm7 + pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4,mm2 + movq mm1,mm7 + pfsub mm2,mm0 ; mm2=data4 + pfsub mm7,mm6 ; mm7=data6 + pfadd mm4,mm0 ; mm4=data0 + pfadd mm1,mm6 ; mm1=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3,mm5 ; mm3=tmp10 + pfadd mm5,mm0 ; mm5=tmp11 + pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2,mm3 ; mm2=tmp10 + pfsub mm3,mm0 + pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2,mm3 ; mm2=z2 + pfadd mm0,mm3 ; mm0=z4 + + movq mm7,mm6 + pfsub mm6,mm5 ; mm6=z13 + pfadd mm7,mm5 ; mm7=z11 + + movq mm4,mm6 + movq mm1,mm7 + pfsub mm6,mm2 ; mm6=data3 + pfsub mm7,mm0 ; mm7=data7 + pfadd mm4,mm2 ; mm4=data5 + pfadd mm1,mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + + femms ; empty MMX/3DNow! state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,358 @@ +; +; jfdctflt.asm - floating-point FDCT (64-bit SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_sse) PRIVATE + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_sse (FAST_FLOAT *data) +; + +; r10 = FAST_FLOAT *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_sse) PRIVATE + +EXTN(jsimd_fdct_float_sse): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[rel PD_0_707] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT + dec rcx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[rel PD_0_707] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, byte 4*SIZEOF_FAST_FLOAT + dec rcx + jnz near .columnloop + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctflt-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,370 @@ +; +; jfdctflt.asm - floating-point FDCT (SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_sse) PRIVATE + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_sse (FAST_FLOAT *data) +; + +%define data(b) (b)+8 ; FAST_FLOAT *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_sse) PRIVATE + +EXTN(jsimd_fdct_float_sse): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, byte 4*SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,156 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* FAST INTEGER FORWARD DCT + * + * This is similar to the SSE2 implementation, except that we left-shift the + * constants by 1 less bit (the -1 in CONST_SHIFT.) This is because + * vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of: + * the elements in arg3 + the most significant 17 bits of + * (the elements in arg1 * the elements in arg2). + */ + +#include "jsimd_altivec.h" + + +#define F_0_382 98 /* FIX(0.382683433) */ +#define F_0_541 139 /* FIX(0.541196100) */ +#define F_0_707 181 /* FIX(0.707106781) */ +#define F_1_306 334 /* FIX(1.306562965) */ + +#define CONST_BITS 8 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) + + +#define DO_FDCT() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out4 = vec_sub(tmp10, tmp11); \ + \ + z1 = vec_add(tmp12, tmp13); \ + z1 = vec_sl(z1, pre_multiply_scale_bits); \ + z1 = vec_madds(z1, pw_0707, pw_zero); \ + \ + out2 = vec_add(tmp13, z1); \ + out6 = vec_sub(tmp13, z1); \ + \ + /* Odd part */ \ + \ + tmp10 = vec_add(tmp4, tmp5); \ + tmp11 = vec_add(tmp5, tmp6); \ + tmp12 = vec_add(tmp6, tmp7); \ + \ + tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + z5 = vec_sub(tmp10, tmp12); \ + z5 = vec_madds(z5, pw_0382, pw_zero); \ + \ + z2 = vec_madds(tmp10, pw_0541, z5); \ + z4 = vec_madds(tmp12, pw_1306, z5); \ + \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + z3 = vec_madds(tmp11, pw_0707, pw_zero); \ + \ + z11 = vec_add(tmp7, z3); \ + z13 = vec_sub(tmp7, z3); \ + \ + out5 = vec_add(z13, z2); \ + out3 = vec_sub(z13, z2); \ + out1 = vec_add(z11, z4); \ + out7 = vec_sub(z11, z4); \ +} + + +void +jsimd_fdct_ifast_altivec (DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + z1, z2, z3, z4, z5, z11, z13, + out0, out1, out2, out3, out4, out5, out6, out7; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_0382 = { __8X(F_0_382 << CONST_SHIFT) }, + pw_0541 = { __8X(F_0_541 << CONST_SHIFT) }, + pw_0707 = { __8X(F_0_707 << CONST_SHIFT) }, + pw_1306 = { __8X(F_1_306 << CONST_SHIFT) }; + __vector unsigned short + pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) }; + + /* Pass 1: process rows */ + + row0 = vec_ld(0, data); + row1 = vec_ld(16, data); + row2 = vec_ld(32, data); + row3 = vec_ld(48, data); + row4 = vec_ld(64, data); + row5 = vec_ld(80, data); + row6 = vec_ld(96, data); + row7 = vec_ld(112, data); + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_FDCT(); + + /* Pass 2: process columns */ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_FDCT(); + + vec_st(out0, 0, data); + vec_st(out1, 16, data); + vec_st(out2, 32, data); + vec_st(out3, 48, data); + vec_st(out4, 64, data); + vec_st(out5, 80, data); + vec_st(out6, 96, data); + vec_st(out7, 112, data); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,397 @@ +; +; jfdctfst.asm - fast integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_mmx) PRIVATE + +EXTN(jconst_fdct_ifast_mmx): + +PW_F0707 times 4 dw F_0_707 << CONST_SHIFT +PW_F0382 times 4 dw F_0_382 << CONST_SHIFT +PW_F0541 times 4 dw F_0_541 << CONST_SHIFT +PW_F1306 times 4 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_mmx (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_mmx) PRIVATE + +EXTN(jsimd_fdct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,392 @@ +; +; jfdctfst.asm - fast integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_sse2) PRIVATE + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2 (DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE + +EXTN(jsimd_fdct_ifast_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + psubw xmm3,xmm1 ; xmm3=tmp13 + psubw xmm6,xmm7 ; xmm6=tmp12 + paddw xmm4,xmm1 ; xmm4=tmp10 + paddw xmm0,xmm7 ; xmm0=tmp11 + + paddw xmm6,xmm3 + psllw xmm6,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6,[rel PW_F0707] ; xmm6=z1 + + movdqa xmm1,xmm4 + movdqa xmm7,xmm3 + psubw xmm4,xmm0 ; xmm4=data4 + psubw xmm3,xmm6 ; xmm3=data6 + paddw xmm1,xmm0 ; xmm1=data0 + paddw xmm7,xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm5,xmm0 ; xmm5=tmp11 + paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F0707] ; xmm5=z3 + + movdqa xmm4,xmm2 ; xmm4=tmp10 + psubw xmm2,xmm0 + pmulhw xmm2,[rel PW_F0382] ; xmm2=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm2 ; xmm4=z2 + paddw xmm0,xmm2 ; xmm0=z4 + + movdqa xmm3,xmm6 + psubw xmm6,xmm5 ; xmm6=z13 + paddw xmm3,xmm5 ; xmm3=z11 + + movdqa xmm2,xmm6 + movdqa xmm5,xmm3 + psubw xmm6,xmm4 ; xmm6=data3 + psubw xmm3,xmm0 ; xmm3=data7 + paddw xmm2,xmm4 ; xmm2=data5 + paddw xmm5,xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2,xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5,xmm6 + movdqa xmm3,xmm1 + psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7,xmm6 + movdqa xmm0,xmm2 + paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm1,xmm5 + psubw xmm3,xmm6 ; xmm3=tmp13 + psubw xmm5,xmm2 ; xmm5=tmp12 + paddw xmm4,xmm6 ; xmm4=tmp10 + paddw xmm1,xmm2 ; xmm1=tmp11 + + paddw xmm5,xmm3 + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F0707] ; xmm5=z1 + + movdqa xmm6,xmm4 + movdqa xmm2,xmm3 + psubw xmm4,xmm1 ; xmm4=data4 + psubw xmm3,xmm5 ; xmm3=data6 + paddw xmm6,xmm1 ; xmm6=data0 + paddw xmm2,xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7,xmm0 ; xmm7=tmp10 + paddw xmm0,xmm1 ; xmm0=tmp11 + paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7,PRE_MULTIPLY_SCALE_BITS + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0,[rel PW_F0707] ; xmm0=z3 + + movdqa xmm4,xmm7 ; xmm4=tmp10 + psubw xmm7,xmm1 + pmulhw xmm7,[rel PW_F0382] ; xmm7=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm7 ; xmm4=z2 + paddw xmm1,xmm7 ; xmm1=z4 + + movdqa xmm3,xmm5 + psubw xmm5,xmm0 ; xmm5=z13 + paddw xmm3,xmm0 ; xmm3=z11 + + movdqa xmm6,xmm5 + movdqa xmm2,xmm3 + psubw xmm5,xmm4 ; xmm5=data3 + psubw xmm3,xmm1 ; xmm3=data7 + paddw xmm6,xmm4 ; xmm6=data5 + paddw xmm2,xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctfst-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,404 @@ +; +; jfdctfst.asm - fast integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_sse2) PRIVATE + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2 (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE + +EXTN(jsimd_fdct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + psubw xmm3,xmm1 ; xmm3=tmp13 + psubw xmm6,xmm7 ; xmm6=tmp12 + paddw xmm4,xmm1 ; xmm4=tmp10 + paddw xmm0,xmm7 ; xmm0=tmp11 + + paddw xmm6,xmm3 + psllw xmm6,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1 + + movdqa xmm1,xmm4 + movdqa xmm7,xmm3 + psubw xmm4,xmm0 ; xmm4=data4 + psubw xmm3,xmm6 ; xmm3=data6 + paddw xmm1,xmm0 ; xmm1=data0 + paddw xmm7,xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm5,xmm0 ; xmm5=tmp11 + paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3 + + movdqa xmm4,xmm2 ; xmm4=tmp10 + psubw xmm2,xmm0 + pmulhw xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm2 ; xmm4=z2 + paddw xmm0,xmm2 ; xmm0=z4 + + movdqa xmm3,xmm6 + psubw xmm6,xmm5 ; xmm6=z13 + paddw xmm3,xmm5 ; xmm3=z11 + + movdqa xmm2,xmm6 + movdqa xmm5,xmm3 + psubw xmm6,xmm4 ; xmm6=data3 + psubw xmm3,xmm0 ; xmm3=data7 + paddw xmm2,xmm4 ; xmm2=data5 + paddw xmm5,xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. + +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2,xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5,xmm6 + movdqa xmm3,xmm1 + psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7,xmm6 + movdqa xmm0,xmm2 + paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm1,xmm5 + psubw xmm3,xmm6 ; xmm3=tmp13 + psubw xmm5,xmm2 ; xmm5=tmp12 + paddw xmm4,xmm6 ; xmm4=tmp10 + paddw xmm1,xmm2 ; xmm1=tmp11 + + paddw xmm5,xmm3 + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1 + + movdqa xmm6,xmm4 + movdqa xmm2,xmm3 + psubw xmm4,xmm1 ; xmm4=data4 + psubw xmm3,xmm5 ; xmm3=data6 + paddw xmm6,xmm1 ; xmm6=data0 + paddw xmm2,xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7,xmm0 ; xmm7=tmp10 + paddw xmm0,xmm1 ; xmm0=tmp11 + paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7,PRE_MULTIPLY_SCALE_BITS + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3 + + movdqa xmm4,xmm7 ; xmm4=tmp10 + psubw xmm7,xmm1 + pmulhw xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm7 ; xmm4=z2 + paddw xmm1,xmm7 ; xmm1=z4 + + movdqa xmm3,xmm5 + psubw xmm5,xmm0 ; xmm5=z13 + paddw xmm3,xmm0 ; xmm3=z11 + + movdqa xmm6,xmm5 + movdqa xmm2,xmm3 + psubw xmm5,xmm4 ; xmm5=data3 + psubw xmm3,xmm1 ; xmm3=data7 + paddw xmm6,xmm4 ; xmm6=data5 + paddw xmm2,xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,262 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER FORWARD DCT */ + +#include "jsimd_altivec.h" + + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS) + + +#define DO_FDCT_COMMON(PASS) \ +{ \ + /* (Original) \ + * z1 = (tmp12 + tmp13) * 0.541196100; \ + * data2 = z1 + tmp13 * 0.765366865; \ + * data6 = z1 + tmp12 * -1.847759065; \ + * \ + * (This implementation) \ + * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ + * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ + */ \ + \ + tmp1312l = vec_mergeh(tmp13, tmp12); \ + tmp1312h = vec_mergel(tmp13, tmp12); \ + \ + out2l = vec_msums(tmp1312l, pw_f130_f054, pd_descale_p##PASS); \ + out2h = vec_msums(tmp1312h, pw_f130_f054, pd_descale_p##PASS); \ + out6l = vec_msums(tmp1312l, pw_f054_mf130, pd_descale_p##PASS); \ + out6h = vec_msums(tmp1312h, pw_f054_mf130, pd_descale_p##PASS); \ + \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ + \ + out2 = vec_pack(out2l, out2h); \ + out6 = vec_pack(out6l, out6h); \ + \ + /* Odd part */ \ + \ + z3 = vec_add(tmp4, tmp6); \ + z4 = vec_add(tmp5, tmp7); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ + \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_descale_p##PASS); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_descale_p##PASS); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \ + \ + /* (Original) \ + * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ + * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ + * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \ + * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \ + * \ + * (This implementation) \ + * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ + * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ + * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ + * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ + * data7 = tmp4 + z3; data5 = tmp5 + z4; \ + * data3 = tmp6 + z3; data1 = tmp7 + z4; \ + */ \ + \ + tmp47l = vec_mergeh(tmp4, tmp7); \ + tmp47h = vec_mergel(tmp4, tmp7); \ + \ + out7l = vec_msums(tmp47l, pw_mf060_mf089, z3l); \ + out7h = vec_msums(tmp47h, pw_mf060_mf089, z3h); \ + out1l = vec_msums(tmp47l, pw_mf089_f060, z4l); \ + out1h = vec_msums(tmp47h, pw_mf089_f060, z4h); \ + \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + \ + out7 = vec_pack(out7l, out7h); \ + out1 = vec_pack(out1l, out1h); \ + \ + tmp56l = vec_mergeh(tmp5, tmp6); \ + tmp56h = vec_mergel(tmp5, tmp6); \ + \ + out5l = vec_msums(tmp56l, pw_mf050_mf256, z4l); \ + out5h = vec_msums(tmp56h, pw_mf050_mf256, z4h); \ + out3l = vec_msums(tmp56l, pw_mf256_f050, z3l); \ + out3h = vec_msums(tmp56h, pw_mf256_f050, z3h); \ + \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + \ + out5 = vec_pack(out5l, out5h); \ + out3 = vec_pack(out3l, out3h); \ +} + +#define DO_FDCT_PASS1() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_sl(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_sl(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(1); \ +} + +#define DO_FDCT_PASS2() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_add(out0, pw_descale_p2x); \ + out0 = vec_sra(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_add(out4, pw_descale_p2x); \ + out4 = vec_sra(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(2); \ +} + + +void +jsimd_fdct_islow_altivec (DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h, + z3, z4, z34l, z34h, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector int z3l, z3h, z4l, z4h, + out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h, + out7l, out7h; + + /* Constants */ + __vector short + pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, + pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, + pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, + pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, + pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, + pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, + pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, + pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }, + pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) }; + __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; + __vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, + pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; + __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, + descale_p2 = { __4X(DESCALE_P2) }; + + /* Pass 1: process rows */ + + row0 = vec_ld(0, data); + row1 = vec_ld(16, data); + row2 = vec_ld(32, data); + row3 = vec_ld(48, data); + row4 = vec_ld(64, data); + row5 = vec_ld(80, data); + row6 = vec_ld(96, data); + row7 = vec_ld(112, data); + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_FDCT_PASS1(); + + /* Pass 2: process columns */ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_FDCT_PASS2(); + + vec_st(out0, 0, data); + vec_st(out1, 16, data); + vec_st(out2, 32, data); + vec_st(out3, 48, data); + vec_st(out4, 64, data); + vec_st(out5, 80, data); + vec_st(out6, 96, data); + vec_st(out7, 112, data); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,622 @@ +; +; jfdctint.asm - accurate integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_mmx) PRIVATE + +EXTN(jconst_fdct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_mmx (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_islow_mmx) PRIVATE + +EXTN(jsimd_fdct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + psllw mm5,PASS1_BITS ; mm5=data0 + psllw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm1,DESCALE_P1 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm2,DESCALE_P1 + psrad mm7,DESCALE_P1 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm3,DESCALE_P1 + psrad mm5,DESCALE_P1 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + paddw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw mm5,PASS1_BITS ; mm5=data0 + psraw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm0,DESCALE_P2 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm6,DESCALE_P2 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm2,DESCALE_P2 + psrad mm7,DESCALE_P2 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm3,DESCALE_P2 + psrad mm5,DESCALE_P2 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,622 @@ +; +; jfdctint.asm - accurate integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_sse2) PRIVATE + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_sse2 (DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 16 + global EXTN(jsimd_fdct_islow_sse2) PRIVATE + +EXTN(jsimd_fdct_islow_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + paddw xmm3,xmm1 ; xmm3=tmp10 + paddw xmm6,xmm7 ; xmm6=tmp11 + psubw xmm4,xmm1 ; xmm4=tmp13 + psubw xmm0,xmm7 ; xmm0=tmp12 + + movdqa xmm1,xmm3 + paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3,PASS1_BITS ; xmm3=data0 + psllw xmm1,PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7,xmm4 ; xmm4=tmp13 + movdqa xmm6,xmm4 + punpcklwd xmm7,xmm0 ; xmm0=tmp12 + punpckhwd xmm6,xmm0 + movdqa xmm4,xmm7 + movdqa xmm0,xmm6 + pmaddwd xmm7,[rel PW_F130_F054] ; xmm7=data2L + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=data2H + pmaddwd xmm4,[rel PW_F054_MF130] ; xmm4=data6L + pmaddwd xmm0,[rel PW_F054_MF130] ; xmm0=data6H + + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm6,[rel PD_DESCALE_P1] + psrad xmm7,DESCALE_P1 + psrad xmm6,DESCALE_P1 + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm0,[rel PD_DESCALE_P1] + psrad xmm4,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm7,xmm6 ; xmm7=data2 + packssdw xmm4,xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6,xmm2 ; xmm2=tmp4 + movdqa xmm0,xmm5 ; xmm5=tmp5 + paddw xmm6,xmm3 ; xmm6=z3 + paddw xmm0,xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7,xmm6 + movdqa xmm4,xmm6 + punpcklwd xmm7,xmm0 + punpckhwd xmm4,xmm0 + movdqa xmm6,xmm7 + movdqa xmm0,xmm4 + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3L + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3H + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4L + pmaddwd xmm0,[rel PW_F117_F078] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7,xmm2 + movdqa xmm4,xmm2 + punpcklwd xmm7,xmm1 + punpckhwd xmm4,xmm1 + movdqa xmm2,xmm7 + movdqa xmm1,xmm4 + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp4L + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4H + pmaddwd xmm2,[rel PW_MF089_F060] ; xmm2=tmp7L + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2,xmm6 ; xmm2=data1L + paddd xmm1,xmm0 ; xmm1=data1H + + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm4,[rel PD_DESCALE_P1] + psrad xmm7,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm2,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] + psrad xmm2,DESCALE_P1 + psrad xmm1,DESCALE_P1 + + packssdw xmm7,xmm4 ; xmm7=data7 + packssdw xmm2,xmm1 ; xmm2=data1 + + movdqa xmm4,xmm5 + movdqa xmm1,xmm5 + punpcklwd xmm4,xmm3 + punpckhwd xmm1,xmm3 + movdqa xmm5,xmm4 + movdqa xmm3,xmm1 + pmaddwd xmm4,[rel PW_MF050_MF256] ; xmm4=tmp5L + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5H + pmaddwd xmm5,[rel PW_MF256_F050] ; xmm5=tmp6L + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6H + + paddd xmm4,xmm6 ; xmm4=data5L + paddd xmm1,xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] + psrad xmm4,DESCALE_P1 + psrad xmm1,DESCALE_P1 + paddd xmm5,[rel PD_DESCALE_P1] + paddd xmm3,[rel PD_DESCALE_P1] + psrad xmm5,DESCALE_P1 + psrad xmm3,DESCALE_P1 + + packssdw xmm4,xmm1 ; xmm4=data5 + packssdw xmm5,xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2,xmm5 + movdqa xmm7,xmm6 + psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0,xmm5 + movdqa xmm3,xmm4 + paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1,xmm7 + movdqa xmm6,xmm2 + paddw xmm7,xmm5 ; xmm7=tmp10 + paddw xmm2,xmm4 ; xmm2=tmp11 + psubw xmm1,xmm5 ; xmm1=tmp13 + psubw xmm6,xmm4 ; xmm6=tmp12 + + movdqa xmm5,xmm7 + paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7,[rel PW_DESCALE_P2X] + paddw xmm5,[rel PW_DESCALE_P2X] + psraw xmm7,PASS1_BITS ; xmm7=data0 + psraw xmm5,PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4,xmm1 ; xmm1=tmp13 + movdqa xmm2,xmm1 + punpcklwd xmm4,xmm6 ; xmm6=tmp12 + punpckhwd xmm2,xmm6 + movdqa xmm1,xmm4 + movdqa xmm6,xmm2 + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=data2L + pmaddwd xmm2,[rel PW_F130_F054] ; xmm2=data2H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=data6L + pmaddwd xmm6,[rel PW_F054_MF130] ; xmm6=data6H + + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm2,[rel PD_DESCALE_P2] + psrad xmm4,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm6,[rel PD_DESCALE_P2] + psrad xmm1,DESCALE_P2 + psrad xmm6,DESCALE_P2 + + packssdw xmm4,xmm2 ; xmm4=data2 + packssdw xmm1,xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2,xmm0 ; xmm0=tmp4 + movdqa xmm6,xmm3 ; xmm3=tmp5 + paddw xmm2,xmm7 ; xmm2=z3 + paddw xmm6,xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4,xmm2 + movdqa xmm1,xmm2 + punpcklwd xmm4,xmm6 + punpckhwd xmm1,xmm6 + movdqa xmm2,xmm4 + movdqa xmm6,xmm1 + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3L + pmaddwd xmm1,[rel PW_MF078_F117] ; xmm1=z3H + pmaddwd xmm2,[rel PW_F117_F078] ; xmm2=z4L + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4,xmm0 + movdqa xmm1,xmm0 + punpcklwd xmm4,xmm5 + punpckhwd xmm1,xmm5 + movdqa xmm0,xmm4 + movdqa xmm5,xmm1 + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4L + pmaddwd xmm1,[rel PW_MF060_MF089] ; xmm1=tmp4H + pmaddwd xmm0,[rel PW_MF089_F060] ; xmm0=tmp7L + pmaddwd xmm5,[rel PW_MF089_F060] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0,xmm2 ; xmm0=data1L + paddd xmm5,xmm6 ; xmm5=data1H + + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm1,[rel PD_DESCALE_P2] + psrad xmm4,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm0,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] + psrad xmm0,DESCALE_P2 + psrad xmm5,DESCALE_P2 + + packssdw xmm4,xmm1 ; xmm4=data7 + packssdw xmm0,xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1,xmm3 + movdqa xmm5,xmm3 + punpcklwd xmm1,xmm7 + punpckhwd xmm5,xmm7 + movdqa xmm3,xmm1 + movdqa xmm7,xmm5 + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5L + pmaddwd xmm5,[rel PW_MF050_MF256] ; xmm5=tmp5H + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6L + pmaddwd xmm7,[rel PW_MF256_F050] ; xmm7=tmp6H + + paddd xmm1,xmm2 ; xmm1=data5L + paddd xmm5,xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] + psrad xmm1,DESCALE_P2 + psrad xmm5,DESCALE_P2 + paddd xmm3,[rel PD_DESCALE_P2] + paddd xmm7,[rel PD_DESCALE_P2] + psrad xmm3,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm1,xmm5 ; xmm1=data5 + packssdw xmm3,xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jfdctint-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,634 @@ +; +; jfdctint.asm - accurate integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_sse2) PRIVATE + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_sse2 (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 16 + global EXTN(jsimd_fdct_islow_sse2) PRIVATE + +EXTN(jsimd_fdct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + paddw xmm3,xmm1 ; xmm3=tmp10 + paddw xmm6,xmm7 ; xmm6=tmp11 + psubw xmm4,xmm1 ; xmm4=tmp13 + psubw xmm0,xmm7 ; xmm0=tmp12 + + movdqa xmm1,xmm3 + paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3,PASS1_BITS ; xmm3=data0 + psllw xmm1,PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7,xmm4 ; xmm4=tmp13 + movdqa xmm6,xmm4 + punpcklwd xmm7,xmm0 ; xmm0=tmp12 + punpckhwd xmm6,xmm0 + movdqa xmm4,xmm7 + movdqa xmm0,xmm6 + pmaddwd xmm7,[GOTOFF(ebx,PW_F130_F054)] ; xmm7=data2L + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=data2H + pmaddwd xmm4,[GOTOFF(ebx,PW_F054_MF130)] ; xmm4=data6L + pmaddwd xmm0,[GOTOFF(ebx,PW_F054_MF130)] ; xmm0=data6H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm6,DESCALE_P1 + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm7,xmm6 ; xmm7=data2 + packssdw xmm4,xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6,xmm2 ; xmm2=tmp4 + movdqa xmm0,xmm5 ; xmm5=tmp5 + paddw xmm6,xmm3 ; xmm6=z3 + paddw xmm0,xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7,xmm6 + movdqa xmm4,xmm6 + punpcklwd xmm7,xmm0 + punpckhwd xmm4,xmm0 + movdqa xmm6,xmm7 + movdqa xmm0,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3H + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4L + pmaddwd xmm0,[GOTOFF(ebx,PW_F117_F078)] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7,xmm2 + movdqa xmm4,xmm2 + punpcklwd xmm7,xmm1 + punpckhwd xmm4,xmm1 + movdqa xmm2,xmm7 + movdqa xmm1,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp4L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF089_F060)] ; xmm2=tmp7L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2,xmm6 ; xmm2=data1L + paddd xmm1,xmm0 ; xmm1=data1H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm2,DESCALE_P1 + psrad xmm1,DESCALE_P1 + + packssdw xmm7,xmm4 ; xmm7=data7 + packssdw xmm2,xmm1 ; xmm2=data1 + + movdqa xmm4,xmm5 + movdqa xmm1,xmm5 + punpcklwd xmm4,xmm3 + punpckhwd xmm1,xmm3 + movdqa xmm5,xmm4 + movdqa xmm3,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm4=tmp5L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5H + pmaddwd xmm5,[GOTOFF(ebx,PW_MF256_F050)] ; xmm5=tmp6L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6H + + paddd xmm4,xmm6 ; xmm4=data5L + paddd xmm1,xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm1,DESCALE_P1 + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm5,DESCALE_P1 + psrad xmm3,DESCALE_P1 + + packssdw xmm4,xmm1 ; xmm4=data5 + packssdw xmm5,xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. + +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2,xmm5 + movdqa xmm7,xmm6 + psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0,xmm5 + movdqa xmm3,xmm4 + paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1,xmm7 + movdqa xmm6,xmm2 + paddw xmm7,xmm5 ; xmm7=tmp10 + paddw xmm2,xmm4 ; xmm2=tmp11 + psubw xmm1,xmm5 ; xmm1=tmp13 + psubw xmm6,xmm4 ; xmm6=tmp12 + + movdqa xmm5,xmm7 + paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw xmm7,PASS1_BITS ; xmm7=data0 + psraw xmm5,PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4,xmm1 ; xmm1=tmp13 + movdqa xmm2,xmm1 + punpcklwd xmm4,xmm6 ; xmm6=tmp12 + punpckhwd xmm2,xmm6 + movdqa xmm1,xmm4 + movdqa xmm6,xmm2 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=data2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F130_F054)] ; xmm2=data2H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=data6L + pmaddwd xmm6,[GOTOFF(ebx,PW_F054_MF130)] ; xmm6=data6H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm6,DESCALE_P2 + + packssdw xmm4,xmm2 ; xmm4=data2 + packssdw xmm1,xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2,xmm0 ; xmm0=tmp4 + movdqa xmm6,xmm3 ; xmm3=tmp5 + paddw xmm2,xmm7 ; xmm2=z3 + paddw xmm6,xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4,xmm2 + movdqa xmm1,xmm2 + punpcklwd xmm4,xmm6 + punpckhwd xmm1,xmm6 + movdqa xmm2,xmm4 + movdqa xmm6,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF078_F117)] ; xmm1=z3H + pmaddwd xmm2,[GOTOFF(ebx,PW_F117_F078)] ; xmm2=z4L + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4,xmm0 + movdqa xmm1,xmm0 + punpcklwd xmm4,xmm5 + punpckhwd xmm1,xmm5 + movdqa xmm0,xmm4 + movdqa xmm5,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm1=tmp4H + pmaddwd xmm0,[GOTOFF(ebx,PW_MF089_F060)] ; xmm0=tmp7L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF089_F060)] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0,xmm2 ; xmm0=data1L + paddd xmm5,xmm6 ; xmm5=data1H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm0,DESCALE_P2 + psrad xmm5,DESCALE_P2 + + packssdw xmm4,xmm1 ; xmm4=data7 + packssdw xmm0,xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1,xmm3 + movdqa xmm5,xmm3 + punpcklwd xmm1,xmm7 + punpckhwd xmm5,xmm7 + movdqa xmm3,xmm1 + movdqa xmm7,xmm5 + pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm5=tmp5H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF256_F050)] ; xmm7=tmp6H + + paddd xmm1,xmm2 ; xmm1=data5L + paddd xmm5,xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm5,DESCALE_P2 + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm3,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm1,xmm5 ; xmm1=data5 + packssdw xmm3,xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-3dn.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,452 @@ +; +; jidctflt.asm - floating-point IDCT (3DNow! & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_3dnow) PRIVATE + +EXTN(jconst_idct_float_3dnow): + +PD_1_414 times 2 dd 1.414213562373095048801689 +PD_1_847 times 2 dd 1.847759065022573512256366 +PD_1_082 times 2 dd 1.082392200292393968799446 +PD_2_613 times 2 dd 2.613125929752753055713286 +PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_3dnow (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_3dnow) PRIVATE + +EXTN(jsimd_idct_float_3dnow): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + pushpic ebx ; save GOT address + mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + or eax,ebx + poppic ebx ; restore GOT address + jnz short .columnDCT + + ; -- AC terms all zero + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + psrad mm0,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm1,mm0 + punpckldq mm0,mm0 + punpckhdq mm1,mm1 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + punpcklwd mm1,mm1 + psrad mm0,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + pi2fd mm1,mm1 + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm5,mm5 + punpcklwd mm1,mm1 + psrad mm5,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm5,mm5 + pi2fd mm1,mm1 + + pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 01) + pfadd mm7,mm3 ; mm7=data1=(10 11) + pfsub mm5,mm1 ; mm5=data7=(70 71) + pfsub mm0,mm3 ; mm0=data6=(60 61) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,mm6 ; transpose coefficients + punpckldq mm6,mm7 ; mm6=(00 10) + punpckhdq mm1,mm7 ; mm1=(01 11) + movq mm3,mm0 ; transpose coefficients + punpckldq mm0,mm5 ; mm0=(60 70) + punpckhdq mm3,mm5 ; mm3=(61 71) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3 + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm5, MMWORD [wk(1)] ; mm5=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm6,mm7 + movq mm1,mm5 + pfadd mm7,mm2 ; mm7=data2=(20 21) + pfadd mm5,mm4 ; mm5=data4=(40 41) + pfsub mm6,mm2 ; mm6=data5=(50 51) + pfsub mm1,mm4 ; mm1=data3=(30 31) + + movq mm0,mm7 ; transpose coefficients + punpckldq mm7,mm1 ; mm7=(20 30) + punpckhdq mm0,mm1 ; mm0=(21 31) + movq mm3,mm5 ; transpose coefficients + punpckldq mm5,mm6 ; mm5=(40 50) + punpckhdq mm3,mm6 ; mm3=(41 51) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3 + +.nextcolumn: + add esi, byte 2*SIZEOF_JCOEF ; coef_block + add edx, byte 2*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 10) + pfadd mm7,mm3 ; mm7=data1=(01 11) + pfsub mm5,mm1 ; mm5=data7=(07 17) + pfsub mm0,mm3 ; mm0=data6=(06 16) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm1=[PD_RNDINT_MAGIC] + pcmpeqd mm3,mm3 + psrld mm3,WORD_BIT ; mm3={0xFFFF 0x0000 0xFFFF 0x0000} + + pfadd mm6,mm1 ; mm6=roundint(data0/8)=(00 ** 10 **) + pfadd mm7,mm1 ; mm7=roundint(data1/8)=(01 ** 11 **) + pfadd mm0,mm1 ; mm0=roundint(data6/8)=(06 ** 16 **) + pfadd mm5,mm1 ; mm5=roundint(data7/8)=(07 ** 17 **) + + pand mm6,mm3 ; mm6=(00 -- 10 --) + pslld mm7,WORD_BIT ; mm7=(-- 01 -- 11) + pand mm0,mm3 ; mm0=(06 -- 16 --) + pslld mm5,WORD_BIT ; mm5=(-- 07 -- 17) + por mm6,mm7 ; mm6=(00 01 10 11) + por mm0,mm5 ; mm0=(06 07 16 17) + + movq mm1, MMWORD [wk(0)] ; mm1=tmp2 + movq mm3, MMWORD [wk(1)] ; mm3=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm7,mm1 + movq mm5,mm3 + pfadd mm1,mm2 ; mm1=data2=(02 12) + pfadd mm3,mm4 ; mm3=data4=(04 14) + pfsub mm7,mm2 ; mm7=data5=(05 15) + pfsub mm5,mm4 ; mm5=data3=(03 13) + + movq mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm2=[PD_RNDINT_MAGIC] + pcmpeqd mm4,mm4 + psrld mm4,WORD_BIT ; mm4={0xFFFF 0x0000 0xFFFF 0x0000} + + pfadd mm3,mm2 ; mm3=roundint(data4/8)=(04 ** 14 **) + pfadd mm7,mm2 ; mm7=roundint(data5/8)=(05 ** 15 **) + pfadd mm1,mm2 ; mm1=roundint(data2/8)=(02 ** 12 **) + pfadd mm5,mm2 ; mm5=roundint(data3/8)=(03 ** 13 **) + + pand mm3,mm4 ; mm3=(04 -- 14 --) + pslld mm7,WORD_BIT ; mm7=(-- 05 -- 15) + pand mm1,mm4 ; mm1=(02 -- 12 --) + pslld mm5,WORD_BIT ; mm5=(-- 03 -- 13) + por mm3,mm7 ; mm3=(04 05 14 15) + por mm1,mm5 ; mm1=(02 03 12 13) + + movq mm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm2=[PB_CENTERJSAMP] + + packsswb mm6,mm3 ; mm6=(00 01 10 11 04 05 14 15) + packsswb mm1,mm0 ; mm1=(02 03 12 13 06 07 16 17) + paddb mm6,mm2 + paddb mm1,mm2 + + movq mm4,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm1 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm4,mm1 ; mm4=(04 05 06 07 14 15 16 17) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm4 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm4 ; mm7=(10 11 12 13 14 15 16 17) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + + poppic ebx ; restore GOT address + + add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 2*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,483 @@ +; +; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse2) PRIVATE + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse2) PRIVATE + +EXTN(jsimd_idct_float_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [workspace] + collect_args + push rbx + + ; ---- Pass 1: process columns from input, store into work array. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + lea rdi, [workspace] ; FAST_FLOAT *wsptr + mov rcx, DCTSIZE/4 ; ctr +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + movq xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movq xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + movq xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm2 + por xmm3,xmm4 + por xmm5,xmm6 + por xmm1,xmm3 + por xmm5,xmm7 + por xmm1,xmm5 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[rel PD_1_414] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add rsi, byte 4*SIZEOF_JCOEF ; coef_block + add rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add rdi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec rcx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + lea rsi, [workspace] ; FAST_FLOAT *wsptr + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + mov rcx, DCTSIZE/4 ; ctr +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[rel PD_1_414] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC] + pcmpeqd xmm3,xmm3 + psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm7,xmm1 + movaps xmm5,xmm3 + addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2,[rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC] + pcmpeqd xmm4,xmm4 + psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) + + pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] + + packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6,xmm2 + paddb xmm1,xmm2 + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7 + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3 + + add rsi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add rdi, byte 4*SIZEOF_JSAMPROW + dec rcx ; ctr + jnz near .rowloop + + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,498 @@ +; +; jidctflt.asm - floating-point IDCT (SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse2) PRIVATE + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse2) PRIVATE + +EXTN(jsimd_idct_float_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + movq xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm2 + por xmm3,xmm4 + por xmm5,xmm6 + por xmm1,xmm3 + por xmm5,xmm7 + por xmm1,xmm5 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm1=[PD_RNDINT_MAGIC] + pcmpeqd xmm3,xmm3 + psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm7,xmm1 + movaps xmm5,xmm3 + addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm2=[PD_RNDINT_MAGIC] + pcmpeqd xmm4,xmm4 + psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) + + pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6,xmm2 + paddb xmm1,xmm2 + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7 + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctflt-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctflt-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,572 @@ +; +; jidctflt.asm - floating-point IDCT (SSE & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse) PRIVATE + +EXTN(jconst_idct_float_sse): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_0_125 times 4 dd 0.125 ; 1/8 +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse) PRIVATE + +EXTN(jsimd_idct_float_sse): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm1,mm0 ; mm1=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm3,mm1 ; xmm3=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + movlhps xmm0,xmm3 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm4,mm0 ; mm4=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm5,mm1 ; mm5=(** 22 ** 23) + punpcklwd mm1,mm1 ; mm1=(20 20 21 21) + + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm4,mm4 ; xmm4=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in2H=(22 23) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in2L=(20 21) + cvtpi2ps xmm5,mm5 ; xmm5=(22 23 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(20 21 ** **) + + punpckhwd mm6,mm2 ; mm6=(** 42 ** 43) + punpcklwd mm2,mm2 ; mm2=(40 40 41 41) + punpckhwd mm7,mm3 ; mm7=(** 62 ** 63) + punpcklwd mm3,mm3 ; mm3=(60 60 61 61) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in4H=(42 43) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in4L=(40 41) + cvtpi2ps xmm6,mm6 ; xmm6=(42 43 ** **) + cvtpi2ps xmm2,mm2 ; xmm2=(40 41 ** **) + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in6H=(62 63) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in6L=(60 61) + cvtpi2ps xmm7,mm7 ; xmm7=(62 63 ** **) + cvtpi2ps xmm3,mm3 ; xmm3=(60 61 ** **) + + movlhps xmm0,xmm4 ; xmm0=in0=(00 01 02 03) + movlhps xmm1,xmm5 ; xmm1=in2=(20 21 22 23) + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm2,xmm6 ; xmm2=in4=(40 41 42 43) + movlhps xmm3,xmm7 ; xmm3=in6=(60 61 62 63) + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm6,mm4 ; mm6=(** 12 ** 13) + punpcklwd mm4,mm4 ; mm4=(10 10 11 11) + punpckhwd mm2,mm0 ; mm2=(** 32 ** 33) + punpcklwd mm0,mm0 ; mm0=(30 30 31 31) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in1H=(12 13) + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in1L=(10 11) + cvtpi2ps xmm4,mm6 ; xmm4=(12 13 ** **) + cvtpi2ps xmm2,mm4 ; xmm2=(10 11 ** **) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in3H=(32 33) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in3L=(30 31) + cvtpi2ps xmm0,mm2 ; xmm0=(32 33 ** **) + cvtpi2ps xmm3,mm0 ; xmm3=(30 31 ** **) + + punpckhwd mm7,mm5 ; mm7=(** 52 ** 53) + punpcklwd mm5,mm5 ; mm5=(50 50 51 51) + punpckhwd mm3,mm1 ; mm3=(** 72 ** 73) + punpcklwd mm1,mm1 ; mm1=(70 70 71 71) + + movlhps xmm2,xmm4 ; xmm2=in1=(10 11 12 13) + movlhps xmm3,xmm0 ; xmm3=in3=(30 31 32 33) + + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in5H=(52 53) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in5L=(50 51) + cvtpi2ps xmm4,mm7 ; xmm4=(52 53 ** **) + cvtpi2ps xmm5,mm5 ; xmm5=(50 51 ** **) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in7H=(72 73) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in7L=(70 71) + cvtpi2ps xmm0,mm3 ; xmm0=(72 73 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(70 71 ** **) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm5,xmm4 ; xmm5=in5=(50 51 52 53) + movlhps xmm1,xmm0 ; xmm1=in7=(70 71 72 73) + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_0_125)] ; xmm1=[PD_0_125] + + mulps xmm6,xmm1 ; descale(1/8) + mulps xmm7,xmm1 ; descale(1/8) + mulps xmm5,xmm1 ; descale(1/8) + mulps xmm0,xmm1 ; descale(1/8) + + movhlps xmm3,xmm6 + movhlps xmm1,xmm7 + cvtps2pi mm0,xmm6 ; round to int32, mm0=data0L=(00 10) + cvtps2pi mm1,xmm7 ; round to int32, mm1=data1L=(01 11) + cvtps2pi mm2,xmm3 ; round to int32, mm2=data0H=(20 30) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data1H=(21 31) + packssdw mm0,mm2 ; mm0=data0=(00 10 20 30) + packssdw mm1,mm3 ; mm1=data1=(01 11 21 31) + + movhlps xmm6,xmm5 + movhlps xmm7,xmm0 + cvtps2pi mm4,xmm5 ; round to int32, mm4=data7L=(07 17) + cvtps2pi mm5,xmm0 ; round to int32, mm5=data6L=(06 16) + cvtps2pi mm6,xmm6 ; round to int32, mm6=data7H=(27 37) + cvtps2pi mm7,xmm7 ; round to int32, mm7=data6H=(26 36) + packssdw mm4,mm6 ; mm4=data7=(07 17 27 37) + packssdw mm5,mm7 ; mm5=data6=(06 16 26 36) + + packsswb mm0,mm5 ; mm0=(00 10 20 30 06 16 26 36) + packsswb mm1,mm4 ; mm1=(01 11 21 31 07 17 27 37) + + movaps xmm3, XMMWORD [wk(0)] ; xmm3=tmp2 + movaps xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movaps xmm6,[GOTOFF(ebx,PD_0_125)] ; xmm6=[PD_0_125] + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm5,xmm3 + movaps xmm0,xmm1 + addps xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + addps xmm1,xmm4 ; xmm1=data4=(04 14 24 34) + subps xmm5,xmm2 ; xmm5=data5=(05 15 25 35) + subps xmm0,xmm4 ; xmm0=data3=(03 13 23 33) + + mulps xmm3,xmm6 ; descale(1/8) + mulps xmm1,xmm6 ; descale(1/8) + mulps xmm5,xmm6 ; descale(1/8) + mulps xmm0,xmm6 ; descale(1/8) + + movhlps xmm7,xmm3 + movhlps xmm2,xmm1 + cvtps2pi mm2,xmm3 ; round to int32, mm2=data2L=(02 12) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data4L=(04 14) + cvtps2pi mm6,xmm7 ; round to int32, mm6=data2H=(22 32) + cvtps2pi mm7,xmm2 ; round to int32, mm7=data4H=(24 34) + packssdw mm2,mm6 ; mm2=data2=(02 12 22 32) + packssdw mm3,mm7 ; mm3=data4=(04 14 24 34) + + movhlps xmm4,xmm5 + movhlps xmm6,xmm0 + cvtps2pi mm5,xmm5 ; round to int32, mm5=data5L=(05 15) + cvtps2pi mm4,xmm0 ; round to int32, mm4=data3L=(03 13) + cvtps2pi mm6,xmm4 ; round to int32, mm6=data5H=(25 35) + cvtps2pi mm7,xmm6 ; round to int32, mm7=data3H=(23 33) + packssdw mm5,mm6 ; mm5=data5=(05 15 25 35) + packssdw mm4,mm7 ; mm4=data3=(03 13 23 33) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm2,mm3 ; mm2=(02 12 22 32 04 14 24 34) + packsswb mm4,mm5 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm6 + paddb mm1,mm6 + paddb mm2,mm6 + paddb mm4,mm6 + + movq mm7,mm0 ; transpose coefficients(phase 1) + punpcklbw mm0,mm1 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm1 ; mm7=(06 07 16 17 26 27 36 37) + movq mm3,mm2 ; transpose coefficients(phase 1) + punpcklbw mm2,mm4 ; mm2=(02 03 12 13 22 23 32 33) + punpckhbw mm3,mm4 ; mm3=(04 05 14 15 24 25 34 35) + + movq mm5,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm2 ; mm5=(20 21 22 23 30 31 32 33) + movq mm6,mm3 ; transpose coefficients(phase 2) + punpcklwd mm3,mm7 ; mm3=(04 05 06 07 14 15 16 17) + punpckhwd mm6,mm7 ; mm6=(24 25 26 27 34 35 36 37) + + movq mm1,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm3 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm1,mm3 ; mm1=(10 11 12 13 14 15 16 17) + movq mm4,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm6 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm6 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,257 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* FAST INTEGER INVERSE DCT + * + * This is similar to the SSE2 implementation, except that we left-shift the + * constants by 1 less bit (the -1 in CONST_SHIFT.) This is because + * vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of: + * the elements in arg3 + the most significant 17 bits of + * (the elements in arg1 * the elements in arg2). + */ + +#include "jsimd_altivec.h" + + +#define F_1_082 277 /* FIX(1.082392200) */ +#define F_1_414 362 /* FIX(1.414213562) */ +#define F_1_847 473 /* FIX(1.847759065) */ +#define F_2_613 669 /* FIX(2.613125930) */ +#define F_1_613 (F_2_613 - 256) /* FIX(2.613125930) - FIX(1) */ + +#define CONST_BITS 8 +#define PASS1_BITS 2 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) + + +#define DO_IDCT(in) \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(in##0, in##4); \ + tmp11 = vec_sub(in##0, in##4); \ + tmp13 = vec_add(in##2, in##6); \ + \ + tmp12 = vec_sub(in##2, in##6); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \ + tmp12 = vec_sub(tmp12, tmp13); \ + \ + tmp0 = vec_add(tmp10, tmp13); \ + tmp3 = vec_sub(tmp10, tmp13); \ + tmp1 = vec_add(tmp11, tmp12); \ + tmp2 = vec_sub(tmp11, tmp12); \ + \ + /* Odd part */ \ + \ + z13 = vec_add(in##5, in##3); \ + z10 = vec_sub(in##5, in##3); \ + z10s = vec_sl(z10, pre_multiply_scale_bits); \ + z11 = vec_add(in##1, in##7); \ + z12s = vec_sub(in##1, in##7); \ + z12s = vec_sl(z12s, pre_multiply_scale_bits); \ + \ + tmp11 = vec_sub(z11, z13); \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \ + \ + tmp7 = vec_add(z11, z13); \ + \ + /* To avoid overflow... \ + * \ + * (Original) \ + * tmp12 = -2.613125930 * z10 + z5; \ + * \ + * (This implementation) \ + * tmp12 = (-1.613125930 - 1) * z10 + z5; \ + * = -1.613125930 * z10 - z10 + z5; \ + */ \ + \ + z5 = vec_add(z10s, z12s); \ + z5 = vec_madds(z5, pw_F1847, pw_zero); \ + \ + tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \ + tmp10 = vec_sub(tmp10, z5); \ + tmp12 = vec_madds(z10s, pw_MF1613, z5); \ + tmp12 = vec_sub(tmp12, z10); \ + \ + tmp6 = vec_sub(tmp12, tmp7); \ + tmp5 = vec_sub(tmp11, tmp6); \ + tmp4 = vec_add(tmp10, tmp5); \ + \ + out0 = vec_add(tmp0, tmp7); \ + out1 = vec_add(tmp1, tmp6); \ + out2 = vec_add(tmp2, tmp5); \ + out3 = vec_sub(tmp3, tmp4); \ + out4 = vec_add(tmp3, tmp4); \ + out5 = vec_sub(tmp2, tmp5); \ + out6 = vec_sub(tmp1, tmp6); \ + out7 = vec_sub(tmp0, tmp7); \ +} + + +void +jsimd_idct_ifast_altivec (void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + short *dct_table = (short *)dct_table_; + int *outptr; + + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + z5, z10, z10s, z11, z12s, z13, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector signed char outb; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) }, + pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) }, + pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) }, + pw_F1082 = { __8X(F_1_082 << CONST_SHIFT) }; + __vector unsigned short + pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) }, + pass1_bits3 = { __8X(PASS1_BITS + 3) }; + __vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) }; + + /* Pass 1: process columns */ + + col0 = vec_ld(0, coef_block); + col1 = vec_ld(16, coef_block); + col2 = vec_ld(32, coef_block); + col3 = vec_ld(48, coef_block); + col4 = vec_ld(64, coef_block); + col5 = vec_ld(80, coef_block); + col6 = vec_ld(96, coef_block); + col7 = vec_ld(112, coef_block); + + tmp1 = vec_or(col1, col2); + tmp2 = vec_or(col3, col4); + tmp1 = vec_or(tmp1, tmp2); + tmp3 = vec_or(col5, col6); + tmp3 = vec_or(tmp3, col7); + tmp1 = vec_or(tmp1, tmp3); + + quant0 = vec_ld(0, dct_table); + col0 = vec_mladd(col0, quant0, pw_zero); + + if (vec_all_eq(tmp1, pw_zero)) { + /* AC terms all zero */ + + row0 = vec_splat(col0, 0); + row1 = vec_splat(col0, 1); + row2 = vec_splat(col0, 2); + row3 = vec_splat(col0, 3); + row4 = vec_splat(col0, 4); + row5 = vec_splat(col0, 5); + row6 = vec_splat(col0, 6); + row7 = vec_splat(col0, 7); + + } else { + + quant1 = vec_ld(16, dct_table); + quant2 = vec_ld(32, dct_table); + quant3 = vec_ld(48, dct_table); + quant4 = vec_ld(64, dct_table); + quant5 = vec_ld(80, dct_table); + quant6 = vec_ld(96, dct_table); + quant7 = vec_ld(112, dct_table); + + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); + + DO_IDCT(col); + + TRANSPOSE(out, row); + } + + /* Pass 2: process rows */ + + DO_IDCT(row); + + out0 = vec_sra(out0, pass1_bits3); + out1 = vec_sra(out1, pass1_bits3); + out2 = vec_sra(out2, pass1_bits3); + out3 = vec_sra(out3, pass1_bits3); + out4 = vec_sra(out4, pass1_bits3); + out5 = vec_sra(out5, pass1_bits3); + out6 = vec_sra(out6, pass1_bits3); + out7 = vec_sra(out7, pass1_bits3); + + TRANSPOSE(out, col); + + outb = vec_packs(col0, col0); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[0] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col1, col1); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[1] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col2, col2); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[2] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col3, col3); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[3] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col4, col4); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[4] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col5, col5); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[5] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col6, col6); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[6] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col7, col7); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[7] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,500 @@ +; +; jidctfst.asm - fast integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_mmx) PRIVATE + +EXTN(jconst_idct_ifast_mmx): + +PW_F1414 times 4 dw F_1_414 << CONST_SHIFT +PW_F1847 times 4 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 4 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_ifast_mmx) PRIVATE + +EXTN(jsimd_idct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 01 02 03) + paddw mm7,mm0 ; mm7=data1=(10 11 12 13) + psubw mm1,mm3 ; mm1=data7=(70 71 72 73) + psubw mm5,mm0 ; mm5=data6=(60 61 62 63) + psubw mm4,mm0 ; mm4=tmp5 + + movq mm3,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm3,mm7 ; mm3=(02 12 03 13) + movq mm0,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm1 ; mm5=(60 70 61 71) + punpckhwd mm0,mm1 ; mm0=(62 72 63 73) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm1, MMWORD [wk(1)] ; mm1=tmp3 + + movq MMWORD [wk(0)], mm5 ; wk(0)=(60 70 61 71) + movq MMWORD [wk(1)], mm0 ; wk(1)=(62 72 63 73) + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm7 + movq mm0,mm1 + paddw mm7,mm4 ; mm7=data2=(20 21 22 23) + paddw mm1,mm2 ; mm1=data4=(40 41 42 43) + psubw mm5,mm4 ; mm5=data5=(50 51 52 53) + psubw mm0,mm2 ; mm0=data3=(30 31 32 33) + + movq mm4,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm0 ; mm7=(20 30 21 31) + punpckhwd mm4,mm0 ; mm4=(22 32 23 33) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm5 ; mm1=(40 50 41 51) + punpckhwd mm2,mm5 ; mm2=(42 52 43 53) + + movq mm0,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(00 10 20 30) + punpckhdq mm0,mm7 ; mm0=(01 11 21 31) + movq mm5,mm3 ; transpose coefficients(phase 2) + punpckldq mm3,mm4 ; mm3=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=(60 70 61 71) + movq mm4, MMWORD [wk(1)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm7 ; mm1=(40 50 60 70) + punpckhdq mm6,mm7 ; mm6=(41 51 61 71) + movq mm0,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(42 52 62 72) + punpckhdq mm0,mm4 ; mm0=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_IFAST_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 10 20 30) + paddw mm7,mm0 ; mm7=data1=(01 11 21 31) + psraw mm6,(PASS1_BITS+3) ; descale + psraw mm7,(PASS1_BITS+3) ; descale + psubw mm1,mm3 ; mm1=data7=(07 17 27 37) + psubw mm5,mm0 ; mm5=data6=(06 16 26 36) + psraw mm1,(PASS1_BITS+3) ; descale + psraw mm5,(PASS1_BITS+3) ; descale + psubw mm4,mm0 ; mm4=tmp5 + + packsswb mm6,mm5 ; mm6=(00 10 20 30 06 16 26 36) + packsswb mm7,mm1 ; mm7=(01 11 21 31 07 17 27 37) + + movq mm3, MMWORD [wk(0)] ; mm3=tmp2 + movq mm0, MMWORD [wk(1)] ; mm0=tmp3 + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm3 + movq mm1,mm0 + paddw mm3,mm4 ; mm3=data2=(02 12 22 32) + paddw mm0,mm2 ; mm0=data4=(04 14 24 34) + psraw mm3,(PASS1_BITS+3) ; descale + psraw mm0,(PASS1_BITS+3) ; descale + psubw mm5,mm4 ; mm5=data5=(05 15 25 35) + psubw mm1,mm2 ; mm1=data3=(03 13 23 33) + psraw mm5,(PASS1_BITS+3) ; descale + psraw mm1,(PASS1_BITS+3) ; descale + + movq mm4,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm4=[PB_CENTERJSAMP] + + packsswb mm3,mm0 ; mm3=(02 12 22 32 04 14 24 34) + packsswb mm1,mm5 ; mm1=(03 13 23 33 05 15 25 35) + + paddb mm6,mm4 + paddb mm7,mm4 + paddb mm3,mm4 + paddb mm1,mm4 + + movq mm2,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm7 ; mm6=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm7 ; mm2=(06 07 16 17 26 27 36 37) + movq mm0,mm3 ; transpose coefficients(phase 1) + punpcklbw mm3,mm1 ; mm3=(02 03 12 13 22 23 32 33) + punpckhbw mm0,mm1 ; mm0=(04 05 14 15 24 25 34 35) + + movq mm5,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm3 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm3 ; mm5=(20 21 22 23 30 31 32 33) + movq mm4,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(04 05 06 07 14 15 16 17) + punpckhwd mm4,mm2 ; mm4=(24 25 26 27 34 35 36 37) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm0 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm0 ; mm7=(10 11 12 13 14 15 16 17) + movq mm1,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm4 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm1,mm4 ; mm1=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,492 @@ +; +; jidctfst.asm - fast integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_sse2) PRIVATE + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = jpeg_component_info *compptr +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_ifast_sse2) PRIVATE + +EXTN(jsimd_idct_ifast_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + psubw xmm0,xmm2 ; xmm0=tmp11 + psubw xmm1,xmm3 + paddw xmm4,xmm2 ; xmm4=tmp10 + paddw xmm5,xmm3 ; xmm5=tmp13 + + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1,[rel PW_F1414] + psubw xmm1,xmm5 ; xmm1=tmp12 + + movdqa xmm6,xmm4 + movdqa xmm7,xmm0 + psubw xmm4,xmm5 ; xmm4=tmp3 + psubw xmm0,xmm1 ; xmm0=tmp2 + paddw xmm6,xmm5 ; xmm6=tmp0 + paddw xmm7,xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm2 + movdqa xmm0,xmm5 + psubw xmm2,xmm1 ; xmm2=z12 + psubw xmm5,xmm3 ; xmm5=z10 + paddw xmm4,xmm1 ; xmm4=z11 + paddw xmm0,xmm3 ; xmm0=z13 + + movdqa xmm1,xmm5 ; xmm1=z10(unscaled) + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3,xmm4 + psubw xmm4,xmm0 + paddw xmm3,xmm0 ; xmm3=tmp7 + + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4,[rel PW_F1414] ; xmm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0,xmm5 + paddw xmm5,xmm2 + pmulhw xmm5,[rel PW_F1847] ; xmm5=z5 + pmulhw xmm0,[rel PW_MF1613] + pmulhw xmm2,[rel PW_F1082] + psubw xmm0,xmm1 + psubw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm0,xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0,xmm3 ; xmm0=tmp6 + movdqa xmm1,xmm6 + movdqa xmm5,xmm7 + paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4,xmm0 ; xmm4=tmp5 + + movdqa xmm3,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2,xmm4 ; xmm2=tmp4 + movdqa xmm5,xmm7 + movdqa xmm0,xmm1 + paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0,xmm3 ; transpose coefficients(phase 2) + punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7,xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7,xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2,xmm6 + movdqa xmm0,xmm5 + psubw xmm6,xmm1 ; xmm6=tmp11 + psubw xmm5,xmm3 + paddw xmm2,xmm1 ; xmm2=tmp10 + paddw xmm0,xmm3 ; xmm0=tmp13 + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F1414] + psubw xmm5,xmm0 ; xmm5=tmp12 + + movdqa xmm1,xmm2 + movdqa xmm3,xmm6 + psubw xmm2,xmm0 ; xmm2=tmp3 + psubw xmm6,xmm5 ; xmm6=tmp2 + paddw xmm1,xmm0 ; xmm1=tmp0 + paddw xmm3,xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + psubw xmm0,xmm7 ; xmm0=z12 + psubw xmm4,xmm5 ; xmm4=z10 + paddw xmm2,xmm7 ; xmm2=z11 + paddw xmm6,xmm5 ; xmm6=z13 + + movdqa xmm7,xmm4 ; xmm7=z10(unscaled) + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5,xmm2 + psubw xmm2,xmm6 + paddw xmm5,xmm6 ; xmm5=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2,[rel PW_F1414] ; xmm2=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6,xmm4 + paddw xmm4,xmm0 + pmulhw xmm4,[rel PW_F1847] ; xmm4=z5 + pmulhw xmm6,[rel PW_MF1613] + pmulhw xmm0,[rel PW_F1082] + psubw xmm6,xmm7 + psubw xmm0,xmm4 ; xmm0=tmp10 + paddw xmm6,xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6,xmm5 ; xmm6=tmp6 + movdqa xmm7,xmm1 + movdqa xmm4,xmm3 + paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1,(PASS1_BITS+3) ; descale + psraw xmm3,(PASS1_BITS+3) ; descale + psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7,(PASS1_BITS+3) ; descale + psraw xmm4,(PASS1_BITS+3) ; descale + psubw xmm2,xmm6 ; xmm2=tmp5 + + packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0,xmm2 ; xmm0=tmp4 + movdqa xmm4,xmm5 + movdqa xmm7,xmm6 + paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5,(PASS1_BITS+3) ; descale + psraw xmm6,(PASS1_BITS+3) ; descale + psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4,(PASS1_BITS+3) ; descale + psraw xmm7,(PASS1_BITS+3) ; descale + + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] + + packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1,xmm2 + paddb xmm3,xmm2 + paddb xmm5,xmm2 + paddb xmm7,xmm2 + + movdqa xmm0,xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctfst-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,502 @@ +; +; jidctfst.asm - fast integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_sse2) PRIVATE + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_ifast_sse2) PRIVATE + +EXTN(jsimd_idct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + psubw xmm0,xmm2 ; xmm0=tmp11 + psubw xmm1,xmm3 + paddw xmm4,xmm2 ; xmm4=tmp10 + paddw xmm5,xmm3 ; xmm5=tmp13 + + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1,[GOTOFF(ebx,PW_F1414)] + psubw xmm1,xmm5 ; xmm1=tmp12 + + movdqa xmm6,xmm4 + movdqa xmm7,xmm0 + psubw xmm4,xmm5 ; xmm4=tmp3 + psubw xmm0,xmm1 ; xmm0=tmp2 + paddw xmm6,xmm5 ; xmm6=tmp0 + paddw xmm7,xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm2 + movdqa xmm0,xmm5 + psubw xmm2,xmm1 ; xmm2=z12 + psubw xmm5,xmm3 ; xmm5=z10 + paddw xmm4,xmm1 ; xmm4=z11 + paddw xmm0,xmm3 ; xmm0=z13 + + movdqa xmm1,xmm5 ; xmm1=z10(unscaled) + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3,xmm4 + psubw xmm4,xmm0 + paddw xmm3,xmm0 ; xmm3=tmp7 + + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4,[GOTOFF(ebx,PW_F1414)] ; xmm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0,xmm5 + paddw xmm5,xmm2 + pmulhw xmm5,[GOTOFF(ebx,PW_F1847)] ; xmm5=z5 + pmulhw xmm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm2,[GOTOFF(ebx,PW_F1082)] + psubw xmm0,xmm1 + psubw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm0,xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0,xmm3 ; xmm0=tmp6 + movdqa xmm1,xmm6 + movdqa xmm5,xmm7 + paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4,xmm0 ; xmm4=tmp5 + + movdqa xmm3,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2,xmm4 ; xmm2=tmp4 + movdqa xmm5,xmm7 + movdqa xmm0,xmm1 + paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0,xmm3 ; transpose coefficients(phase 2) + punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7,xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7,xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2,xmm6 + movdqa xmm0,xmm5 + psubw xmm6,xmm1 ; xmm6=tmp11 + psubw xmm5,xmm3 + paddw xmm2,xmm1 ; xmm2=tmp10 + paddw xmm0,xmm3 ; xmm0=tmp13 + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F1414)] + psubw xmm5,xmm0 ; xmm5=tmp12 + + movdqa xmm1,xmm2 + movdqa xmm3,xmm6 + psubw xmm2,xmm0 ; xmm2=tmp3 + psubw xmm6,xmm5 ; xmm6=tmp2 + paddw xmm1,xmm0 ; xmm1=tmp0 + paddw xmm3,xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + psubw xmm0,xmm7 ; xmm0=z12 + psubw xmm4,xmm5 ; xmm4=z10 + paddw xmm2,xmm7 ; xmm2=z11 + paddw xmm6,xmm5 ; xmm6=z13 + + movdqa xmm7,xmm4 ; xmm7=z10(unscaled) + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5,xmm2 + psubw xmm2,xmm6 + paddw xmm5,xmm6 ; xmm5=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2,[GOTOFF(ebx,PW_F1414)] ; xmm2=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6,xmm4 + paddw xmm4,xmm0 + pmulhw xmm4,[GOTOFF(ebx,PW_F1847)] ; xmm4=z5 + pmulhw xmm6,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm0,[GOTOFF(ebx,PW_F1082)] + psubw xmm6,xmm7 + psubw xmm0,xmm4 ; xmm0=tmp10 + paddw xmm6,xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6,xmm5 ; xmm6=tmp6 + movdqa xmm7,xmm1 + movdqa xmm4,xmm3 + paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1,(PASS1_BITS+3) ; descale + psraw xmm3,(PASS1_BITS+3) ; descale + psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7,(PASS1_BITS+3) ; descale + psraw xmm4,(PASS1_BITS+3) ; descale + psubw xmm2,xmm6 ; xmm2=tmp5 + + packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0,xmm2 ; xmm0=tmp4 + movdqa xmm4,xmm5 + movdqa xmm7,xmm6 + paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5,(PASS1_BITS+3) ; descale + psraw xmm6,(PASS1_BITS+3) ; descale + psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4,(PASS1_BITS+3) ; descale + psraw xmm7,(PASS1_BITS+3) ; descale + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1,xmm2 + paddb xmm3,xmm2 + paddb xmm5,xmm2 + paddb xmm7,xmm2 + + movdqa xmm0,xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,359 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER INVERSE DCT */ + +#include "jsimd_altivec.h" + + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + + +#define DO_IDCT(in, PASS) \ +{ \ + /* Even part \ + * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ + \ + in##26l = vec_mergeh(in##2, in##6); \ + in##26h = vec_mergel(in##2, in##6); \ + \ + tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \ + tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \ + tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \ + tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \ + \ + tmp0 = vec_add(in##0, in##4); \ + tmp1 = vec_sub(in##0, in##4); \ + \ + tmp0l = vec_unpackh(tmp0); \ + tmp0h = vec_unpackl(tmp0); \ + tmp0l = vec_sl(tmp0l, const_bits); \ + tmp0h = vec_sl(tmp0h, const_bits); \ + tmp0l = vec_add(tmp0l, pd_descale_p##PASS); \ + tmp0h = vec_add(tmp0h, pd_descale_p##PASS); \ + \ + tmp10l = vec_add(tmp0l, tmp3l); \ + tmp10h = vec_add(tmp0h, tmp3h); \ + tmp13l = vec_sub(tmp0l, tmp3l); \ + tmp13h = vec_sub(tmp0h, tmp3h); \ + \ + tmp1l = vec_unpackh(tmp1); \ + tmp1h = vec_unpackl(tmp1); \ + tmp1l = vec_sl(tmp1l, const_bits); \ + tmp1h = vec_sl(tmp1h, const_bits); \ + tmp1l = vec_add(tmp1l, pd_descale_p##PASS); \ + tmp1h = vec_add(tmp1h, pd_descale_p##PASS); \ + \ + tmp11l = vec_add(tmp1l, tmp2l); \ + tmp11h = vec_add(tmp1h, tmp2h); \ + tmp12l = vec_sub(tmp1l, tmp2l); \ + tmp12h = vec_sub(tmp1h, tmp2h); \ + \ + /* Odd part */ \ + \ + z3 = vec_add(in##3, in##7); \ + z4 = vec_add(in##1, in##5); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ + \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \ + \ + /* (Original) \ + * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ + * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ + * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * tmp0 += z1 + z3; tmp1 += z2 + z4; \ + * tmp2 += z2 + z3; tmp3 += z1 + z4; \ + * \ + * (This implementation) \ + * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ + * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ + * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ + * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ + * tmp0 += z3; tmp1 += z4; \ + * tmp2 += z3; tmp3 += z4; \ + */ \ + \ + in##71l = vec_mergeh(in##7, in##1); \ + in##71h = vec_mergel(in##7, in##1); \ + \ + tmp0l = vec_msums(in##71l, pw_mf060_mf089, z3l); \ + tmp0h = vec_msums(in##71h, pw_mf060_mf089, z3h); \ + tmp3l = vec_msums(in##71l, pw_mf089_f060, z4l); \ + tmp3h = vec_msums(in##71h, pw_mf089_f060, z4h); \ + \ + in##53l = vec_mergeh(in##5, in##3); \ + in##53h = vec_mergel(in##5, in##3); \ + \ + tmp1l = vec_msums(in##53l, pw_mf050_mf256, z4l); \ + tmp1h = vec_msums(in##53h, pw_mf050_mf256, z4h); \ + tmp2l = vec_msums(in##53l, pw_mf256_f050, z3l); \ + tmp2h = vec_msums(in##53h, pw_mf256_f050, z3h); \ + \ + /* Final output stage */ \ + \ + out0l = vec_add(tmp10l, tmp3l); \ + out0h = vec_add(tmp10h, tmp3h); \ + out7l = vec_sub(tmp10l, tmp3l); \ + out7h = vec_sub(tmp10h, tmp3h); \ + \ + out0l = vec_sra(out0l, descale_p##PASS); \ + out0h = vec_sra(out0h, descale_p##PASS); \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ + \ + out0 = vec_pack(out0l, out0h); \ + out7 = vec_pack(out7l, out7h); \ + \ + out1l = vec_add(tmp11l, tmp2l); \ + out1h = vec_add(tmp11h, tmp2h); \ + out6l = vec_sub(tmp11l, tmp2l); \ + out6h = vec_sub(tmp11h, tmp2h); \ + \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ + \ + out1 = vec_pack(out1l, out1h); \ + out6 = vec_pack(out6l, out6h); \ + \ + out2l = vec_add(tmp12l, tmp1l); \ + out2h = vec_add(tmp12h, tmp1h); \ + out5l = vec_sub(tmp12l, tmp1l); \ + out5h = vec_sub(tmp12h, tmp1h); \ + \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ + \ + out2 = vec_pack(out2l, out2h); \ + out5 = vec_pack(out5l, out5h); \ + \ + out3l = vec_add(tmp13l, tmp0l); \ + out3h = vec_add(tmp13h, tmp0h); \ + out4l = vec_sub(tmp13l, tmp0l); \ + out4h = vec_sub(tmp13h, tmp0h); \ + \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + out4l = vec_sra(out4l, descale_p##PASS); \ + out4h = vec_sra(out4h, descale_p##PASS); \ + \ + out3 = vec_pack(out3l, out3h); \ + out4 = vec_pack(out4l, out4h); \ +} + + +void +jsimd_idct_islow_altivec (void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + short *dct_table = (short *)dct_table_; + int *outptr; + + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, + tmp0, tmp1, tmp2, tmp3, z3, z4, + z34l, z34h, col71l, col71h, col26l, col26h, col53l, col53h, + row71l, row71h, row26l, row26h, row53l, row53h, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector int tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h, + tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h, + z3l, z3h, z4l, z4h, + out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h, + out5l, out5h, out6l, out6h, out7l, out7h; + __vector signed char outb; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, + pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, + pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, + pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, + pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, + pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, + pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, + pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }; + __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; + __vector int pd_zero = { __4X(0) }, + pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, + pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; + __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, + descale_p2 = { __4X(DESCALE_P2) }, + const_bits = { __4X(CONST_BITS) }; + __vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) }; + + /* Pass 1: process columns */ + + col0 = vec_ld(0, coef_block); + col1 = vec_ld(16, coef_block); + col2 = vec_ld(32, coef_block); + col3 = vec_ld(48, coef_block); + col4 = vec_ld(64, coef_block); + col5 = vec_ld(80, coef_block); + col6 = vec_ld(96, coef_block); + col7 = vec_ld(112, coef_block); + + tmp1 = vec_or(col1, col2); + tmp2 = vec_or(col3, col4); + tmp1 = vec_or(tmp1, tmp2); + tmp3 = vec_or(col5, col6); + tmp3 = vec_or(tmp3, col7); + tmp1 = vec_or(tmp1, tmp3); + + quant0 = vec_ld(0, dct_table); + col0 = vec_mladd(col0, quant0, pw_zero); + + if (vec_all_eq(tmp1, pw_zero)) { + /* AC terms all zero */ + + col0 = vec_sl(col0, pass1_bits); + + row0 = vec_splat(col0, 0); + row1 = vec_splat(col0, 1); + row2 = vec_splat(col0, 2); + row3 = vec_splat(col0, 3); + row4 = vec_splat(col0, 4); + row5 = vec_splat(col0, 5); + row6 = vec_splat(col0, 6); + row7 = vec_splat(col0, 7); + + } else { + + quant1 = vec_ld(16, dct_table); + quant2 = vec_ld(32, dct_table); + quant3 = vec_ld(48, dct_table); + quant4 = vec_ld(64, dct_table); + quant5 = vec_ld(80, dct_table); + quant6 = vec_ld(96, dct_table); + quant7 = vec_ld(112, dct_table); + + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); + + DO_IDCT(col, 1); + + TRANSPOSE(out, row); + } + + /* Pass 2: process rows */ + + DO_IDCT(row, 2); + + TRANSPOSE(out, col); + + outb = vec_packs(col0, col0); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[0] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col1, col1); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[1] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col2, col2); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[2] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col3, col3); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[3] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col4, col4); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[4] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col5, col5); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[5] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col6, col6); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[6] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col7, col7); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[7] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,852 @@ +; +; jidctint.asm - accurate integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_mmx) PRIVATE + +EXTN(jconst_idct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 12 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_islow_mmx) PRIVATE + +EXTN(jsimd_idct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7 + paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1] + + paddd mm5,mm3 + paddd mm7,mm3 + psrad mm5,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm5,mm7 ; mm5=data0=(00 01 02 03) + packssdw mm2,mm0 ; mm2=data7=(70 71 72 73) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P1 + psrad mm3,DESCALE_P1 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm4,mm3 ; mm4=data1=(10 11 12 13) + packssdw mm7,mm0 ; mm7=data6=(60 61 62 63) + + movq mm6,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm4 ; mm5=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm1,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm2 ; mm7=(60 70 61 71) + punpckhwd mm1,mm2 ; mm1=(62 72 63 73) + + movq mm3, MMWORD [wk(6)] ; mm3=tmp12L + movq mm0, MMWORD [wk(7)] ; mm0=tmp12H + movq mm4, MMWORD [wk(10)] ; mm4=tmp1L + movq mm2, MMWORD [wk(11)] ; mm2=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 01 11) + movq MMWORD [wk(1)], mm6 ; wk(1)=(02 12 03 13) + movq MMWORD [wk(4)], mm7 ; wk(4)=(60 70 61 71) + movq MMWORD [wk(5)], mm1 ; wk(5)=(62 72 63 73) + + movq mm5,mm3 + movq mm6,mm0 + paddd mm3,mm4 ; mm3=data2L + paddd mm0,mm2 ; mm0=data2H + psubd mm5,mm4 ; mm5=data5L + psubd mm6,mm2 ; mm6=data5H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1] + + paddd mm3,mm7 + paddd mm0,mm7 + psrad mm3,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm5,mm7 + paddd mm6,mm7 + psrad mm5,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm3,mm0 ; mm3=data2=(20 21 22 23) + packssdw mm5,mm6 ; mm5=data5=(50 51 52 53) + + movq mm1, MMWORD [wk(2)] ; mm1=tmp13L + movq mm4, MMWORD [wk(3)] ; mm4=tmp13H + movq mm2, MMWORD [wk(8)] ; mm2=tmp0L + movq mm7, MMWORD [wk(9)] ; mm7=tmp0H + + movq mm0,mm1 + movq mm6,mm4 + paddd mm1,mm2 ; mm1=data3L + paddd mm4,mm7 ; mm4=data3H + psubd mm0,mm2 ; mm0=data4L + psubd mm6,mm7 ; mm6=data4H + + movq mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1] + + paddd mm1,mm2 + paddd mm4,mm2 + psrad mm1,DESCALE_P1 + psrad mm4,DESCALE_P1 + paddd mm0,mm2 + paddd mm6,mm2 + psrad mm0,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm1,mm4 ; mm1=data3=(30 31 32 33) + packssdw mm0,mm6 ; mm0=data4=(40 41 42 43) + + movq mm7, MMWORD [wk(0)] ; mm7=(00 10 01 11) + movq mm2, MMWORD [wk(1)] ; mm2=(02 12 03 13) + + movq mm4,mm3 ; transpose coefficients(phase 1) + punpcklwd mm3,mm1 ; mm3=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm6,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm5 ; mm0=(40 50 41 51) + punpckhwd mm6,mm5 ; mm6=(42 52 43 53) + + movq mm1,mm7 ; transpose coefficients(phase 2) + punpckldq mm7,mm3 ; mm7=(00 10 20 30) + punpckhdq mm1,mm3 ; mm1=(01 11 21 31) + movq mm5,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm3, MMWORD [wk(4)] ; mm3=(60 70 61 71) + movq mm4, MMWORD [wk(5)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpckldq mm0,mm3 ; mm0=(40 50 60 70) + punpckhdq mm7,mm3 ; mm7=(41 51 61 71) + movq mm1,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm4 ; mm6=(42 52 62 72) + punpckhdq mm1,mm4 ; mm1=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7 + paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2] + + paddd mm5,mm3 + paddd mm7,mm3 + psrad mm5,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P2 + psrad mm0,DESCALE_P2 + + packssdw mm5,mm7 ; mm5=data0=(00 10 20 30) + packssdw mm2,mm0 ; mm2=data7=(07 17 27 37) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P2 + psrad mm0,DESCALE_P2 + + packssdw mm4,mm3 ; mm4=data1=(01 11 21 31) + packssdw mm7,mm0 ; mm7=data6=(06 16 26 36) + + packsswb mm5,mm7 ; mm5=(00 10 20 30 06 16 26 36) + packsswb mm4,mm2 ; mm4=(01 11 21 31 07 17 27 37) + + movq mm6, MMWORD [wk(6)] ; mm6=tmp12L + movq mm1, MMWORD [wk(7)] ; mm1=tmp12H + movq mm3, MMWORD [wk(10)] ; mm3=tmp1L + movq mm0, MMWORD [wk(11)] ; mm0=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 20 30 06 16 26 36) + movq MMWORD [wk(1)], mm4 ; wk(1)=(01 11 21 31 07 17 27 37) + + movq mm7,mm6 + movq mm2,mm1 + paddd mm6,mm3 ; mm6=data2L + paddd mm1,mm0 ; mm1=data2H + psubd mm7,mm3 ; mm7=data5L + psubd mm2,mm0 ; mm2=data5H + + movq mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2] + + paddd mm6,mm5 + paddd mm1,mm5 + psrad mm6,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm7,mm5 + paddd mm2,mm5 + psrad mm7,DESCALE_P2 + psrad mm2,DESCALE_P2 + + packssdw mm6,mm1 ; mm6=data2=(02 12 22 32) + packssdw mm7,mm2 ; mm7=data5=(05 15 25 35) + + movq mm4, MMWORD [wk(2)] ; mm4=tmp13L + movq mm3, MMWORD [wk(3)] ; mm3=tmp13H + movq mm0, MMWORD [wk(8)] ; mm0=tmp0L + movq mm5, MMWORD [wk(9)] ; mm5=tmp0H + + movq mm1,mm4 + movq mm2,mm3 + paddd mm4,mm0 ; mm4=data3L + paddd mm3,mm5 ; mm3=data3H + psubd mm1,mm0 ; mm1=data4L + psubd mm2,mm5 ; mm2=data4H + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2] + + paddd mm4,mm0 + paddd mm3,mm0 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm1,mm0 + paddd mm2,mm0 + psrad mm1,DESCALE_P2 + psrad mm2,DESCALE_P2 + + movq mm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm5=[PB_CENTERJSAMP] + + packssdw mm4,mm3 ; mm4=data3=(03 13 23 33) + packssdw mm1,mm2 ; mm1=data4=(04 14 24 34) + + movq mm0, MMWORD [wk(0)] ; mm0=(00 10 20 30 06 16 26 36) + movq mm3, MMWORD [wk(1)] ; mm3=(01 11 21 31 07 17 27 37) + + packsswb mm6,mm1 ; mm6=(02 12 22 32 04 14 24 34) + packsswb mm4,mm7 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm5 + paddb mm3,mm5 + paddb mm6,mm5 + paddb mm4,mm5 + + movq mm2,mm0 ; transpose coefficients(phase 1) + punpcklbw mm0,mm3 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm3 ; mm2=(06 07 16 17 26 27 36 37) + movq mm1,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm4 ; mm6=(02 03 12 13 22 23 32 33) + punpckhbw mm1,mm4 ; mm1=(04 05 14 15 24 25 34 35) + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm6 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm7,mm6 ; mm7=(20 21 22 23 30 31 32 33) + movq mm5,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm2 ; mm1=(04 05 06 07 14 15 16 17) + punpckhwd mm5,mm2 ; mm5=(24 25 26 27 34 35 36 37) + + movq mm3,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm1 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm3,mm1 ; mm3=(10 11 12 13 14 15 16 17) + movq mm4,mm7 ; transpose coefficients(phase 3) + punpckldq mm7,mm5 ; mm7=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm5 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,848 @@ +; +; jidctint.asm - accurate integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_sse2) PRIVATE + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = jpeg_component_info *compptr +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 16 + global EXTN(jsimd_idct_islow_sse2) PRIVATE + +EXTN(jsimd_idct_islow_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5,PASS1_BITS + + movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm4,xmm3 ; xmm3=in6=z3 + punpckhwd xmm5,xmm3 + movdqa xmm1,xmm4 + movdqa xmm3,xmm5 + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm3,[rel PW_F054_MF130] ; xmm3=tmp2H + + movdqa xmm6,xmm0 + paddw xmm0,xmm2 ; xmm0=in0+in4 + psubw xmm6,xmm2 ; xmm6=in0-in4 + + pxor xmm7,xmm7 + pxor xmm2,xmm2 + punpcklwd xmm7,xmm0 ; xmm7=tmp0L + punpckhwd xmm2,xmm0 ; xmm2=tmp0H + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0,xmm7 + paddd xmm7,xmm4 ; xmm7=tmp10L + psubd xmm0,xmm4 ; xmm0=tmp13L + movdqa xmm4,xmm2 + paddd xmm2,xmm5 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm7,xmm7 + punpcklwd xmm5,xmm6 ; xmm5=tmp1L + punpckhwd xmm7,xmm6 ; xmm7=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm2,xmm1 ; xmm2=tmp12L + movdqa xmm0,xmm7 + paddd xmm7,xmm3 ; xmm7=tmp11H + psubd xmm0,xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5,xmm6 + movdqa xmm7,xmm4 + paddw xmm5,xmm3 ; xmm5=z3 + paddw xmm7,xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2,xmm5 + movdqa xmm0,xmm5 + punpcklwd xmm2,xmm7 + punpckhwd xmm0,xmm7 + movdqa xmm5,xmm2 + movdqa xmm7,xmm0 + pmaddwd xmm2,[rel PW_MF078_F117] ; xmm2=z3L + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm7,[rel PW_F117_F078] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2,xmm3 + movdqa xmm0,xmm3 + punpcklwd xmm2,xmm4 + punpckhwd xmm0,xmm4 + movdqa xmm3,xmm2 + movdqa xmm4,xmm0 + pmaddwd xmm2,[rel PW_MF060_MF089] ; xmm2=tmp0L + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0H + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3L + pmaddwd xmm4,[rel PW_MF089_F060] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3,xmm5 ; xmm3=tmp3L + paddd xmm4,xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2,xmm1 + movdqa xmm0,xmm1 + punpcklwd xmm2,xmm6 + punpckhwd xmm0,xmm6 + movdqa xmm1,xmm2 + movdqa xmm6,xmm0 + pmaddwd xmm2,[rel PW_MF050_MF256] ; xmm2=tmp1L + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1H + pmaddwd xmm1,[rel PW_MF256_F050] ; xmm1=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm2,xmm5 ; xmm2=tmp1L + paddd xmm0,xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2,xmm5 + movdqa xmm0,xmm7 + paddd xmm5,xmm3 ; xmm5=data0L + paddd xmm7,xmm4 ; xmm7=data0H + psubd xmm2,xmm3 ; xmm2=data7L + psubd xmm0,xmm4 ; xmm0=data7H + + movdqa xmm3,[rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1] + + paddd xmm5,xmm3 + paddd xmm7,xmm3 + psrad xmm5,DESCALE_P1 + psrad xmm7,DESCALE_P1 + paddd xmm2,xmm3 + paddd xmm0,xmm3 + psrad xmm2,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7,xmm4 + movdqa xmm0,xmm3 + paddd xmm4,xmm1 ; xmm4=data1L + paddd xmm3,xmm6 ; xmm3=data1H + psubd xmm7,xmm1 ; xmm7=data6L + psubd xmm0,xmm6 ; xmm0=data6H + + movdqa xmm1,[rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1] + + paddd xmm4,xmm1 + paddd xmm3,xmm1 + psrad xmm4,DESCALE_P1 + psrad xmm3,DESCALE_P1 + paddd xmm7,xmm1 + paddd xmm0,xmm1 + psrad xmm7,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5,xmm3 + movdqa xmm6,xmm0 + paddd xmm3,xmm4 ; xmm3=data2L + paddd xmm0,xmm2 ; xmm0=data2H + psubd xmm5,xmm4 ; xmm5=data5L + psubd xmm6,xmm2 ; xmm6=data5H + + movdqa xmm7,[rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1] + + paddd xmm3,xmm7 + paddd xmm0,xmm7 + psrad xmm3,DESCALE_P1 + psrad xmm0,DESCALE_P1 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + psrad xmm5,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0,xmm1 + movdqa xmm6,xmm4 + paddd xmm1,xmm2 ; xmm1=data3L + paddd xmm4,xmm7 ; xmm4=data3H + psubd xmm0,xmm2 ; xmm0=data4L + psubd xmm6,xmm7 ; xmm6=data4H + + movdqa xmm2,[rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1] + + paddd xmm1,xmm2 + paddd xmm4,xmm2 + psrad xmm1,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm0,xmm2 + paddd xmm6,xmm2 + psrad xmm0,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4,xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm6,xmm2 ; xmm2=in6=z3 + punpckhwd xmm5,xmm2 + movdqa xmm1,xmm6 + movdqa xmm2,xmm5 + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm2,[rel PW_F054_MF130] ; xmm2=tmp2H + + movdqa xmm3,xmm7 + paddw xmm7,xmm0 ; xmm7=in0+in4 + psubw xmm3,xmm0 ; xmm3=in0-in4 + + pxor xmm4,xmm4 + pxor xmm0,xmm0 + punpcklwd xmm4,xmm7 ; xmm4=tmp0L + punpckhwd xmm0,xmm7 ; xmm0=tmp0H + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7,xmm4 + paddd xmm4,xmm6 ; xmm4=tmp10L + psubd xmm7,xmm6 ; xmm7=tmp13L + movdqa xmm6,xmm0 + paddd xmm0,xmm5 ; xmm0=tmp10H + psubd xmm6,xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm4,xmm4 + punpcklwd xmm5,xmm3 ; xmm5=tmp1L + punpckhwd xmm4,xmm3 ; xmm4=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm0,xmm1 ; xmm0=tmp12L + movdqa xmm7,xmm4 + paddd xmm4,xmm2 ; xmm4=tmp11H + psubd xmm7,xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5,xmm6 + movdqa xmm4,xmm3 + paddw xmm5,xmm1 ; xmm5=z3 + paddw xmm4,xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0,xmm5 + movdqa xmm7,xmm5 + punpcklwd xmm0,xmm4 + punpckhwd xmm7,xmm4 + movdqa xmm5,xmm0 + movdqa xmm4,xmm7 + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3L + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm4,[rel PW_F117_F078] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0,xmm1 + movdqa xmm7,xmm1 + punpcklwd xmm0,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm1,xmm0 + movdqa xmm3,xmm7 + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0L + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp0H + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp3L + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1,xmm5 ; xmm1=tmp3L + paddd xmm3,xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm0,xmm6 + punpckhwd xmm7,xmm6 + movdqa xmm2,xmm0 + movdqa xmm6,xmm7 + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1L + pmaddwd xmm7,[rel PW_MF050_MF256] ; xmm7=tmp1H + pmaddwd xmm2,[rel PW_MF256_F050] ; xmm2=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm0,xmm5 ; xmm0=tmp1L + paddd xmm7,xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0,xmm5 + movdqa xmm7,xmm4 + paddd xmm5,xmm1 ; xmm5=data0L + paddd xmm4,xmm3 ; xmm4=data0H + psubd xmm0,xmm1 ; xmm0=data7L + psubd xmm7,xmm3 ; xmm7=data7H + + movdqa xmm1,[rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2] + + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrad xmm5,DESCALE_P2 + psrad xmm4,DESCALE_P2 + paddd xmm0,xmm1 + paddd xmm7,xmm1 + psrad xmm0,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4,xmm3 + movdqa xmm7,xmm1 + paddd xmm3,xmm2 ; xmm3=data1L + paddd xmm1,xmm6 ; xmm1=data1H + psubd xmm4,xmm2 ; xmm4=data6L + psubd xmm7,xmm6 ; xmm7=data6H + + movdqa xmm2,[rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2] + + paddd xmm3,xmm2 + paddd xmm1,xmm2 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm4,xmm2 + paddd xmm7,xmm2 + psrad xmm4,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4,xmm6 + movdqa xmm0,xmm2 + paddd xmm6,xmm1 ; xmm6=data2L + paddd xmm2,xmm7 ; xmm2=data2H + psubd xmm4,xmm1 ; xmm4=data5L + psubd xmm0,xmm7 ; xmm0=data5H + + movdqa xmm5,[rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2] + + paddd xmm6,xmm5 + paddd xmm2,xmm5 + psrad xmm6,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm4,xmm5 + paddd xmm0,xmm5 + psrad xmm4,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2,xmm3 + movdqa xmm0,xmm1 + paddd xmm3,xmm7 ; xmm3=data3L + paddd xmm1,xmm5 ; xmm1=data3H + psubd xmm2,xmm7 ; xmm2=data4L + psubd xmm0,xmm5 ; xmm0=data4H + + movdqa xmm7,[rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2] + + paddd xmm3,xmm7 + paddd xmm1,xmm7 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm2,xmm7 + paddd xmm0,xmm7 + psrad xmm2,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + movdqa xmm5,[rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP] + + packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7,xmm5 + paddb xmm1,xmm5 + paddb xmm6,xmm5 + paddb xmm3,xmm5 + + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctint-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctint-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,859 @@ +; +; jidctint.asm - accurate integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_sse2) PRIVATE + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 16 + global EXTN(jsimd_idct_islow_sse2) PRIVATE + +EXTN(jsimd_idct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5,PASS1_BITS + + movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm4,xmm3 ; xmm3=in6=z3 + punpckhwd xmm5,xmm3 + movdqa xmm1,xmm4 + movdqa xmm3,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F054_MF130)] ; xmm3=tmp2H + + movdqa xmm6,xmm0 + paddw xmm0,xmm2 ; xmm0=in0+in4 + psubw xmm6,xmm2 ; xmm6=in0-in4 + + pxor xmm7,xmm7 + pxor xmm2,xmm2 + punpcklwd xmm7,xmm0 ; xmm7=tmp0L + punpckhwd xmm2,xmm0 ; xmm2=tmp0H + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0,xmm7 + paddd xmm7,xmm4 ; xmm7=tmp10L + psubd xmm0,xmm4 ; xmm0=tmp13L + movdqa xmm4,xmm2 + paddd xmm2,xmm5 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm7,xmm7 + punpcklwd xmm5,xmm6 ; xmm5=tmp1L + punpckhwd xmm7,xmm6 ; xmm7=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm2,xmm1 ; xmm2=tmp12L + movdqa xmm0,xmm7 + paddd xmm7,xmm3 ; xmm7=tmp11H + psubd xmm0,xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5,xmm6 + movdqa xmm7,xmm4 + paddw xmm5,xmm3 ; xmm5=z3 + paddw xmm7,xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2,xmm5 + movdqa xmm0,xmm5 + punpcklwd xmm2,xmm7 + punpckhwd xmm0,xmm7 + movdqa xmm5,xmm2 + movdqa xmm7,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF078_F117)] ; xmm2=z3L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm7,[GOTOFF(ebx,PW_F117_F078)] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2,xmm3 + movdqa xmm0,xmm3 + punpcklwd xmm2,xmm4 + punpckhwd xmm0,xmm4 + movdqa xmm3,xmm2 + movdqa xmm4,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm2=tmp0L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF089_F060)] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3,xmm5 ; xmm3=tmp3L + paddd xmm4,xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2,xmm1 + movdqa xmm0,xmm1 + punpcklwd xmm2,xmm6 + punpckhwd xmm0,xmm6 + movdqa xmm1,xmm2 + movdqa xmm6,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm2=tmp1L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF256_F050)] ; xmm1=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm2,xmm5 ; xmm2=tmp1L + paddd xmm0,xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2,xmm5 + movdqa xmm0,xmm7 + paddd xmm5,xmm3 ; xmm5=data0L + paddd xmm7,xmm4 ; xmm7=data0H + psubd xmm2,xmm3 ; xmm2=data7L + psubd xmm0,xmm4 ; xmm0=data7H + + movdqa xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm3=[PD_DESCALE_P1] + + paddd xmm5,xmm3 + paddd xmm7,xmm3 + psrad xmm5,DESCALE_P1 + psrad xmm7,DESCALE_P1 + paddd xmm2,xmm3 + paddd xmm0,xmm3 + psrad xmm2,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7,xmm4 + movdqa xmm0,xmm3 + paddd xmm4,xmm1 ; xmm4=data1L + paddd xmm3,xmm6 ; xmm3=data1H + psubd xmm7,xmm1 ; xmm7=data6L + psubd xmm0,xmm6 ; xmm0=data6H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm1=[PD_DESCALE_P1] + + paddd xmm4,xmm1 + paddd xmm3,xmm1 + psrad xmm4,DESCALE_P1 + psrad xmm3,DESCALE_P1 + paddd xmm7,xmm1 + paddd xmm0,xmm1 + psrad xmm7,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5,xmm3 + movdqa xmm6,xmm0 + paddd xmm3,xmm4 ; xmm3=data2L + paddd xmm0,xmm2 ; xmm0=data2H + psubd xmm5,xmm4 ; xmm5=data5L + psubd xmm6,xmm2 ; xmm6=data5H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm7=[PD_DESCALE_P1] + + paddd xmm3,xmm7 + paddd xmm0,xmm7 + psrad xmm3,DESCALE_P1 + psrad xmm0,DESCALE_P1 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + psrad xmm5,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0,xmm1 + movdqa xmm6,xmm4 + paddd xmm1,xmm2 ; xmm1=data3L + paddd xmm4,xmm7 ; xmm4=data3H + psubd xmm0,xmm2 ; xmm0=data4L + psubd xmm6,xmm7 ; xmm6=data4H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm2=[PD_DESCALE_P1] + + paddd xmm1,xmm2 + paddd xmm4,xmm2 + psrad xmm1,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm0,xmm2 + paddd xmm6,xmm2 + psrad xmm0,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4,xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm6,xmm2 ; xmm2=in6=z3 + punpckhwd xmm5,xmm2 + movdqa xmm1,xmm6 + movdqa xmm2,xmm5 + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F054_MF130)] ; xmm2=tmp2H + + movdqa xmm3,xmm7 + paddw xmm7,xmm0 ; xmm7=in0+in4 + psubw xmm3,xmm0 ; xmm3=in0-in4 + + pxor xmm4,xmm4 + pxor xmm0,xmm0 + punpcklwd xmm4,xmm7 ; xmm4=tmp0L + punpckhwd xmm0,xmm7 ; xmm0=tmp0H + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7,xmm4 + paddd xmm4,xmm6 ; xmm4=tmp10L + psubd xmm7,xmm6 ; xmm7=tmp13L + movdqa xmm6,xmm0 + paddd xmm0,xmm5 ; xmm0=tmp10H + psubd xmm6,xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm4,xmm4 + punpcklwd xmm5,xmm3 ; xmm5=tmp1L + punpckhwd xmm4,xmm3 ; xmm4=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm0,xmm1 ; xmm0=tmp12L + movdqa xmm7,xmm4 + paddd xmm4,xmm2 ; xmm4=tmp11H + psubd xmm7,xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5,xmm6 + movdqa xmm4,xmm3 + paddw xmm5,xmm1 ; xmm5=z3 + paddw xmm4,xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0,xmm5 + movdqa xmm7,xmm5 + punpcklwd xmm0,xmm4 + punpckhwd xmm7,xmm4 + movdqa xmm5,xmm0 + movdqa xmm4,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm4,[GOTOFF(ebx,PW_F117_F078)] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0,xmm1 + movdqa xmm7,xmm1 + punpcklwd xmm0,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm1,xmm0 + movdqa xmm3,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp0H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp3L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1,xmm5 ; xmm1=tmp3L + paddd xmm3,xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm0,xmm6 + punpckhwd xmm7,xmm6 + movdqa xmm2,xmm0 + movdqa xmm6,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm7=tmp1H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF256_F050)] ; xmm2=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm0,xmm5 ; xmm0=tmp1L + paddd xmm7,xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0,xmm5 + movdqa xmm7,xmm4 + paddd xmm5,xmm1 ; xmm5=data0L + paddd xmm4,xmm3 ; xmm4=data0H + psubd xmm0,xmm1 ; xmm0=data7L + psubd xmm7,xmm3 ; xmm7=data7H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm1=[PD_DESCALE_P2] + + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrad xmm5,DESCALE_P2 + psrad xmm4,DESCALE_P2 + paddd xmm0,xmm1 + paddd xmm7,xmm1 + psrad xmm0,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4,xmm3 + movdqa xmm7,xmm1 + paddd xmm3,xmm2 ; xmm3=data1L + paddd xmm1,xmm6 ; xmm1=data1H + psubd xmm4,xmm2 ; xmm4=data6L + psubd xmm7,xmm6 ; xmm7=data6H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm2=[PD_DESCALE_P2] + + paddd xmm3,xmm2 + paddd xmm1,xmm2 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm4,xmm2 + paddd xmm7,xmm2 + psrad xmm4,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4,xmm6 + movdqa xmm0,xmm2 + paddd xmm6,xmm1 ; xmm6=data2L + paddd xmm2,xmm7 ; xmm2=data2H + psubd xmm4,xmm1 ; xmm4=data5L + psubd xmm0,xmm7 ; xmm0=data5H + + movdqa xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm5=[PD_DESCALE_P2] + + paddd xmm6,xmm5 + paddd xmm2,xmm5 + psrad xmm6,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm4,xmm5 + paddd xmm0,xmm5 + psrad xmm4,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2,xmm3 + movdqa xmm0,xmm1 + paddd xmm3,xmm7 ; xmm3=data3L + paddd xmm1,xmm5 ; xmm1=data3H + psubd xmm2,xmm7 ; xmm2=data4L + psubd xmm0,xmm5 ; xmm0=data4H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm7=[PD_DESCALE_P2] + + paddd xmm3,xmm7 + paddd xmm1,xmm7 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm2,xmm7 + paddd xmm0,xmm7 + psrad xmm2,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + movdqa xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm5=[PB_CENTERJSAMP] + + packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7,xmm5 + paddb xmm1,xmm5 + paddb xmm6,xmm5 + paddb xmm3,xmm5 + + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,706 @@ +; +; jidctred.asm - reduced-size IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_mmx) PRIVATE + +EXTN(jconst_idct_red_mmx): + +PW_F184_MF076 times 2 dw F_1_847,-F_0_765 +PW_F256_F089 times 2 dw F_2_562, F_0_899 +PW_F106_MF217 times 2 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 2 dw F_1_451,-F_0_211 +PW_F362_MF127 times 2 dw F_3_624,-F_1_272 +PW_F085_MF072 times 2 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_4x4_mmx) PRIVATE + +EXTN(jsimd_idct_4x4_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm0,mm1 + packsswb mm0,mm0 + movd eax,mm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor mm1,mm1 + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ; mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm6=[PD_DESCALE_P1_4] + + paddd mm1,mm6 + paddd mm2,mm6 + psrad mm1,DESCALE_P1_4 + psrad mm2,DESCALE_P1_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm1,mm2 ; mm1=data0=(00 01 02 03) + packssdw mm5,mm3 ; mm5=data3=(30 31 32 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm7=[PD_DESCALE_P1_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P1_4 + psrad mm0,DESCALE_P1_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm4,mm0 ; mm4=data1=(10 11 12 13) + packssdw mm2,mm3 ; mm2=data2=(20 21 22 23) + + movq mm6,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm4 ; mm1=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm7,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm5 ; mm2=(20 30 21 31) + punpckhwd mm7,mm5 ; mm7=(22 32 23 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm2 ; mm1=(00 10 20 30) + punpckhdq mm0,mm2 ; mm0=(01 11 21 31) + movq mm3,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(02 12 22 32) + punpckhdq mm3,mm7 ; mm3=(03 13 23 33) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + pxor mm1,mm1 + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ; mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm6=[PD_DESCALE_P2_4] + + paddd mm1,mm6 + paddd mm2,mm6 + psrad mm1,DESCALE_P2_4 + psrad mm2,DESCALE_P2_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm1,mm2 ; mm1=data0=(00 10 20 30) + packssdw mm5,mm3 ; mm5=data3=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm7=[PD_DESCALE_P2_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P2_4 + psrad mm0,DESCALE_P2_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm4,mm0 ; mm4=data1=(01 11 21 31) + packssdw mm2,mm3 ; mm2=data2=(02 12 22 32) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm1,mm2 ; mm1=(00 10 20 30 02 12 22 32) + packsswb mm4,mm5 ; mm4=(01 11 21 31 03 13 23 33) + paddb mm1,mm6 + paddb mm4,mm6 + + movq mm7,mm1 ; transpose coefficients(phase 1) + punpcklbw mm1,mm4 ; mm1=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm4 ; mm7=(02 03 12 13 22 23 32 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm7 ; mm1=(00 01 02 03 10 11 12 13) + punpckhwd mm0,mm7 ; mm0=(20 21 22 23 30 31 32 33) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + + psrlq mm1,4*BYTE_BIT + psrlq mm0,4*BYTE_BIT + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_mmx) PRIVATE + +EXTN(jsimd_idct_2x2_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + + mov edx, POINTER [dct_table(ebp)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm0=(10 11 ** 13), mm1=(30 31 ** 33) + ; mm2=(50 51 ** 53), mm3=(70 71 ** 73) + + pcmpeqd mm7,mm7 + pslld mm7,WORD_BIT ; mm7={0x0000 0xFFFF 0x0000 0xFFFF} + + movq mm4,mm0 ; mm4=(10 11 ** 13) + movq mm5,mm2 ; mm5=(50 51 ** 53) + punpcklwd mm4,mm1 ; mm4=(10 30 11 31) + punpcklwd mm5,mm3 ; mm5=(50 70 51 71) + pmaddwd mm4,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] + + psrld mm0,WORD_BIT ; mm0=(11 -- 13 --) + pand mm1,mm7 ; mm1=(-- 31 -- 33) + psrld mm2,WORD_BIT ; mm2=(51 -- 53 --) + pand mm3,mm7 ; mm3=(-- 71 -- 73) + por mm0,mm1 ; mm0=(11 31 13 33) + por mm2,mm3 ; mm2=(51 71 53 73) + pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm4,mm5 ; mm4=tmp0[col0 col1] + + movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)] + pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)] + pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm6=(** 15 ** 17), mm1=(** 35 ** 37) + ; mm3=(** 55 ** 57), mm5=(** 75 ** 77) + + psrld mm6,WORD_BIT ; mm6=(15 -- 17 --) + pand mm1,mm7 ; mm1=(-- 35 -- 37) + psrld mm3,WORD_BIT ; mm3=(55 -- 57 --) + pand mm5,mm7 ; mm5=(-- 75 -- 77) + por mm6,mm1 ; mm6=(15 35 17 37) + por mm3,mm5 ; mm3=(55 75 57 77) + pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm0,mm2 ; mm0=tmp0[col1 col3] + paddd mm6,mm3 ; mm6=tmp0[col5 col7] + + ; -- Even part + + movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm1=(00 01 ** 03), mm5=(** 05 ** 07) + + movq mm2,mm1 ; mm2=(00 01 ** 03) + pslld mm1,WORD_BIT ; mm1=(-- 00 -- **) + psrad mm1,(WORD_BIT-CONST_BITS-2) ; mm1=tmp10[col0 ****] + + pand mm2,mm7 ; mm2=(-- 01 -- 03) + pand mm5,mm7 ; mm5=(-- 05 -- 07) + psrad mm2,(WORD_BIT-CONST_BITS-2) ; mm2=tmp10[col1 col3] + psrad mm5,(WORD_BIT-CONST_BITS-2) ; mm5=tmp10[col5 col7] + + ; -- Final output stage + + movq mm3,mm1 + paddd mm1,mm4 ; mm1=data0[col0 ****]=(A0 **) + psubd mm3,mm4 ; mm3=data1[col0 ****]=(B0 **) + punpckldq mm1,mm3 ; mm1=(A0 B0) + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; mm7=[PD_DESCALE_P1_2] + + movq mm4,mm2 + movq mm3,mm5 + paddd mm2,mm0 ; mm2=data0[col1 col3]=(A1 A3) + paddd mm5,mm6 ; mm5=data0[col5 col7]=(A5 A7) + psubd mm4,mm0 ; mm4=data1[col1 col3]=(B1 B3) + psubd mm3,mm6 ; mm3=data1[col5 col7]=(B5 B7) + + paddd mm1,mm7 + psrad mm1,DESCALE_P1_2 + + paddd mm2,mm7 + paddd mm5,mm7 + psrad mm2,DESCALE_P1_2 + psrad mm5,DESCALE_P1_2 + paddd mm4,mm7 + paddd mm3,mm7 + psrad mm4,DESCALE_P1_2 + psrad mm3,DESCALE_P1_2 + + ; ---- Pass 2: process rows, store into output array. + + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw mm2,mm4 ; mm2=(A1 A3 B1 B3) + packssdw mm5,mm3 ; mm5=(A5 A7 B5 B7) + pmaddwd mm2,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm2,mm5 ; mm2=tmp0[row0 row1] + + ; -- Even part + + pslld mm1,(CONST_BITS+2) ; mm1=tmp10[row0 row1] + + ; -- Final output stage + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)] ; mm0=[PD_DESCALE_P2_2] + + movq mm6,mm1 + paddd mm1,mm2 ; mm1=data0[row0 row1]=(C0 C1) + psubd mm6,mm2 ; mm6=data1[row0 row1]=(D0 D1) + + paddd mm1,mm0 + paddd mm6,mm0 + psrad mm1,DESCALE_P2_2 + psrad mm6,DESCALE_P2_2 + + movq mm7,mm1 ; transpose coefficients + punpckldq mm1,mm6 ; mm1=(C0 D0) + punpckhdq mm7,mm6 ; mm7=(C1 D1) + + packssdw mm1,mm7 ; mm1=(C0 D0 C1 D1) + packsswb mm1,mm1 ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1) + paddb mm1,[GOTOFF(ebx,PB_CENTERJSAMP)] + + movd ecx,mm1 + movd ebx,mm1 ; ebx=(C0 D0 C1 D1) + shr ecx,2*BYTE_BIT ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,576 @@ +; +; jidctred.asm - reduced-size IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_sse2) PRIVATE + +EXTN(jconst_idct_red_sse2): + +PW_F184_MF076 times 4 dw F_1_847,-F_0_765 +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 4 dw F_1_451,-F_0_211 +PW_F362_MF127 times 4 dw F_3_624,-F_1_272 +PW_F085_MF072 times 4 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_4x4_sse2) PRIVATE + +EXTN(jsimd_idct_4x4_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm0,xmm1 + packsswb xmm0,xmm0 + packsswb xmm0,xmm0 + movd eax,xmm0 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0,PASS1_BITS + + movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) + + jmp near .column_end +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm0 + punpcklwd xmm4,xmm1 + punpckhwd xmm5,xmm1 + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + pmaddwd xmm4,[rel PW_F256_F089] ; xmm4=(tmp2L) + pmaddwd xmm5,[rel PW_F256_F089] ; xmm5=(tmp2H) + pmaddwd xmm0,[rel PW_F106_MF217] ; xmm0=(tmp0L) + pmaddwd xmm1,[rel PW_F106_MF217] ; xmm1=(tmp0H) + + movdqa xmm6,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm6,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2L) + pmaddwd xmm7,[rel PW_MF060_MF050] ; xmm7=(tmp2H) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0L) + pmaddwd xmm3,[rel PW_F145_MF021] ; xmm3=(tmp0H) + + paddd xmm6,xmm4 ; xmm6=tmp2L + paddd xmm7,xmm5 ; xmm7=tmp2H + paddd xmm2,xmm0 ; xmm2=tmp0L + paddd xmm3,xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1,xmm1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm4 ; xmm1=tmp0L + punpckhwd xmm2,xmm4 ; xmm2=tmp0H + psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3,xmm5 ; xmm5=in2=z2 + punpcklwd xmm5,xmm0 ; xmm0=in6=z3 + punpckhwd xmm3,xmm0 + pmaddwd xmm5,[rel PW_F184_MF076] ; xmm5=tmp2L + pmaddwd xmm3,[rel PW_F184_MF076] ; xmm3=tmp2H + + movdqa xmm4,xmm1 + movdqa xmm0,xmm2 + paddd xmm1,xmm5 ; xmm1=tmp10L + paddd xmm2,xmm3 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp12L + psubd xmm0,xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5,xmm1 + movdqa xmm3,xmm2 + paddd xmm1,xmm6 ; xmm1=data0L + paddd xmm2,xmm7 ; xmm2=data0H + psubd xmm5,xmm6 ; xmm5=data3L + psubd xmm3,xmm7 ; xmm3=data3H + + movdqa xmm6,[rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4] + + paddd xmm1,xmm6 + paddd xmm2,xmm6 + psrad xmm1,DESCALE_P1_4 + psrad xmm2,DESCALE_P1_4 + paddd xmm5,xmm6 + paddd xmm3,xmm6 + psrad xmm5,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2,xmm4 + movdqa xmm3,xmm0 + paddd xmm4,xmm7 ; xmm4=data1L + paddd xmm0,xmm6 ; xmm0=data1H + psubd xmm2,xmm7 ; xmm2=data2L + psubd xmm3,xmm6 ; xmm3=data2H + + movdqa xmm7,[rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4] + + paddd xmm4,xmm7 + paddd xmm0,xmm7 + psrad xmm4,DESCALE_P1_4 + psrad xmm0,DESCALE_P1_4 + paddd xmm2,xmm7 + paddd xmm3,xmm7 + psrad xmm2,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + pxor xmm4,xmm4 + punpcklwd xmm4,xmm1 ; xmm4=tmp0 + psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1,xmm0 + punpckhwd xmm6,xmm3 + movdqa xmm5,xmm1 + movdqa xmm2,xmm6 + pmaddwd xmm1,[rel PW_F256_F089] ; xmm1=(tmp2) + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2) + pmaddwd xmm5,[rel PW_F106_MF217] ; xmm5=(tmp0) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0) + + paddd xmm6,xmm1 ; xmm6=tmp2 + paddd xmm2,xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0,xmm3 + pmaddwd xmm0,[rel PW_F184_MF076] ; xmm0=tmp2 + + movdqa xmm7,xmm4 + paddd xmm4,xmm0 ; xmm4=tmp10 + psubd xmm7,xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1,[rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4] + + movdqa xmm5,xmm4 + movdqa xmm3,xmm7 + paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4,xmm1 + paddd xmm7,xmm1 + psrad xmm4,DESCALE_P2_4 + psrad xmm7,DESCALE_P2_4 + paddd xmm5,xmm1 + paddd xmm3,xmm1 + psrad xmm5,DESCALE_P2_4 + psrad xmm3,DESCALE_P2_4 + + packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0,xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4,[rel PB_CENTERJSAMP] + + pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) + pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 + mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_sse2) PRIVATE + +EXTN(jsimd_idct_2x2_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7,xmm7 + pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4,[rel PW_F362_MF127] + pmaddwd xmm5,[rel PW_F085_MF072] + + psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0,[rel PW_F362_MF127] + pmaddwd xmm2,[rel PW_F085_MF072] + + paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + + ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3,xmm6 + movdqa xmm5,xmm1 + paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2,[rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2] + + punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7,xmm1 + punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6,xmm2 + psrad xmm6,DESCALE_P1_2 + + paddd xmm1,xmm2 + paddd xmm7,xmm2 + psrad xmm1,DESCALE_P1_2 + psrad xmm7,DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1,[rel PW_F362_MF127] + pmaddwd xmm7,[rel PW_F085_MF072] + + paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4,xmm6 + paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6,[rel PD_DESCALE_P2_2] + psrad xmm6,DESCALE_P2_2 + + packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6,[rel PB_CENTERJSAMP] + + pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) + pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx + mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx + + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jidctred-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jidctred-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,594 @@ +; +; jidctred.asm - reduced-size IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_sse2) PRIVATE + +EXTN(jconst_idct_red_sse2): + +PW_F184_MF076 times 4 dw F_1_847,-F_0_765 +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 4 dw F_1_451,-F_0_211 +PW_F362_MF127 times 4 dw F_3_624,-F_1_272 +PW_F085_MF072 times 4 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_4x4_sse2) PRIVATE + +EXTN(jsimd_idct_4x4_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm0,xmm1 + packsswb xmm0,xmm0 + packsswb xmm0,xmm0 + movd eax,xmm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0,PASS1_BITS + + movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) + + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm0 + punpcklwd xmm4,xmm1 + punpckhwd xmm5,xmm1 + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F256_F089)] ; xmm4=(tmp2L) + pmaddwd xmm5,[GOTOFF(ebx,PW_F256_F089)] ; xmm5=(tmp2H) + pmaddwd xmm0,[GOTOFF(ebx,PW_F106_MF217)] ; xmm0=(tmp0L) + pmaddwd xmm1,[GOTOFF(ebx,PW_F106_MF217)] ; xmm1=(tmp0H) + + movdqa xmm6,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm6,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2L) + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm7=(tmp2H) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0L) + pmaddwd xmm3,[GOTOFF(ebx,PW_F145_MF021)] ; xmm3=(tmp0H) + + paddd xmm6,xmm4 ; xmm6=tmp2L + paddd xmm7,xmm5 ; xmm7=tmp2H + paddd xmm2,xmm0 ; xmm2=tmp0L + paddd xmm3,xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1,xmm1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm4 ; xmm1=tmp0L + punpckhwd xmm2,xmm4 ; xmm2=tmp0H + psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3,xmm5 ; xmm5=in2=z2 + punpcklwd xmm5,xmm0 ; xmm0=in6=z3 + punpckhwd xmm3,xmm0 + pmaddwd xmm5,[GOTOFF(ebx,PW_F184_MF076)] ; xmm5=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F184_MF076)] ; xmm3=tmp2H + + movdqa xmm4,xmm1 + movdqa xmm0,xmm2 + paddd xmm1,xmm5 ; xmm1=tmp10L + paddd xmm2,xmm3 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp12L + psubd xmm0,xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5,xmm1 + movdqa xmm3,xmm2 + paddd xmm1,xmm6 ; xmm1=data0L + paddd xmm2,xmm7 ; xmm2=data0H + psubd xmm5,xmm6 ; xmm5=data3L + psubd xmm3,xmm7 ; xmm3=data3H + + movdqa xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm6=[PD_DESCALE_P1_4] + + paddd xmm1,xmm6 + paddd xmm2,xmm6 + psrad xmm1,DESCALE_P1_4 + psrad xmm2,DESCALE_P1_4 + paddd xmm5,xmm6 + paddd xmm3,xmm6 + psrad xmm5,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2,xmm4 + movdqa xmm3,xmm0 + paddd xmm4,xmm7 ; xmm4=data1L + paddd xmm0,xmm6 ; xmm0=data1H + psubd xmm2,xmm7 ; xmm2=data2L + psubd xmm3,xmm6 ; xmm3=data2H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm7=[PD_DESCALE_P1_4] + + paddd xmm4,xmm7 + paddd xmm0,xmm7 + psrad xmm4,DESCALE_P1_4 + psrad xmm0,DESCALE_P1_4 + paddd xmm2,xmm7 + paddd xmm3,xmm7 + psrad xmm2,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + pxor xmm4,xmm4 + punpcklwd xmm4,xmm1 ; xmm4=tmp0 + psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1,xmm0 + punpckhwd xmm6,xmm3 + movdqa xmm5,xmm1 + movdqa xmm2,xmm6 + pmaddwd xmm1,[GOTOFF(ebx,PW_F256_F089)] ; xmm1=(tmp2) + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2) + pmaddwd xmm5,[GOTOFF(ebx,PW_F106_MF217)] ; xmm5=(tmp0) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0) + + paddd xmm6,xmm1 ; xmm6=tmp2 + paddd xmm2,xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0,xmm3 + pmaddwd xmm0,[GOTOFF(ebx,PW_F184_MF076)] ; xmm0=tmp2 + + movdqa xmm7,xmm4 + paddd xmm4,xmm0 ; xmm4=tmp10 + psubd xmm7,xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; xmm1=[PD_DESCALE_P2_4] + + movdqa xmm5,xmm4 + movdqa xmm3,xmm7 + paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4,xmm1 + paddd xmm7,xmm1 + psrad xmm4,DESCALE_P2_4 + psrad xmm7,DESCALE_P2_4 + paddd xmm5,xmm1 + paddd xmm3,xmm1 + psrad xmm5,DESCALE_P2_4 + psrad xmm3,DESCALE_P2_4 + + packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0,xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)] + + pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) + pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_sse2) PRIVATE + +EXTN(jsimd_idct_2x2_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + + mov edx, POINTER [dct_table(ebp)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7,xmm7 + pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm5,[GOTOFF(ebx,PW_F085_MF072)] + + psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)] + + paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + + ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3,xmm6 + movdqa xmm5,xmm1 + paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; xmm2=[PD_DESCALE_P1_2] + + punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7,xmm1 + punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6,xmm2 + psrad xmm6,DESCALE_P1_2 + + paddd xmm1,xmm2 + paddd xmm7,xmm2 + psrad xmm1,DESCALE_P1_2 + psrad xmm7,DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm7,[GOTOFF(ebx,PW_F085_MF072)] + + paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4,xmm6 + paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)] + psrad xmm6,DESCALE_P2_2 + + packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)] + + pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) + pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jpeg_nbits_table.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jpeg_nbits_table.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jpeg_nbits_table.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jpeg_nbits_table.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4097 @@ +jpeg_nbits_table db \ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, \ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-3dn.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,233 @@ +; +; jquant.asm - sample data conversion and quantization (3DNow! & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_3dnow) PRIVATE + +EXTN(jsimd_convsamp_float_3dnow): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7,mm7 + psllw mm7,7 + packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0,mm7 ; mm0=(01234567) + psubb mm1,mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4,mm2 ; mm4=(***0***1) + punpckhwd mm2,mm2 ; mm2=(***2***3) + punpcklwd mm5,mm0 ; mm5=(***4***5) + punpckhwd mm0,mm0 ; mm0=(***6***7) + + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) + pi2fd mm4,mm4 + pi2fd mm2,mm2 + psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) + pi2fd mm5,mm5 + pi2fd mm0,mm0 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + + punpcklwd mm6,mm3 ; mm6=(***8***9) + punpckhwd mm3,mm3 ; mm3=(***A***B) + punpcklwd mm4,mm1 ; mm4=(***C***D) + punpckhwd mm1,mm1 ; mm1=(***E***F) + + psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) + pi2fd mm6,mm6 + pi2fd mm3,mm3 + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) + pi2fd mm4,mm4 + pi2fd mm1,mm1 + + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_3dnow (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_3dnow) PRIVATE + +EXTN(jsimd_quantize_float_3dnow): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic) + movd mm7,eax + punpckldq mm7,mm7 ; mm7={12582912.0F 12582912.0F} + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm0,mm7 ; mm0=(00 ** 01 **) + pfadd mm1,mm7 ; mm1=(02 ** 03 **) + pfadd mm2,mm7 ; mm0=(04 ** 05 **) + pfadd mm3,mm7 ; mm1=(06 ** 07 **) + + movq mm4,mm0 + punpcklwd mm0,mm1 ; mm0=(00 02 ** **) + punpckhwd mm4,mm1 ; mm4=(01 03 ** **) + movq mm5,mm2 + punpcklwd mm2,mm3 ; mm2=(04 06 ** **) + punpckhwd mm5,mm3 ; mm5=(05 07 ** **) + + punpcklwd mm0,mm4 ; mm0=(00 01 02 03) + punpcklwd mm2,mm5 ; mm2=(04 05 06 07) + + movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] + movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm6,mm7 ; mm0=(10 ** 11 **) + pfadd mm1,mm7 ; mm4=(12 ** 13 **) + pfadd mm3,mm7 ; mm0=(14 ** 15 **) + pfadd mm4,mm7 ; mm4=(16 ** 17 **) + + movq mm5,mm6 + punpcklwd mm6,mm1 ; mm6=(10 12 ** **) + punpckhwd mm5,mm1 ; mm5=(11 13 ** **) + movq mm1,mm3 + punpcklwd mm3,mm4 ; mm3=(14 16 ** **) + punpckhwd mm1,mm4 ; mm1=(15 17 ** **) + + punpcklwd mm6,mm5 ; mm6=(10 11 12 13) + punpcklwd mm3,mm1 ; mm3=(14 15 16 17) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquantf-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquantf-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquantf-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquantf-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,158 @@ +; +; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11 = JDIMENSION start_col +; r12 = FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse2) PRIVATE + +EXTN(jsimd_convsamp_float_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + pcmpeqw xmm7,xmm7 + psllw xmm7,7 + packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) + + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/2 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] + + psubb xmm0,xmm7 ; xmm0=(01234567) + psubb xmm1,xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2,xmm2 ; xmm2=(0123) + cvtdq2ps xmm0,xmm0 ; xmm0=(4567) + psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3,xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 + + add rsi, byte 2*SIZEOF_JSAMPROW + add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec rcx + jnz short .convloop + + pop rbx + uncollect_args + pop rbp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +; r10 = JCOEFPTR coef_block +; r11 = FAST_FLOAT *divisors +; r12 = FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse2) PRIVATE + +EXTN(jsimd_quantize_float_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/16 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0,xmm0 + cvtps2dq xmm1,xmm1 + cvtps2dq xmm2,xmm2 + cvtps2dq xmm3,xmm3 + + packssdw xmm0,xmm1 + packssdw xmm2,xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2 + + add rsi, byte 16*SIZEOF_FAST_FLOAT + add rdx, byte 16*SIZEOF_FAST_FLOAT + add rdi, byte 16*SIZEOF_JCOEF + dec rax + jnz short .quantloop + + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquantf-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquantf-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquantf-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquantf-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,171 @@ +; +; jquantf.asm - sample data conversion and quantization (SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse2) PRIVATE + +EXTN(jsimd_convsamp_float_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw xmm7,xmm7 + psllw xmm7,7 + packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb xmm0,xmm7 ; xmm0=(01234567) + psubb xmm1,xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2,xmm2 ; xmm2=(0123) + cvtdq2ps xmm0,xmm0 ; xmm0=(4567) + psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3,xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse2) PRIVATE + +EXTN(jsimd_quantize_float_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0,xmm0 + cvtps2dq xmm1,xmm1 + cvtps2dq xmm2,xmm2 + cvtps2dq xmm3,xmm3 + + packssdw xmm0,xmm1 + packssdw xmm2,xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-altivec.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,252 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */ + +#include "jsimd_altivec.h" + + +/* NOTE: The address will either be aligned or offset by 8 bytes, so we can + * always get the data we want by using a single vector load (although we may + * have to permute the result.) + */ +#if __BIG_ENDIAN__ + +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_ld(0, elemptr); \ + if ((size_t)elemptr & 15) \ + in##row = vec_perm(in##row, in##row, vec_lvsl(0, elemptr)); \ +} + +#else + +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_vsx_ld(0, elemptr); \ +} + +#endif + + +void +jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + JSAMPROW elemptr; + + __vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7; + __vector short out0, out1, out2, out3, out4, out5, out6, out7; + + /* Constants */ + __vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) }; + __vector unsigned char pb_zero = { __16X(0) }; + + LOAD_ROW(0); + LOAD_ROW(1); + LOAD_ROW(2); + LOAD_ROW(3); + LOAD_ROW(4); + LOAD_ROW(5); + LOAD_ROW(6); + LOAD_ROW(7); + + out0 = (__vector short)VEC_UNPACKHU(in0); + out1 = (__vector short)VEC_UNPACKHU(in1); + out2 = (__vector short)VEC_UNPACKHU(in2); + out3 = (__vector short)VEC_UNPACKHU(in3); + out4 = (__vector short)VEC_UNPACKHU(in4); + out5 = (__vector short)VEC_UNPACKHU(in5); + out6 = (__vector short)VEC_UNPACKHU(in6); + out7 = (__vector short)VEC_UNPACKHU(in7); + + out0 = vec_sub(out0, pw_centerjsamp); + out1 = vec_sub(out1, pw_centerjsamp); + out2 = vec_sub(out2, pw_centerjsamp); + out3 = vec_sub(out3, pw_centerjsamp); + out4 = vec_sub(out4, pw_centerjsamp); + out5 = vec_sub(out5, pw_centerjsamp); + out6 = vec_sub(out6, pw_centerjsamp); + out7 = vec_sub(out7, pw_centerjsamp); + + vec_st(out0, 0, workspace); + vec_st(out1, 16, workspace); + vec_st(out2, 32, workspace); + vec_st(out3, 48, workspace); + vec_st(out4, 64, workspace); + vec_st(out5, 80, workspace); + vec_st(out6, 96, workspace); + vec_st(out7, 112, workspace); +} + + +#define WORD_BIT 16 + +/* There is no AltiVec 16-bit unsigned multiply instruction, hence this. + We basically need an unsigned equivalent of vec_madds(). */ + +#define MULTIPLY(vs0, vs1, out) { \ + tmpe = vec_mule((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + tmpo = vec_mulo((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + out = (__vector short)vec_perm((__vector unsigned short)tmpe, \ + (__vector unsigned short)tmpo, \ + shift_pack_index); \ +} + +void +jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s, + corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7, + recip0, recip1, recip2, recip3, recip4, recip5, recip6, recip7, + scale0, scale1, scale2, scale3, scale4, scale5, scale6, scale7; + __vector unsigned int tmpe, tmpo; + + /* Constants */ + __vector unsigned short pw_word_bit_m1 = { __8X(WORD_BIT - 1) }; +#if __BIG_ENDIAN__ + __vector unsigned char shift_pack_index = + {0,1,16,17,4,5,20,21,8,9,24,25,12,13,28,29}; +#else + __vector unsigned char shift_pack_index = + {2,3,18,19,6,7,22,23,10,11,26,27,14,15,30,31}; +#endif + + row0 = vec_ld(0, workspace); + row1 = vec_ld(16, workspace); + row2 = vec_ld(32, workspace); + row3 = vec_ld(48, workspace); + row4 = vec_ld(64, workspace); + row5 = vec_ld(80, workspace); + row6 = vec_ld(96, workspace); + row7 = vec_ld(112, workspace); + + /* Branch-less absolute value */ + row0s = vec_sra(row0, pw_word_bit_m1); + row1s = vec_sra(row1, pw_word_bit_m1); + row2s = vec_sra(row2, pw_word_bit_m1); + row3s = vec_sra(row3, pw_word_bit_m1); + row4s = vec_sra(row4, pw_word_bit_m1); + row5s = vec_sra(row5, pw_word_bit_m1); + row6s = vec_sra(row6, pw_word_bit_m1); + row7s = vec_sra(row7, pw_word_bit_m1); + row0 = vec_xor(row0, row0s); + row1 = vec_xor(row1, row1s); + row2 = vec_xor(row2, row2s); + row3 = vec_xor(row3, row3s); + row4 = vec_xor(row4, row4s); + row5 = vec_xor(row5, row5s); + row6 = vec_xor(row6, row6s); + row7 = vec_xor(row7, row7s); + row0 = vec_sub(row0, row0s); + row1 = vec_sub(row1, row1s); + row2 = vec_sub(row2, row2s); + row3 = vec_sub(row3, row3s); + row4 = vec_sub(row4, row4s); + row5 = vec_sub(row5, row5s); + row6 = vec_sub(row6, row6s); + row7 = vec_sub(row7, row7s); + + corr0 = vec_ld(DCTSIZE2 * 2, divisors); + corr1 = vec_ld(DCTSIZE2 * 2 + 16, divisors); + corr2 = vec_ld(DCTSIZE2 * 2 + 32, divisors); + corr3 = vec_ld(DCTSIZE2 * 2 + 48, divisors); + corr4 = vec_ld(DCTSIZE2 * 2 + 64, divisors); + corr5 = vec_ld(DCTSIZE2 * 2 + 80, divisors); + corr6 = vec_ld(DCTSIZE2 * 2 + 96, divisors); + corr7 = vec_ld(DCTSIZE2 * 2 + 112, divisors); + + row0 = vec_add(row0, corr0); + row1 = vec_add(row1, corr1); + row2 = vec_add(row2, corr2); + row3 = vec_add(row3, corr3); + row4 = vec_add(row4, corr4); + row5 = vec_add(row5, corr5); + row6 = vec_add(row6, corr6); + row7 = vec_add(row7, corr7); + + recip0 = vec_ld(0, divisors); + recip1 = vec_ld(16, divisors); + recip2 = vec_ld(32, divisors); + recip3 = vec_ld(48, divisors); + recip4 = vec_ld(64, divisors); + recip5 = vec_ld(80, divisors); + recip6 = vec_ld(96, divisors); + recip7 = vec_ld(112, divisors); + + MULTIPLY(row0, recip0, row0); + MULTIPLY(row1, recip1, row1); + MULTIPLY(row2, recip2, row2); + MULTIPLY(row3, recip3, row3); + MULTIPLY(row4, recip4, row4); + MULTIPLY(row5, recip5, row5); + MULTIPLY(row6, recip6, row6); + MULTIPLY(row7, recip7, row7); + + scale0 = vec_ld(DCTSIZE2 * 4, divisors); + scale1 = vec_ld(DCTSIZE2 * 4 + 16, divisors); + scale2 = vec_ld(DCTSIZE2 * 4 + 32, divisors); + scale3 = vec_ld(DCTSIZE2 * 4 + 48, divisors); + scale4 = vec_ld(DCTSIZE2 * 4 + 64, divisors); + scale5 = vec_ld(DCTSIZE2 * 4 + 80, divisors); + scale6 = vec_ld(DCTSIZE2 * 4 + 96, divisors); + scale7 = vec_ld(DCTSIZE2 * 4 + 112, divisors); + + MULTIPLY(row0, scale0, row0); + MULTIPLY(row1, scale1, row1); + MULTIPLY(row2, scale2, row2); + MULTIPLY(row3, scale3, row3); + MULTIPLY(row4, scale4, row4); + MULTIPLY(row5, scale5, row5); + MULTIPLY(row6, scale6, row6); + MULTIPLY(row7, scale7, row7); + + row0 = vec_xor(row0, row0s); + row1 = vec_xor(row1, row1s); + row2 = vec_xor(row2, row2s); + row3 = vec_xor(row3, row3s); + row4 = vec_xor(row4, row4s); + row5 = vec_xor(row5, row5s); + row6 = vec_xor(row6, row6s); + row7 = vec_xor(row7, row7s); + row0 = vec_sub(row0, row0s); + row1 = vec_sub(row1, row1s); + row2 = vec_sub(row2, row2s); + row3 = vec_sub(row3, row3s); + row4 = vec_sub(row4, row4s); + row5 = vec_sub(row5, row5s); + row6 = vec_sub(row6, row6s); + row7 = vec_sub(row7, row7s); + + vec_st(row0, 0, coef_block); + vec_st(row1, 16, coef_block); + vec_st(row2, 32, coef_block); + vec_st(row3, 48, coef_block); + vec_st(row4, 64, coef_block); + vec_st(row5, 80, coef_block); + vec_st(row6, 96, coef_block); + vec_st(row7, 112, coef_block); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-sse2-64.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,187 @@ +; +; jquanti.asm - sample data conversion and quantization (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11 = JDIMENSION start_col +; r12 = DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_sse2) PRIVATE + +EXTN(jsimd_convsamp_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + pxor xmm6,xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7,xmm7 + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/4 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0,xmm6 ; xmm0=(01234567) + punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) + paddw xmm0,xmm7 + paddw xmm1,xmm7 + punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2,xmm7 + paddw xmm3,xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 4*SIZEOF_JSAMPROW + add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec rcx + jnz short .convloop + + pop rbx + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) + +; r10 = JCOEFPTR coef_block +; r11 = DCTELEM *divisors +; r12 = DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_sse2) PRIVATE + +EXTN(jsimd_quantize_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/32 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + psraw xmm4,(WORD_BIT-1) + psraw xmm5,(WORD_BIT-1) + psraw xmm6,(WORD_BIT-1) + psraw xmm7,(WORD_BIT-1) + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal + pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)] + pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)] + + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 + psubw xmm1,xmm5 + psubw xmm2,xmm6 + psubw xmm3,xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 32*SIZEOF_DCTELEM + add rdx, byte 32*SIZEOF_DCTELEM + add rdi, byte 32*SIZEOF_JCOEF + dec rax + jnz near .quantloop + + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-sse2.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquanti-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquanti-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,200 @@ +; +; jquanti.asm - sample data conversion and quantization (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_sse2) PRIVATE + +EXTN(jsimd_convsamp_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor xmm6,xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7,xmm7 + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0,xmm6 ; xmm0=(01234567) + punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) + paddw xmm0,xmm7 + paddw xmm1,xmm7 + punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2,xmm7 + paddw xmm3,xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; DCTELEM *divisors +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_sse2) PRIVATE + +EXTN(jsimd_quantize_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/32 + alignx 16,7 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)] + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + psraw xmm4,(WORD_BIT-1) + psraw xmm5,(WORD_BIT-1) + psraw xmm6,(WORD_BIT-1) + psraw xmm7,(WORD_BIT-1) + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,edx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,edx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,edx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)] + pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)] + + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 + psubw xmm1,xmm5 + psubw xmm2,xmm6 + psubw xmm3,xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 32*SIZEOF_DCTELEM + add edx, byte 32*SIZEOF_DCTELEM + add edi, byte 32*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-mmx.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,274 @@ +; +; jquant.asm - sample data conversion and quantization (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_mmx (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_mmx) PRIVATE + +EXTN(jsimd_convsamp_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor mm6,mm6 ; mm6=(all 0's) + pcmpeqw mm7,mm7 + psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567) + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm2=(GHIJKLMN) + movq mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm3=(OPQRSTUV) + + movq mm4,mm0 + punpcklbw mm0,mm6 ; mm0=(0123) + punpckhbw mm4,mm6 ; mm4=(4567) + movq mm5,mm1 + punpcklbw mm1,mm6 ; mm1=(89AB) + punpckhbw mm5,mm6 ; mm5=(CDEF) + + paddw mm0,mm7 + paddw mm4,mm7 + paddw mm1,mm7 + paddw mm5,mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5 + + movq mm0,mm2 + punpcklbw mm2,mm6 ; mm2=(GHIJ) + punpckhbw mm0,mm6 ; mm0=(KLMN) + movq mm4,mm3 + punpcklbw mm3,mm6 ; mm3=(OPQR) + punpckhbw mm4,mm6 ; mm4=(STUV) + + paddw mm2,mm7 + paddw mm0,mm7 + paddw mm3,mm7 + paddw mm4,mm7 + + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_mmx (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) +%define SHIFT(m,n,b) MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM) + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; DCTELEM *divisors +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_mmx) PRIVATE + +EXTN(jsimd_quantize_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov ah, 2 + alignx 16,7 +.quantloop1: + mov al, DCTSIZE2/8/2 + alignx 16,7 +.quantloop2: + movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)] + + movq mm0,mm2 + movq mm1,mm3 + + psraw mm2,(WORD_BIT-1) ; -1 if value < 0, 0 otherwise + psraw mm3,(WORD_BIT-1) + + pxor mm0,mm2 ; val = -val + pxor mm1,mm3 + psubw mm0,mm2 + psubw mm1,mm3 + + ; + ; MMX is an annoyingly crappy instruction set. It has two + ; misfeatures that are causing problems here: + ; + ; - All multiplications are signed. + ; + ; - The second operand for the shifts is not treated as packed. + ; + ; + ; We work around the first problem by implementing this algorithm: + ; + ; unsigned long unsigned_multiply(unsigned short x, unsigned short y) + ; { + ; enum { SHORT_BIT = 16 }; + ; signed short sx = (signed short) x; + ; signed short sy = (signed short) y; + ; signed long sz; + ; + ; sz = (long) sx * (long) sy; /* signed multiply */ + ; + ; if (sx < 0) sz += (long) sy << SHORT_BIT; + ; if (sy < 0) sz += (long) sx << SHORT_BIT; + ; + ; return (unsigned long) sz; + ; } + ; + ; (note that a negative sx adds _sy_ and vice versa) + ; + ; For the second problem, we replace the shift by a multiplication. + ; Unfortunately that means we have to deal with the signed issue again. + ; + + paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw mm1, MMWORD [CORRECTION(0,1,edx)] + + movq mm4,mm0 ; store current value for later + movq mm5,mm1 + pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)] + paddw mm0,mm4 ; reciprocal is always negative (MSB=1), + paddw mm1,mm5 ; so we always need to add the initial value + ; (input value is never negative as we + ; inverted it at the start of this routine) + + ; here it gets a bit tricky as both scale + ; and mm0/mm1 can be negative + movq mm6, MMWORD [SCALE(0,0,edx)] ; scale + movq mm7, MMWORD [SCALE(0,1,edx)] + movq mm4,mm0 + movq mm5,mm1 + pmulhw mm0,mm6 + pmulhw mm1,mm7 + + psraw mm6,(WORD_BIT-1) ; determine if scale is negative + psraw mm7,(WORD_BIT-1) + + pand mm6,mm4 ; and add input if it is + pand mm7,mm5 + paddw mm0,mm6 + paddw mm1,mm7 + + psraw mm4,(WORD_BIT-1) ; then check if negative input + psraw mm5,(WORD_BIT-1) + + pand mm4, MMWORD [SCALE(0,0,edx)] ; and add scale if it is + pand mm5, MMWORD [SCALE(0,1,edx)] + paddw mm0,mm4 + paddw mm1,mm5 + + pxor mm0,mm2 ; val = -val + pxor mm1,mm3 + psubw mm0,mm2 + psubw mm1,mm3 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1 + + add esi, byte 8*SIZEOF_DCTELEM + add edx, byte 8*SIZEOF_DCTELEM + add edi, byte 8*SIZEOF_JCOEF + dec al + jnz near .quantloop2 + dec ah + jnz near .quantloop1 ; to avoid branch misprediction + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-sse.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jquant-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jquant-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,211 @@ +; +; jquant.asm - sample data conversion and quantization (SSE & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse) PRIVATE + +EXTN(jsimd_convsamp_float_sse): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7,mm7 + psllw mm7,7 + packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0,mm7 ; mm0=(01234567) + psubb mm1,mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4,mm2 ; mm4=(***0***1) + punpckhwd mm2,mm2 ; mm2=(***2***3) + punpcklwd mm5,mm0 ; mm5=(***4***5) + punpckhwd mm0,mm0 ; mm0=(***6***7) + + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) + cvtpi2ps xmm0,mm4 ; xmm0=(01**) + cvtpi2ps xmm1,mm2 ; xmm1=(23**) + psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) + cvtpi2ps xmm2,mm5 ; xmm2=(45**) + cvtpi2ps xmm3,mm0 ; xmm3=(67**) + + punpcklwd mm6,mm3 ; mm6=(***8***9) + punpckhwd mm3,mm3 ; mm3=(***A***B) + punpcklwd mm4,mm1 ; mm4=(***C***D) + punpckhwd mm1,mm1 ; mm1=(***E***F) + + psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) + cvtpi2ps xmm4,mm6 ; xmm4=(89**) + cvtpi2ps xmm5,mm3 ; xmm5=(AB**) + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) + cvtpi2ps xmm6,mm4 ; xmm6=(CD**) + cvtpi2ps xmm7,mm1 ; xmm7=(EF**) + + movlhps xmm0,xmm1 ; xmm0=(0123) + movlhps xmm2,xmm3 ; xmm2=(4567) + movlhps xmm4,xmm5 ; xmm4=(89AB) + movlhps xmm6,xmm7 ; xmm6=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse) PRIVATE + +EXTN(jsimd_quantize_float_sse): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + movhlps xmm4,xmm0 + movhlps xmm5,xmm1 + + cvtps2pi mm0,xmm0 + cvtps2pi mm1,xmm1 + cvtps2pi mm4,xmm4 + cvtps2pi mm5,xmm5 + + movhlps xmm6,xmm2 + movhlps xmm7,xmm3 + + cvtps2pi mm2,xmm2 + cvtps2pi mm3,xmm3 + cvtps2pi mm6,xmm6 + cvtps2pi mm7,xmm7 + + packssdw mm0,mm4 + packssdw mm1,mm5 + packssdw mm2,mm6 + packssdw mm3,mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_altivec.h glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_altivec.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_altivec.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_altivec.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,99 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" +#include + + +/* Common code */ + +#define __4X(a) a, a, a, a +#define __4X2(a, b) a, b, a, b, a, b, a, b +#define __8X(a) __4X(a), __4X(a) +#define __16X(a) __8X(a), __8X(a) + +#define TRANSPOSE(row, col) \ +{ \ + __vector short row04l, row04h, row15l, row15h, \ + row26l, row26h, row37l, row37h; \ + __vector short col01e, col01o, col23e, col23o, \ + col45e, col45o, col67e, col67o; \ + \ + /* transpose coefficients (phase 1) */ \ + row04l = vec_mergeh(row##0, row##4); /* row04l=(00 40 01 41 02 42 03 43) */ \ + row04h = vec_mergel(row##0, row##4); /* row04h=(04 44 05 45 06 46 07 47) */ \ + row15l = vec_mergeh(row##1, row##5); /* row15l=(10 50 11 51 12 52 13 53) */ \ + row15h = vec_mergel(row##1, row##5); /* row15h=(14 54 15 55 16 56 17 57) */ \ + row26l = vec_mergeh(row##2, row##6); /* row26l=(20 60 21 61 22 62 23 63) */ \ + row26h = vec_mergel(row##2, row##6); /* row26h=(24 64 25 65 26 66 27 67) */ \ + row37l = vec_mergeh(row##3, row##7); /* row37l=(30 70 31 71 32 72 33 73) */ \ + row37h = vec_mergel(row##3, row##7); /* row37h=(34 74 35 75 36 76 37 77) */ \ + \ + /* transpose coefficients (phase 2) */ \ + col01e = vec_mergeh(row04l, row26l); /* col01e=(00 20 40 60 01 21 41 61) */ \ + col23e = vec_mergel(row04l, row26l); /* col23e=(02 22 42 62 03 23 43 63) */ \ + col45e = vec_mergeh(row04h, row26h); /* col45e=(04 24 44 64 05 25 45 65) */ \ + col67e = vec_mergel(row04h, row26h); /* col67e=(06 26 46 66 07 27 47 67) */ \ + col01o = vec_mergeh(row15l, row37l); /* col01o=(10 30 50 70 11 31 51 71) */ \ + col23o = vec_mergel(row15l, row37l); /* col23o=(12 32 52 72 13 33 53 73) */ \ + col45o = vec_mergeh(row15h, row37h); /* col45o=(14 34 54 74 15 35 55 75) */ \ + col67o = vec_mergel(row15h, row37h); /* col67o=(16 36 56 76 17 37 57 77) */ \ + \ + /* transpose coefficients (phase 3) */ \ + col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \ + col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \ + col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \ + col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \ + col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \ + col##5 = vec_mergel(col45e, col45o); /* col5=(05 15 25 35 45 55 65 75) */ \ + col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \ + col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \ +} + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + + +/* Macros to abstract big/little endian bit twiddling */ + +#if __BIG_ENDIAN__ + +#define VEC_LD(a, b) vec_ld(a, b) +#define VEC_ST(a, b, c) vec_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(pb_zero, a) +#define VEC_UNPACKLU(a) vec_mergel(pb_zero, a) + +#else + +#define VEC_LD(a, b) vec_vsx_ld(a, b) +#define VEC_ST(a, b, c) vec_vsx_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(a, pb_zero) +#define VEC_UNPACKLU(a) vec_mergel(a, pb_zero) + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm64.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm64.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm64.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm64.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,802 @@ +/* + * jsimd_arm64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015-2016 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 64-bit ARM architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +#include +#include +#include + +#define JSIMD_FASTLD3 1 +#define JSIMD_FASTST3 2 +#define JSIMD_FASTTBL 4 + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; +static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 | + JSIMD_FASTTBL; + +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) + +LOCAL(int) +check_cpuinfo (char *buffer, const char *field, char *value) +{ + char *p; + if (*value == 0) + return 0; + if (strncmp(buffer, field, strlen(field)) != 0) + return 0; + buffer += strlen(field); + while (isspace(*buffer)) + buffer++; + + /* Check if 'value' is present in the buffer as a separate word */ + while ((p = strstr(buffer, value))) { + if (p > buffer && !isspace(*(p - 1))) { + buffer++; + continue; + } + p += strlen(value); + if (*p != 0 && !isspace(*p)) { + buffer++; + continue; + } + return 1; + } + return 0; +} + +LOCAL(int) +parse_proc_cpuinfo (int bufsize) +{ + char *buffer = (char *)malloc(bufsize); + FILE *fd; + + if (!buffer) + return 0; + + fd = fopen("/proc/cpuinfo", "r"); + if (fd) { + while (fgets(buffer, bufsize, fd)) { + if (!strchr(buffer, '\n') && !feof(fd)) { + /* "impossible" happened - insufficient size of the buffer! */ + fclose(fd); + free(buffer); + return 0; + } + if (check_cpuinfo(buffer, "CPU part", "0xd03") || + check_cpuinfo(buffer, "CPU part", "0xd07")) + /* The Cortex-A53 has a slow tbl implementation. We can gain a few + percent speedup by disabling the use of that instruction. The + speedup on Cortex-A57 is more subtle but still measurable. */ + simd_features &= ~JSIMD_FASTTBL; + else if (check_cpuinfo(buffer, "CPU part", "0x0a1")) + /* The SIMD version of Huffman encoding is slower than the C version on + Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that + CPU. */ + simd_huffman = simd_features = 0; + } + fclose(fd); + } + free(buffer); + return 1; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ + +/* + * ARMv8 architectures support NEON extensions by default. + * It is no longer optional as it was with ARMv7. + */ + + +LOCAL(void) +init_simd (void) +{ + char *env = NULL; +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + int bufsize = 1024; /* an initial guess for the line buffer size limit */ +#endif + + if (simd_support != ~0U) + return; + + simd_support = 0; + + simd_support |= JSIMD_ARM_NEON; +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + while (!parse_proc_cpuinfo(bufsize)) { + bufsize *= 2; + if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) + break; + } +#endif + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENEON"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_ARM_NEON; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; + env = getenv("JSIMD_FASTLD3"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_features |= JSIMD_FASTLD3; + if ((env != NULL) && (strcmp(env, "0") == 0)) + simd_features &= ~JSIMD_FASTLD3; + env = getenv("JSIMD_FASTST3"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_features |= JSIMD_FASTST3; + if ((env != NULL) && (strcmp(env, "0") == 0)) + simd_features &= ~JSIMD_FASTST3; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extrgb_ycc_convert_neon; + else + neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct=jsimd_extrgbx_ycc_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extbgr_ycc_convert_neon; + else + neonfct=jsimd_extbgr_ycc_convert_neon_slowld3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct=jsimd_extbgrx_ycc_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct=jsimd_extxbgr_ycc_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct=jsimd_extxrgb_ycc_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extrgb_ycc_convert_neon; + else + neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; + break; + } + + neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extrgb_convert_neon; + else + neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct=jsimd_ycc_extrgbx_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extbgr_convert_neon; + else + neonfct=jsimd_ycc_extbgr_convert_neon_slowst3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct=jsimd_ycc_extbgrx_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct=jsimd_ycc_extxbgr_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct=jsimd_ycc_extxrgb_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extrgb_convert_neon; + else + neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; + break; + } + + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, + output_buf, num_rows); +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_neon(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_neon(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_neon(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_neon(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON && simd_huffman) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + if (simd_features & JSIMD_FASTTBL) + return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, + dctbl, actbl); + else + return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, + last_dc_val, dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm64_neon.S glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm64_neon.S --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm64_neon.S 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm64_neon.S 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3453 @@ +/* + * ARMv8 NEON optimizations for libjpeg-turbo + * + * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). + * All rights reserved. + * Author: Siarhei Siamashka + * Copyright (C) 2013-2014, Linaro Limited + * Author: Ragesh Radhakrishnan + * Copyright (C) 2014-2016, D. R. Commander. All Rights Reserved. + * Copyright (C) 2015-2016, Matthieu Darbois. All Rights Reserved. + * Copyright (C) 2016, Siarhei Siamashka. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */ +#endif + +.text + + +#define RESPECT_STRICT_ALIGNMENT 1 + + +/*****************************************************************************/ + +/* Supplementary macro for setting function attributes */ +.macro asm_function fname +#ifdef __APPLE__ + .globl _\fname +_\fname: +#else + .global \fname +#ifdef __ELF__ + .hidden \fname + .type \fname, %function +#endif +\fname: +#endif +.endm + +/* Transpose elements of single 128 bit registers */ +.macro transpose_single x0, x1, xi, xilen, literal + ins \xi\xilen[0], \x0\xilen[0] + ins \x1\xilen[0], \x0\xilen[1] + trn1 \x0\literal, \x0\literal, \x1\literal + trn2 \x1\literal, \xi\literal, \x1\literal +.endm + +/* Transpose elements of 2 differnet registers */ +.macro transpose x0, x1, xi, xilen, literal + mov \xi\xilen, \x0\xilen + trn1 \x0\literal, \x0\literal, \x1\literal + trn2 \x1\literal, \xi\literal, \x1\literal +.endm + +/* Transpose a block of 4x4 coefficients in four 64-bit registers */ +.macro transpose_4x4_32 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen + mov \xi\xilen, \x0\xilen + trn1 \x0\x0len, \x0\x0len, \x2\x2len + trn2 \x2\x2len, \xi\x0len, \x2\x2len + mov \xi\xilen, \x1\xilen + trn1 \x1\x1len, \x1\x1len, \x3\x3len + trn2 \x3\x3len, \xi\x1len, \x3\x3len +.endm + +.macro transpose_4x4_16 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen + mov \xi\xilen, \x0\xilen + trn1 \x0\x0len, \x0\x0len, \x1\x1len + trn2 \x1\x2len, \xi\x0len, \x1\x2len + mov \xi\xilen, \x2\xilen + trn1 \x2\x2len, \x2\x2len, \x3\x3len + trn2 \x3\x2len, \xi\x1len, \x3\x3len +.endm + +.macro transpose_4x4 x0, x1, x2, x3, x5 + transpose_4x4_16 \x0, .4h, \x1, .4h, \x2, .4h, \x3, .4h, \x5, .16b + transpose_4x4_32 \x0, .2s, \x1, .2s, \x2, .2s, \x3, .2s, \x5, .16b +.endm + +.macro transpose_8x8 l0, l1, l2, l3, l4, l5, l6, l7, t0, t1, t2, t3 + trn1 \t0\().8h, \l0\().8h, \l1\().8h + trn1 \t1\().8h, \l2\().8h, \l3\().8h + trn1 \t2\().8h, \l4\().8h, \l5\().8h + trn1 \t3\().8h, \l6\().8h, \l7\().8h + trn2 \l1\().8h, \l0\().8h, \l1\().8h + trn2 \l3\().8h, \l2\().8h, \l3\().8h + trn2 \l5\().8h, \l4\().8h, \l5\().8h + trn2 \l7\().8h, \l6\().8h, \l7\().8h + + trn1 \l4\().4s, \t2\().4s, \t3\().4s + trn2 \t3\().4s, \t2\().4s, \t3\().4s + trn1 \t2\().4s, \t0\().4s, \t1\().4s + trn2 \l2\().4s, \t0\().4s, \t1\().4s + trn1 \t0\().4s, \l1\().4s, \l3\().4s + trn2 \l3\().4s, \l1\().4s, \l3\().4s + trn2 \t1\().4s, \l5\().4s, \l7\().4s + trn1 \l5\().4s, \l5\().4s, \l7\().4s + + trn2 \l6\().2d, \l2\().2d, \t3\().2d + trn1 \l0\().2d, \t2\().2d, \l4\().2d + trn1 \l1\().2d, \t0\().2d, \l5\().2d + trn2 \l7\().2d, \l3\().2d, \t1\().2d + trn1 \l2\().2d, \l2\().2d, \t3\().2d + trn2 \l4\().2d, \t2\().2d, \l4\().2d + trn1 \l3\().2d, \l3\().2d, \t1\().2d + trn2 \l5\().2d, \t0\().2d, \l5\().2d +.endm + + +#define CENTERJSAMPLE 128 + +/*****************************************************************************/ + +/* + * Perform dequantization and inverse DCT on one block of coefficients. + * + * GLOBAL(void) + * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, + * JSAMPARRAY output_buf, JDIMENSION output_col) + */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +.balign 16 +Ljsimd_idct_islow_neon_consts: + .short F_0_298 + .short -F_0_390 + .short F_0_541 + .short F_0_765 + .short - F_0_899 + .short F_1_175 + .short F_1_501 + .short - F_1_847 + .short - F_1_961 + .short F_2_053 + .short - F_2_562 + .short F_3_072 + .short 0 /* padding */ + .short 0 + .short 0 + .short 0 + +#undef F_0_298 +#undef F_0_390 +#undef F_0_541 +#undef F_0_765 +#undef F_0_899 +#undef F_1_175 +#undef F_1_501 +#undef F_1_847 +#undef F_1_961 +#undef F_2_053 +#undef F_2_562 +#undef F_3_072 + +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] + +asm_function jsimd_idct_islow_neon + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x9 + TMP4 .req x10 + TMP5 .req x11 + TMP6 .req x12 + TMP7 .req x13 + TMP8 .req x14 + + sub sp, sp, #64 + adr x15, Ljsimd_idct_islow_neon_consts + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32 + ld1 {v0.8h, v1.8h}, [x15] + ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [COEF_BLOCK], #64 + ld1 {v18.8h, v19.8h, v20.8h, v21.8h}, [DCT_TABLE], #64 + ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [COEF_BLOCK], #64 + ld1 {v22.8h, v23.8h, v24.8h, v25.8h}, [DCT_TABLE], #64 + + cmeq v16.8h, v3.8h, #0 + cmeq v26.8h, v4.8h, #0 + cmeq v27.8h, v5.8h, #0 + cmeq v28.8h, v6.8h, #0 + cmeq v29.8h, v7.8h, #0 + cmeq v30.8h, v8.8h, #0 + cmeq v31.8h, v9.8h, #0 + + and v10.16b, v16.16b, v26.16b + and v11.16b, v27.16b, v28.16b + and v12.16b, v29.16b, v30.16b + and v13.16b, v31.16b, v10.16b + and v14.16b, v11.16b, v12.16b + mul v2.8h, v2.8h, v18.8h + and v15.16b, v13.16b, v14.16b + shl v10.8h, v2.8h, #(PASS1_BITS) + sqxtn v16.8b, v15.8h + mov TMP1, v16.d[0] + sub sp, sp, #64 + mvn TMP2, TMP1 + + cbnz TMP2, 2f + /* case all AC coeffs are zeros */ + dup v2.2d, v10.d[0] + dup v6.2d, v10.d[1] + mov v3.16b, v2.16b + mov v7.16b, v6.16b + mov v4.16b, v2.16b + mov v8.16b, v6.16b + mov v5.16b, v2.16b + mov v9.16b, v6.16b +1: + /* for this transpose, we should organise data like this: + * 00, 01, 02, 03, 40, 41, 42, 43 + * 10, 11, 12, 13, 50, 51, 52, 53 + * 20, 21, 22, 23, 60, 61, 62, 63 + * 30, 31, 32, 33, 70, 71, 72, 73 + * 04, 05, 06, 07, 44, 45, 46, 47 + * 14, 15, 16, 17, 54, 55, 56, 57 + * 24, 25, 26, 27, 64, 65, 66, 67 + * 34, 35, 36, 37, 74, 75, 76, 77 + */ + trn1 v28.8h, v2.8h, v3.8h + trn1 v29.8h, v4.8h, v5.8h + trn1 v30.8h, v6.8h, v7.8h + trn1 v31.8h, v8.8h, v9.8h + trn2 v16.8h, v2.8h, v3.8h + trn2 v17.8h, v4.8h, v5.8h + trn2 v18.8h, v6.8h, v7.8h + trn2 v19.8h, v8.8h, v9.8h + trn1 v2.4s, v28.4s, v29.4s + trn1 v6.4s, v30.4s, v31.4s + trn1 v3.4s, v16.4s, v17.4s + trn1 v7.4s, v18.4s, v19.4s + trn2 v4.4s, v28.4s, v29.4s + trn2 v8.4s, v30.4s, v31.4s + trn2 v5.4s, v16.4s, v17.4s + trn2 v9.4s, v18.4s, v19.4s + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + shrn v2.4h, v18.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v9.4h, v20.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v3.4h, v22.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v8.4h, v24.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v4.4h, v26.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v7.4h, v28.4s, #16 /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v5.4h, v14.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn v6.4h, v16.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v2.8h, v19.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn2 v9.8h, v21.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn2 v3.8h, v23.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v8.8h, v25.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v4.8h, v27.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v7.8h, v29.4s, #16 /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v5.8h, v15.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v6.8h, v17.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + movi v0.16b, #(CENTERJSAMPLE) + /* Prepare pointers (dual-issue with NEON instructions) */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + sqrshrn v28.8b, v2.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP3, TMP4, [OUTPUT_BUF], 16 + sqrshrn v29.8b, v3.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP1, TMP1, OUTPUT_COL + sqrshrn v30.8b, v4.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP2, TMP2, OUTPUT_COL + sqrshrn v31.8b, v5.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP3, TMP3, OUTPUT_COL + sqrshrn2 v28.16b, v6.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP4, TMP4, OUTPUT_COL + sqrshrn2 v29.16b, v7.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP5, TMP6, [OUTPUT_BUF], 16 + sqrshrn2 v30.16b, v8.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP7, TMP8, [OUTPUT_BUF], 16 + sqrshrn2 v31.16b, v9.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP5, TMP5, OUTPUT_COL + add v16.16b, v28.16b, v0.16b + add TMP6, TMP6, OUTPUT_COL + add v18.16b, v29.16b, v0.16b + add TMP7, TMP7, OUTPUT_COL + add v20.16b, v30.16b, v0.16b + add TMP8, TMP8, OUTPUT_COL + add v22.16b, v31.16b, v0.16b + + /* Transpose the final 8-bit samples */ + trn1 v28.16b, v16.16b, v18.16b + trn1 v30.16b, v20.16b, v22.16b + trn2 v29.16b, v16.16b, v18.16b + trn2 v31.16b, v20.16b, v22.16b + + trn1 v16.8h, v28.8h, v30.8h + trn2 v18.8h, v28.8h, v30.8h + trn1 v20.8h, v29.8h, v31.8h + trn2 v22.8h, v29.8h, v31.8h + + uzp1 v28.4s, v16.4s, v18.4s + uzp2 v30.4s, v16.4s, v18.4s + uzp1 v29.4s, v20.4s, v22.4s + uzp2 v31.4s, v20.4s, v22.4s + + /* Store results to the output buffer */ + st1 {v28.d}[0], [TMP1] + st1 {v29.d}[0], [TMP2] + st1 {v28.d}[1], [TMP3] + st1 {v29.d}[1], [TMP4] + st1 {v30.d}[0], [TMP5] + st1 {v31.d}[0], [TMP6] + st1 {v30.d}[1], [TMP7] + st1 {v31.d}[1], [TMP8] + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32 + blr x30 + +.balign 16 +2: + mul v3.8h, v3.8h, v19.8h + mul v4.8h, v4.8h, v20.8h + mul v5.8h, v5.8h, v21.8h + add TMP4, xzr, TMP2, LSL #32 + mul v6.8h, v6.8h, v22.8h + mul v7.8h, v7.8h, v23.8h + adds TMP3, xzr, TMP2, LSR #32 + mul v8.8h, v8.8h, v24.8h + mul v9.8h, v9.8h, v25.8h + b.ne 3f + /* Right AC coef is zero */ + dup v15.2d, v10.d[1] + /* Even part: reverse the even part of the forward DCT. */ + add v18.4h, v4.4h, v8.4h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.4h, v2.4h, v6.4h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + sub v26.4h, v2.4h, v6.4h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.4h, v9.4h, v5.4h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.4h, v7.4h, v3.4h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.4h, v9.4h, v3.4h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.4h, v7.4h, v5.4h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.4h, v22.4h, v24.4h /* z5 = z3 + z4 */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + mov v6.16b, v15.16b + mov v7.16b, v15.16b + mov v8.16b, v15.16b + mov v9.16b, v15.16b + b 1b + +.balign 16 +3: + cbnz TMP4, 4f + /* Left AC coef is zero */ + dup v14.2d, v10.d[0] + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + mov v2.16b, v14.16b + mov v3.16b, v14.16b + mov v4.16b, v14.16b + mov v5.16b, v14.16b + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + b 1b + +.balign 16 +4: + /* "No" AC coef is zero */ + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + b 1b + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + +#undef CENTERJSAMPLE +#undef CONST_BITS +#undef PASS1_BITS +#undef XFIX_P_0_298 +#undef XFIX_N_0_390 +#undef XFIX_P_0_541 +#undef XFIX_P_0_765 +#undef XFIX_N_0_899 +#undef XFIX_P_1_175 +#undef XFIX_P_1_501 +#undef XFIX_N_1_847 +#undef XFIX_N_1_961 +#undef XFIX_P_2_053 +#undef XFIX_N_2_562 +#undef XFIX_P_3_072 + + +/*****************************************************************************/ + +/* + * jsimd_idct_ifast_neon + * + * This function contains a fast, not so accurate integer implementation of + * the inverse DCT (Discrete Cosine Transform). It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_ifast' + * function from jidctfst.c + * + * Normally 1-D AAN DCT needs 5 multiplications and 29 additions. + * But in ARM NEON case some extra additions are required because VQDMULH + * instruction can't handle the constants larger than 1. So the expressions + * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x", + * which introduces an extra addition. Overall, there are 6 extra additions + * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. + */ + +#define XFIX_1_082392200 v0.h[0] +#define XFIX_1_414213562 v0.h[1] +#define XFIX_1_847759065 v0.h[2] +#define XFIX_2_613125930 v0.h[3] + +.balign 16 +Ljsimd_idct_ifast_neon_consts: + .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ + .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ + .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ + .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ + +asm_function jsimd_idct_ifast_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x9 + TMP4 .req x10 + TMP5 .req x11 + TMP6 .req x12 + TMP7 .req x13 + TMP8 .req x14 + + /* Load and dequantize coefficients into NEON registers + * with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 ( v16.8h ) + * 1 | d18 | d19 ( v17.8h ) + * 2 | d20 | d21 ( v18.8h ) + * 3 | d22 | d23 ( v19.8h ) + * 4 | d24 | d25 ( v20.8h ) + * 5 | d26 | d27 ( v21.8h ) + * 6 | d28 | d29 ( v22.8h ) + * 7 | d30 | d31 ( v23.8h ) + */ + /* Save NEON registers used in fast IDCT */ + adr TMP5, Ljsimd_idct_ifast_neon_consts + ld1 {v16.8h, v17.8h}, [COEF_BLOCK], 32 + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 + ld1 {v18.8h, v19.8h}, [COEF_BLOCK], 32 + mul v16.8h, v16.8h, v0.8h + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 + mul v17.8h, v17.8h, v1.8h + ld1 {v20.8h, v21.8h}, [COEF_BLOCK], 32 + mul v18.8h, v18.8h, v2.8h + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 + mul v19.8h, v19.8h, v3.8h + ld1 {v22.8h, v23.8h}, [COEF_BLOCK], 32 + mul v20.8h, v20.8h, v0.8h + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 + mul v22.8h, v22.8h, v2.8h + mul v21.8h, v21.8h, v1.8h + ld1 {v0.4h}, [TMP5] /* load constants */ + mul v23.8h, v23.8h, v3.8h + + /* 1-D IDCT, pass 1 */ + sub v2.8h, v18.8h, v22.8h + add v22.8h, v18.8h, v22.8h + sub v1.8h, v19.8h, v21.8h + add v21.8h, v19.8h, v21.8h + sub v5.8h, v17.8h, v23.8h + add v23.8h, v17.8h, v23.8h + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 + add v3.8h, v1.8h, v1.8h + sub v1.8h, v5.8h, v1.8h + add v18.8h, v2.8h, v4.8h + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 + sub v2.8h, v23.8h, v21.8h + add v3.8h, v3.8h, v6.8h + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 + add v1.8h, v1.8h, v4.8h + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 + sub v18.8h, v18.8h, v22.8h + add v2.8h, v2.8h, v6.8h + sub v6.8h, v16.8h, v20.8h + add v20.8h, v16.8h, v20.8h + add v17.8h, v5.8h, v4.8h + add v5.8h, v6.8h, v18.8h + sub v18.8h, v6.8h, v18.8h + add v6.8h, v23.8h, v21.8h + add v16.8h, v20.8h, v22.8h + sub v3.8h, v6.8h, v3.8h + sub v20.8h, v20.8h, v22.8h + sub v3.8h, v3.8h, v1.8h + sub v1.8h, v17.8h, v1.8h + add v2.8h, v3.8h, v2.8h + sub v23.8h, v16.8h, v6.8h + add v1.8h, v1.8h, v2.8h + add v16.8h, v16.8h, v6.8h + add v22.8h, v5.8h, v3.8h + sub v17.8h, v5.8h, v3.8h + sub v21.8h, v18.8h, v2.8h + add v18.8h, v18.8h, v2.8h + sub v19.8h, v20.8h, v1.8h + add v20.8h, v20.8h, v1.8h + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v28, v29, v30, v31 + /* 1-D IDCT, pass 2 */ + sub v2.8h, v18.8h, v22.8h + add v22.8h, v18.8h, v22.8h + sub v1.8h, v19.8h, v21.8h + add v21.8h, v19.8h, v21.8h + sub v5.8h, v17.8h, v23.8h + add v23.8h, v17.8h, v23.8h + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 + add v3.8h, v1.8h, v1.8h + sub v1.8h, v5.8h, v1.8h + add v18.8h, v2.8h, v4.8h + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 + sub v2.8h, v23.8h, v21.8h + add v3.8h, v3.8h, v6.8h + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 + add v1.8h, v1.8h, v4.8h + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 + sub v18.8h, v18.8h, v22.8h + add v2.8h, v2.8h, v6.8h + sub v6.8h, v16.8h, v20.8h + add v20.8h, v16.8h, v20.8h + add v17.8h, v5.8h, v4.8h + add v5.8h, v6.8h, v18.8h + sub v18.8h, v6.8h, v18.8h + add v6.8h, v23.8h, v21.8h + add v16.8h, v20.8h, v22.8h + sub v3.8h, v6.8h, v3.8h + sub v20.8h, v20.8h, v22.8h + sub v3.8h, v3.8h, v1.8h + sub v1.8h, v17.8h, v1.8h + add v2.8h, v3.8h, v2.8h + sub v23.8h, v16.8h, v6.8h + add v1.8h, v1.8h, v2.8h + add v16.8h, v16.8h, v6.8h + add v22.8h, v5.8h, v3.8h + sub v17.8h, v5.8h, v3.8h + sub v21.8h, v18.8h, v2.8h + add v18.8h, v18.8h, v2.8h + sub v19.8h, v20.8h, v1.8h + add v20.8h, v20.8h, v1.8h + /* Descale to 8-bit and range limit */ + movi v0.16b, #0x80 + /* Prepare pointers (dual-issue with NEON instructions) */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + sqshrn v28.8b, v16.8h, #5 + ldp TMP3, TMP4, [OUTPUT_BUF], 16 + sqshrn v29.8b, v17.8h, #5 + add TMP1, TMP1, OUTPUT_COL + sqshrn v30.8b, v18.8h, #5 + add TMP2, TMP2, OUTPUT_COL + sqshrn v31.8b, v19.8h, #5 + add TMP3, TMP3, OUTPUT_COL + sqshrn2 v28.16b, v20.8h, #5 + add TMP4, TMP4, OUTPUT_COL + sqshrn2 v29.16b, v21.8h, #5 + ldp TMP5, TMP6, [OUTPUT_BUF], 16 + sqshrn2 v30.16b, v22.8h, #5 + ldp TMP7, TMP8, [OUTPUT_BUF], 16 + sqshrn2 v31.16b, v23.8h, #5 + add TMP5, TMP5, OUTPUT_COL + add v16.16b, v28.16b, v0.16b + add TMP6, TMP6, OUTPUT_COL + add v18.16b, v29.16b, v0.16b + add TMP7, TMP7, OUTPUT_COL + add v20.16b, v30.16b, v0.16b + add TMP8, TMP8, OUTPUT_COL + add v22.16b, v31.16b, v0.16b + + /* Transpose the final 8-bit samples */ + trn1 v28.16b, v16.16b, v18.16b + trn1 v30.16b, v20.16b, v22.16b + trn2 v29.16b, v16.16b, v18.16b + trn2 v31.16b, v20.16b, v22.16b + + trn1 v16.8h, v28.8h, v30.8h + trn2 v18.8h, v28.8h, v30.8h + trn1 v20.8h, v29.8h, v31.8h + trn2 v22.8h, v29.8h, v31.8h + + uzp1 v28.4s, v16.4s, v18.4s + uzp2 v30.4s, v16.4s, v18.4s + uzp1 v29.4s, v20.4s, v22.4s + uzp2 v31.4s, v20.4s, v22.4s + + /* Store results to the output buffer */ + st1 {v28.d}[0], [TMP1] + st1 {v29.d}[0], [TMP2] + st1 {v28.d}[1], [TMP3] + st1 {v29.d}[1], [TMP4] + st1 {v30.d}[0], [TMP5] + st1 {v31.d}[0], [TMP6] + st1 {v30.d}[1], [TMP7] + st1 {v31.d}[1], [TMP8] + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + + +/*****************************************************************************/ + +/* + * jsimd_idct_4x4_neon + * + * This function contains inverse-DCT code for getting reduced-size + * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_4x4' + * function from jpeg-6b (jidctred.c). + * + * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which + * requires much less arithmetic operations and hence should be faster. + * The primary purpose of this particular NEON optimized function is + * bit exact compatibility with jpeg-6b. + * + * TODO: a bit better instructions scheduling can be achieved by expanding + * idct_helper/transpose_4x4 macros and reordering instructions, + * but readability will suffer somewhat. + */ + +#define CONST_BITS 13 + +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ + +.balign 16 +Ljsimd_idct_4x4_neon_consts: + .short FIX_1_847759065 /* v0.h[0] */ + .short -FIX_0_765366865 /* v0.h[1] */ + .short -FIX_0_211164243 /* v0.h[2] */ + .short FIX_1_451774981 /* v0.h[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* v2.h[0] */ + .short FIX_2_562915447 /* v2.h[1] */ + .short 1 << (CONST_BITS+1) /* v2.h[2] */ + .short 0 /* v2.h[3] */ + +.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 + smull v28.4s, \x4, v2.h[2] + smlal v28.4s, \x8, v0.h[0] + smlal v28.4s, \x14, v0.h[1] + + smull v26.4s, \x16, v1.h[2] + smlal v26.4s, \x12, v1.h[3] + smlal v26.4s, \x10, v2.h[0] + smlal v26.4s, \x6, v2.h[1] + + smull v30.4s, \x4, v2.h[2] + smlsl v30.4s, \x8, v0.h[0] + smlsl v30.4s, \x14, v0.h[1] + + smull v24.4s, \x16, v0.h[2] + smlal v24.4s, \x12, v0.h[3] + smlal v24.4s, \x10, v1.h[0] + smlal v24.4s, \x6, v1.h[1] + + add v20.4s, v28.4s, v26.4s + sub v28.4s, v28.4s, v26.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v28.4s, v28.4s, #\shift + xtn \y26, v20.4s + xtn \y29, v28.4s + .else + rshrn \y26, v20.4s, #\shift + rshrn \y29, v28.4s, #\shift + .endif + + add v20.4s, v30.4s, v24.4s + sub v30.4s, v30.4s, v24.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v30.4s, v30.4s, #\shift + xtn \y27, v20.4s + xtn \y28, v30.4s + .else + rshrn \y27, v20.4s, #\shift + rshrn \y28, v30.4s, #\shift + .endif +.endm + +asm_function jsimd_idct_4x4_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x2 + TMP4 .req x15 + + /* Save all used NEON registers */ + sub sp, sp, 272 + str x15, [sp], 16 + /* Load constants (v3.4h is just used for padding) */ + adr TMP4, Ljsimd_idct_4x4_neon_consts + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4] + + /* Load all COEF_BLOCK into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | v4.4h | v5.4h + * 1 | v6.4h | v7.4h + * 2 | v8.4h | v9.4h + * 3 | v10.4h | v11.4h + * 4 | - | - + * 5 | v12.4h | v13.4h + * 6 | v14.4h | v15.4h + * 7 | v16.4h | v17.4h + */ + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 + ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32 + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 + /* dequantize */ + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 + mul v4.4h, v4.4h, v18.4h + mul v5.4h, v5.4h, v19.4h + ins v4.d[1], v5.d[0] /* 128 bit q4 */ + ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32 + mul v6.4h, v6.4h, v20.4h + mul v7.4h, v7.4h, v21.4h + ins v6.d[1], v7.d[0] /* 128 bit q6 */ + mul v8.4h, v8.4h, v22.4h + mul v9.4h, v9.4h, v23.4h + ins v8.d[1], v9.d[0] /* 128 bit q8 */ + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32 + mul v10.4h, v10.4h, v24.4h + mul v11.4h, v11.4h, v25.4h + ins v10.d[1], v11.d[0] /* 128 bit q10 */ + mul v12.4h, v12.4h, v26.4h + mul v13.4h, v13.4h, v27.4h + ins v12.d[1], v13.d[0] /* 128 bit q12 */ + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 + mul v14.4h, v14.4h, v28.4h + mul v15.4h, v15.4h, v29.4h + ins v14.d[1], v15.d[0] /* 128 bit q14 */ + mul v16.4h, v16.4h, v30.4h + mul v17.4h, v17.4h, v31.4h + ins v16.d[1], v17.d[0] /* 128 bit q16 */ + + /* Pass 1 */ + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, \ + v4.4h, v6.4h, v8.4h, v10.4h + transpose_4x4 v4, v6, v8, v10, v3 + ins v10.d[1], v11.d[0] + idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, \ + v5.4h, v7.4h, v9.4h, v11.4h + transpose_4x4 v5, v7, v9, v11, v3 + ins v10.d[1], v11.d[0] + + /* Pass 2 */ + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, \ + v26.4h, v27.4h, v28.4h, v29.4h + transpose_4x4 v26, v27, v28, v29, v3 + + /* Range limit */ + movi v30.8h, #0x80 + ins v26.d[1], v27.d[0] + ins v28.d[1], v29.d[0] + add v26.8h, v26.8h, v30.8h + add v28.8h, v28.8h, v30.8h + sqxtun v26.8b, v26.8h + sqxtun v27.8b, v28.8h + + /* Store results to the output buffer */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + ldp TMP3, TMP4, [OUTPUT_BUF] + add TMP1, TMP1, OUTPUT_COL + add TMP2, TMP2, OUTPUT_COL + add TMP3, TMP3, OUTPUT_COL + add TMP4, TMP4, OUTPUT_COL + +#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT + /* We can use much less instructions on little endian systems if the + * OS kernel is not configured to trap unaligned memory accesses + */ + st1 {v26.s}[0], [TMP1], 4 + st1 {v27.s}[0], [TMP3], 4 + st1 {v26.s}[1], [TMP2], 4 + st1 {v27.s}[1], [TMP4], 4 +#else + st1 {v26.b}[0], [TMP1], 1 + st1 {v27.b}[0], [TMP3], 1 + st1 {v26.b}[1], [TMP1], 1 + st1 {v27.b}[1], [TMP3], 1 + st1 {v26.b}[2], [TMP1], 1 + st1 {v27.b}[2], [TMP3], 1 + st1 {v26.b}[3], [TMP1], 1 + st1 {v27.b}[3], [TMP3], 1 + + st1 {v26.b}[4], [TMP2], 1 + st1 {v27.b}[4], [TMP4], 1 + st1 {v26.b}[5], [TMP2], 1 + st1 {v27.b}[5], [TMP4], 1 + st1 {v26.b}[6], [TMP2], 1 + st1 {v27.b}[6], [TMP4], 1 + st1 {v26.b}[7], [TMP2], 1 + st1 {v27.b}[7], [TMP4], 1 +#endif + + /* vpop {v8.4h - v15.4h} ;not available */ + sub sp, sp, #272 + ldr x15, [sp], 16 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + +.purgem idct_helper + + +/*****************************************************************************/ + +/* + * jsimd_idct_2x2_neon + * + * This function contains inverse-DCT code for getting reduced-size + * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_2x2' + * function from jpeg-6b (jidctred.c). + * + * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which + * requires much less arithmetic operations and hence should be faster. + * The primary purpose of this particular NEON optimized function is + * bit exact compatibility with jpeg-6b. + */ + +.balign 8 +Ljsimd_idct_2x2_neon_consts: + .short -FIX_0_720959822 /* v14[0] */ + .short FIX_0_850430095 /* v14[1] */ + .short -FIX_1_272758580 /* v14[2] */ + .short FIX_3_624509785 /* v14[3] */ + +.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 + sshll v15.4s, \x4, #15 + smull v26.4s, \x6, v14.h[3] + smlal v26.4s, \x10, v14.h[2] + smlal v26.4s, \x12, v14.h[1] + smlal v26.4s, \x16, v14.h[0] + + add v20.4s, v15.4s, v26.4s + sub v15.4s, v15.4s, v26.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v15.4s, v15.4s, #\shift + xtn \y26, v20.4s + xtn \y27, v15.4s + .else + rshrn \y26, v20.4s, #\shift + rshrn \y27, v15.4s, #\shift + .endif +.endm + +asm_function jsimd_idct_2x2_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x15 + + /* vpush {v8.4h - v15.4h} ; not available */ + sub sp, sp, 208 + str x15, [sp], 16 + + /* Load constants */ + adr TMP2, Ljsimd_idct_2x2_neon_consts + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v21.8b, v22.8b}, [sp], 16 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v30.8b, v31.8b}, [sp], 16 + ld1 {v14.4h}, [TMP2] + + /* Load all COEF_BLOCK into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | v4.4h | v5.4h + * 1 | v6.4h | v7.4h + * 2 | - | - + * 3 | v10.4h | v11.4h + * 4 | - | - + * 5 | v12.4h | v13.4h + * 6 | - | - + * 7 | v16.4h | v17.4h + */ + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 + /* Dequantize */ + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 + mul v4.4h, v4.4h, v18.4h + mul v5.4h, v5.4h, v19.4h + ins v4.d[1], v5.d[0] + mul v6.4h, v6.4h, v20.4h + mul v7.4h, v7.4h, v21.4h + ins v6.d[1], v7.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16 + mul v10.4h, v10.4h, v24.4h + mul v11.4h, v11.4h, v25.4h + ins v10.d[1], v11.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16 + mul v12.4h, v12.4h, v26.4h + mul v13.4h, v13.4h, v27.4h + ins v12.d[1], v13.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 + mul v16.4h, v16.4h, v30.4h + mul v17.4h, v17.4h, v31.4h + ins v16.d[1], v17.d[0] + + /* Pass 1 */ +#if 0 + idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h + transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h + idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h + transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h +#else + smull v26.4s, v6.4h, v14.h[3] + smlal v26.4s, v10.4h, v14.h[2] + smlal v26.4s, v12.4h, v14.h[1] + smlal v26.4s, v16.4h, v14.h[0] + smull v24.4s, v7.4h, v14.h[3] + smlal v24.4s, v11.4h, v14.h[2] + smlal v24.4s, v13.4h, v14.h[1] + smlal v24.4s, v17.4h, v14.h[0] + sshll v15.4s, v4.4h, #15 + sshll v30.4s, v5.4h, #15 + add v20.4s, v15.4s, v26.4s + sub v15.4s, v15.4s, v26.4s + rshrn v4.4h, v20.4s, #13 + rshrn v6.4h, v15.4s, #13 + add v20.4s, v30.4s, v24.4s + sub v15.4s, v30.4s, v24.4s + rshrn v5.4h, v20.4s, #13 + rshrn v7.4h, v15.4s, #13 + ins v4.d[1], v5.d[0] + ins v6.d[1], v7.d[0] + transpose v4, v6, v3, .16b, .8h + transpose v6, v10, v3, .16b, .4s + ins v11.d[0], v10.d[1] + ins v7.d[0], v6.d[1] +#endif + + /* Pass 2 */ + idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h + + /* Range limit */ + movi v30.8h, #0x80 + ins v26.d[1], v27.d[0] + add v26.8h, v26.8h, v30.8h + sqxtun v30.8b, v26.8h + ins v26.d[0], v30.d[0] + sqxtun v27.8b, v26.8h + + /* Store results to the output buffer */ + ldp TMP1, TMP2, [OUTPUT_BUF] + add TMP1, TMP1, OUTPUT_COL + add TMP2, TMP2, OUTPUT_COL + + st1 {v26.b}[0], [TMP1], 1 + st1 {v27.b}[4], [TMP1], 1 + st1 {v26.b}[1], [TMP2], 1 + st1 {v27.b}[5], [TMP2], 1 + + sub sp, sp, #208 + ldr x15, [sp], 16 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v21.8b, v22.8b}, [sp], 16 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v30.8b, v31.8b}, [sp], 16 + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + +.purgem idct_helper + + +/*****************************************************************************/ + +/* + * jsimd_ycc_extrgb_convert_neon + * jsimd_ycc_extbgr_convert_neon + * jsimd_ycc_extrgbx_convert_neon + * jsimd_ycc_extbgrx_convert_neon + * jsimd_ycc_extxbgr_convert_neon + * jsimd_ycc_extxrgb_convert_neon + * + * Colorspace conversion YCbCr -> RGB + */ + +.macro do_load size + .if \size == 8 + ld1 {v4.8b}, [U], 8 + ld1 {v5.8b}, [V], 8 + ld1 {v0.8b}, [Y], 8 + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + .elseif \size == 4 + ld1 {v4.b}[0], [U], 1 + ld1 {v4.b}[1], [U], 1 + ld1 {v4.b}[2], [U], 1 + ld1 {v4.b}[3], [U], 1 + ld1 {v5.b}[0], [V], 1 + ld1 {v5.b}[1], [V], 1 + ld1 {v5.b}[2], [V], 1 + ld1 {v5.b}[3], [V], 1 + ld1 {v0.b}[0], [Y], 1 + ld1 {v0.b}[1], [Y], 1 + ld1 {v0.b}[2], [Y], 1 + ld1 {v0.b}[3], [Y], 1 + .elseif \size == 2 + ld1 {v4.b}[4], [U], 1 + ld1 {v4.b}[5], [U], 1 + ld1 {v5.b}[4], [V], 1 + ld1 {v5.b}[5], [V], 1 + ld1 {v0.b}[4], [Y], 1 + ld1 {v0.b}[5], [Y], 1 + .elseif \size == 1 + ld1 {v4.b}[6], [U], 1 + ld1 {v5.b}[6], [V], 1 + ld1 {v0.b}[6], [Y], 1 + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_store bpp, size, fast_st3 + .if \bpp == 24 + .if \size == 8 + .if \fast_st3 == 1 + st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24 + .else + st1 {v10.b}[0], [RGB], #1 + st1 {v11.b}[0], [RGB], #1 + st1 {v12.b}[0], [RGB], #1 + + st1 {v10.b}[1], [RGB], #1 + st1 {v11.b}[1], [RGB], #1 + st1 {v12.b}[1], [RGB], #1 + + st1 {v10.b}[2], [RGB], #1 + st1 {v11.b}[2], [RGB], #1 + st1 {v12.b}[2], [RGB], #1 + + st1 {v10.b}[3], [RGB], #1 + st1 {v11.b}[3], [RGB], #1 + st1 {v12.b}[3], [RGB], #1 + + st1 {v10.b}[4], [RGB], #1 + st1 {v11.b}[4], [RGB], #1 + st1 {v12.b}[4], [RGB], #1 + + st1 {v10.b}[5], [RGB], #1 + st1 {v11.b}[5], [RGB], #1 + st1 {v12.b}[5], [RGB], #1 + + st1 {v10.b}[6], [RGB], #1 + st1 {v11.b}[6], [RGB], #1 + st1 {v12.b}[6], [RGB], #1 + + st1 {v10.b}[7], [RGB], #1 + st1 {v11.b}[7], [RGB], #1 + st1 {v12.b}[7], [RGB], #1 + .endif + .elseif \size == 4 + st3 {v10.b, v11.b, v12.b}[0], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[1], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[2], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[3], [RGB], 3 + .elseif \size == 2 + st3 {v10.b, v11.b, v12.b}[4], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[5], [RGB], 3 + .elseif \size == 1 + st3 {v10.b, v11.b, v12.b}[6], [RGB], 3 + .else + .error unsupported macroblock size + .endif + .elseif \bpp == 32 + .if \size == 8 + st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32 + .elseif \size == 4 + st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4 + .elseif \size == 2 + st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4 + .elseif \size == 1 + st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4 + .else + .error unsupported macroblock size + .endif + .elseif \bpp==16 + .if \size == 8 + st1 {v25.8h}, [RGB], 16 + .elseif \size == 4 + st1 {v25.4h}, [RGB], 8 + .elseif \size == 2 + st1 {v25.h}[4], [RGB], 2 + st1 {v25.h}[5], [RGB], 2 + .elseif \size == 1 + st1 {v25.h}[6], [RGB], 2 + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif +.endm + +.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, \ + g_offs, gsize, b_offs, bsize, \ + defsize, fast_st3 + +/* + * 2-stage pipelined YCbCr->RGB conversion + */ + +.macro do_yuv_to_rgb_stage1 + uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */ + smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */ +.endm + +.macro do_yuv_to_rgb_stage2 + rshrn v20.4h, v20.4s, #15 + rshrn2 v20.8h, v22.4s, #15 + rshrn v24.4h, v24.4s, #14 + rshrn2 v24.8h, v26.4s, #14 + rshrn v28.4h, v28.4s, #14 + rshrn2 v28.8h, v30.4s, #14 + uaddw v20.8h, v20.8h, v0.8b + uaddw v24.8h, v24.8h, v0.8b + uaddw v28.8h, v28.8h, v0.8b + .if \bpp != 16 + sqxtun v1\g_offs\defsize, v20.8h + sqxtun v1\r_offs\defsize, v24.8h + sqxtun v1\b_offs\defsize, v28.8h + .else + sqshlu v21.8h, v20.8h, #8 + sqshlu v25.8h, v24.8h, #8 + sqshlu v29.8h, v28.8h, #8 + sri v25.8h, v21.8h, #5 + sri v25.8h, v29.8h, #11 + .endif +.endm + +.macro do_yuv_to_rgb_stage2_store_load_stage1 fast_st3 + rshrn v20.4h, v20.4s, #15 + rshrn v24.4h, v24.4s, #14 + rshrn v28.4h, v28.4s, #14 + ld1 {v4.8b}, [U], 8 + rshrn2 v20.8h, v22.4s, #15 + rshrn2 v24.8h, v26.4s, #14 + rshrn2 v28.8h, v30.4s, #14 + ld1 {v5.8b}, [V], 8 + uaddw v20.8h, v20.8h, v0.8b + uaddw v24.8h, v24.8h, v0.8b + uaddw v28.8h, v28.8h, v0.8b + .if \bpp != 16 /**************** rgb24/rgb32 ******************************/ + sqxtun v1\g_offs\defsize, v20.8h + ld1 {v0.8b}, [Y], 8 + sqxtun v1\r_offs\defsize, v24.8h + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + sqxtun v1\b_offs\defsize, v28.8h + uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + .else /**************************** rgb565 ********************************/ + sqshlu v21.8h, v20.8h, #8 + sqshlu v25.8h, v24.8h, #8 + sqshlu v29.8h, v28.8h, #8 + uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + ld1 {v0.8b}, [Y], 8 + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + sri v25.8h, v21.8h, #5 + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + sri v25.8h, v29.8h, #11 + .endif + do_store \bpp, 8, \fast_st3 + smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */ + smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */ +.endm + +.macro do_yuv_to_rgb + do_yuv_to_rgb_stage1 + do_yuv_to_rgb_stage2 +.endm + +/* Apple gas crashes on adrl, work around that by using adr. + * But this requires a copy of these constants for each function. + */ + +.balign 16 +.if \fast_st3 == 1 +Ljsimd_ycc_\colorid\()_neon_consts: +.else +Ljsimd_ycc_\colorid\()_neon_slowst3_consts: +.endif + .short 0, 0, 0, 0 + .short 22971, -11277, -23401, 29033 + .short -128, -128, -128, -128 + .short -128, -128, -128, -128 + +.if \fast_st3 == 1 +asm_function jsimd_ycc_\colorid\()_convert_neon +.else +asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3 +.endif + OUTPUT_WIDTH .req x0 + INPUT_BUF .req x1 + INPUT_ROW .req x2 + OUTPUT_BUF .req x3 + NUM_ROWS .req x4 + + INPUT_BUF0 .req x5 + INPUT_BUF1 .req x6 + INPUT_BUF2 .req x1 + + RGB .req x7 + Y .req x8 + U .req x9 + V .req x10 + N .req x15 + + sub sp, sp, 336 + str x15, [sp], 16 + + /* Load constants to d1, d2, d3 (v0.4h is just used for padding) */ + .if \fast_st3 == 1 + adr x15, Ljsimd_ycc_\colorid\()_neon_consts + .else + adr x15, Ljsimd_ycc_\colorid\()_neon_slowst3_consts + .endif + + /* Save NEON registers */ + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + ld1 {v0.4h, v1.4h}, [x15], 16 + ld1 {v2.8h}, [x15] + + /* Save ARM registers and handle input arguments */ + /* push {x4, x5, x6, x7, x8, x9, x10, x30} */ + stp x4, x5, [sp], 16 + stp x6, x7, [sp], 16 + stp x8, x9, [sp], 16 + stp x10, x30, [sp], 16 + ldr INPUT_BUF0, [INPUT_BUF] + ldr INPUT_BUF1, [INPUT_BUF, #8] + ldr INPUT_BUF2, [INPUT_BUF, #16] + .unreq INPUT_BUF + + /* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */ + movi v10.16b, #255 + movi v13.16b, #255 + + /* Outer loop over scanlines */ + cmp NUM_ROWS, #1 + b.lt 9f +0: + lsl x16, INPUT_ROW, #3 + ldr Y, [INPUT_BUF0, x16] + ldr U, [INPUT_BUF1, x16] + mov N, OUTPUT_WIDTH + ldr V, [INPUT_BUF2, x16] + add INPUT_ROW, INPUT_ROW, #1 + ldr RGB, [OUTPUT_BUF], #8 + + /* Inner loop over pixels */ + subs N, N, #8 + b.lt 3f + do_load 8 + do_yuv_to_rgb_stage1 + subs N, N, #8 + b.lt 2f +1: + do_yuv_to_rgb_stage2_store_load_stage1 \fast_st3 + subs N, N, #8 + b.ge 1b +2: + do_yuv_to_rgb_stage2 + do_store \bpp, 8, \fast_st3 + tst N, #7 + b.eq 8f +3: + tst N, #4 + b.eq 3f + do_load 4 +3: + tst N, #2 + b.eq 4f + do_load 2 +4: + tst N, #1 + b.eq 5f + do_load 1 +5: + do_yuv_to_rgb + tst N, #4 + b.eq 6f + do_store \bpp, 4, \fast_st3 +6: + tst N, #2 + b.eq 7f + do_store \bpp, 2, \fast_st3 +7: + tst N, #1 + b.eq 8f + do_store \bpp, 1, \fast_st3 +8: + subs NUM_ROWS, NUM_ROWS, #1 + b.gt 0b +9: + /* Restore all registers and return */ + sub sp, sp, #336 + ldr x15, [sp], 16 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + /* pop {r4, r5, r6, r7, r8, r9, r10, pc} */ + ldp x4, x5, [sp], 16 + ldp x6, x7, [sp], 16 + ldp x8, x9, [sp], 16 + ldp x10, x30, [sp], 16 + br x30 + .unreq OUTPUT_WIDTH + .unreq INPUT_ROW + .unreq OUTPUT_BUF + .unreq NUM_ROWS + .unreq INPUT_BUF0 + .unreq INPUT_BUF1 + .unreq INPUT_BUF2 + .unreq RGB + .unreq Y + .unreq U + .unreq V + .unreq N + +.purgem do_yuv_to_rgb +.purgem do_yuv_to_rgb_stage1 +.purgem do_yuv_to_rgb_stage2 +.purgem do_yuv_to_rgb_stage2_store_load_stage1 + +.endm + +/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize fast_st3*/ +generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, .4h, 0, .4h, 0, .4h, .8b, 1 + +generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b, 0 +generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b, 0 + +.purgem do_load +.purgem do_store + + +/*****************************************************************************/ + +/* + * jsimd_extrgb_ycc_convert_neon + * jsimd_extbgr_ycc_convert_neon + * jsimd_extrgbx_ycc_convert_neon + * jsimd_extbgrx_ycc_convert_neon + * jsimd_extxbgr_ycc_convert_neon + * jsimd_extxrgb_ycc_convert_neon + * + * Colorspace conversion RGB -> YCbCr + */ + +.macro do_store size + .if \size == 8 + st1 {v20.8b}, [Y], #8 + st1 {v21.8b}, [U], #8 + st1 {v22.8b}, [V], #8 + .elseif \size == 4 + st1 {v20.b}[0], [Y], #1 + st1 {v20.b}[1], [Y], #1 + st1 {v20.b}[2], [Y], #1 + st1 {v20.b}[3], [Y], #1 + st1 {v21.b}[0], [U], #1 + st1 {v21.b}[1], [U], #1 + st1 {v21.b}[2], [U], #1 + st1 {v21.b}[3], [U], #1 + st1 {v22.b}[0], [V], #1 + st1 {v22.b}[1], [V], #1 + st1 {v22.b}[2], [V], #1 + st1 {v22.b}[3], [V], #1 + .elseif \size == 2 + st1 {v20.b}[4], [Y], #1 + st1 {v20.b}[5], [Y], #1 + st1 {v21.b}[4], [U], #1 + st1 {v21.b}[5], [U], #1 + st1 {v22.b}[4], [V], #1 + st1 {v22.b}[5], [V], #1 + .elseif \size == 1 + st1 {v20.b}[6], [Y], #1 + st1 {v21.b}[6], [U], #1 + st1 {v22.b}[6], [V], #1 + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_load bpp, size, fast_ld3 + .if \bpp == 24 + .if \size == 8 + .if \fast_ld3 == 1 + ld3 {v10.8b, v11.8b, v12.8b}, [RGB], #24 + .else + ld1 {v10.b}[0], [RGB], #1 + ld1 {v11.b}[0], [RGB], #1 + ld1 {v12.b}[0], [RGB], #1 + + ld1 {v10.b}[1], [RGB], #1 + ld1 {v11.b}[1], [RGB], #1 + ld1 {v12.b}[1], [RGB], #1 + + ld1 {v10.b}[2], [RGB], #1 + ld1 {v11.b}[2], [RGB], #1 + ld1 {v12.b}[2], [RGB], #1 + + ld1 {v10.b}[3], [RGB], #1 + ld1 {v11.b}[3], [RGB], #1 + ld1 {v12.b}[3], [RGB], #1 + + ld1 {v10.b}[4], [RGB], #1 + ld1 {v11.b}[4], [RGB], #1 + ld1 {v12.b}[4], [RGB], #1 + + ld1 {v10.b}[5], [RGB], #1 + ld1 {v11.b}[5], [RGB], #1 + ld1 {v12.b}[5], [RGB], #1 + + ld1 {v10.b}[6], [RGB], #1 + ld1 {v11.b}[6], [RGB], #1 + ld1 {v12.b}[6], [RGB], #1 + + ld1 {v10.b}[7], [RGB], #1 + ld1 {v11.b}[7], [RGB], #1 + ld1 {v12.b}[7], [RGB], #1 + .endif + prfm pldl1keep, [RGB, #128] + .elseif \size == 4 + ld3 {v10.b, v11.b, v12.b}[0], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[1], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[2], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[3], [RGB], #3 + .elseif \size == 2 + ld3 {v10.b, v11.b, v12.b}[4], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[5], [RGB], #3 + .elseif \size == 1 + ld3 {v10.b, v11.b, v12.b}[6], [RGB], #3 + .else + .error unsupported macroblock size + .endif + .elseif \bpp == 32 + .if \size == 8 + ld4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], #32 + prfm pldl1keep, [RGB, #128] + .elseif \size == 4 + ld4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], #4 + .elseif \size == 2 + ld4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], #4 + .elseif \size == 1 + ld4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], #4 + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif +.endm + +.macro generate_jsimd_rgb_ycc_convert_neon colorid, bpp, r_offs, g_offs, \ + b_offs, fast_ld3 + +/* + * 2-stage pipelined RGB->YCbCr conversion + */ + +.macro do_rgb_to_yuv_stage1 + ushll v4.8h, v1\r_offs\().8b, #0 /* r = v4 */ + ushll v6.8h, v1\g_offs\().8b, #0 /* g = v6 */ + ushll v8.8h, v1\b_offs\().8b, #0 /* b = v8 */ + rev64 v18.4s, v1.4s + rev64 v26.4s, v1.4s + rev64 v28.4s, v1.4s + rev64 v30.4s, v1.4s + umull v14.4s, v4.4h, v0.h[0] + umull2 v16.4s, v4.8h, v0.h[0] + umlsl v18.4s, v4.4h, v0.h[3] + umlsl2 v26.4s, v4.8h, v0.h[3] + umlal v28.4s, v4.4h, v0.h[5] + umlal2 v30.4s, v4.8h, v0.h[5] + umlal v14.4s, v6.4h, v0.h[1] + umlal2 v16.4s, v6.8h, v0.h[1] + umlsl v18.4s, v6.4h, v0.h[4] + umlsl2 v26.4s, v6.8h, v0.h[4] + umlsl v28.4s, v6.4h, v0.h[6] + umlsl2 v30.4s, v6.8h, v0.h[6] + umlal v14.4s, v8.4h, v0.h[2] + umlal2 v16.4s, v8.8h, v0.h[2] + umlal v18.4s, v8.4h, v0.h[5] + umlal2 v26.4s, v8.8h, v0.h[5] + umlsl v28.4s, v8.4h, v0.h[7] + umlsl2 v30.4s, v8.8h, v0.h[7] +.endm + +.macro do_rgb_to_yuv_stage2 + rshrn v20.4h, v14.4s, #16 + shrn v22.4h, v18.4s, #16 + shrn v24.4h, v28.4s, #16 + rshrn2 v20.8h, v16.4s, #16 + shrn2 v22.8h, v26.4s, #16 + shrn2 v24.8h, v30.4s, #16 + xtn v20.8b, v20.8h /* v20 = y */ + xtn v21.8b, v22.8h /* v21 = u */ + xtn v22.8b, v24.8h /* v22 = v */ +.endm + +.macro do_rgb_to_yuv + do_rgb_to_yuv_stage1 + do_rgb_to_yuv_stage2 +.endm + +/* TODO: expand macros and interleave instructions if some in-order + * ARM64 processor actually can dual-issue LOAD/STORE with ALU */ +.macro do_rgb_to_yuv_stage2_store_load_stage1 fast_ld3 + do_rgb_to_yuv_stage2 + do_load \bpp, 8, \fast_ld3 + st1 {v20.8b}, [Y], #8 + st1 {v21.8b}, [U], #8 + st1 {v22.8b}, [V], #8 + do_rgb_to_yuv_stage1 +.endm + +.balign 16 +.if \fast_ld3 == 1 +Ljsimd_\colorid\()_ycc_neon_consts: +.else +Ljsimd_\colorid\()_ycc_neon_slowld3_consts: +.endif + .short 19595, 38470, 7471, 11059 + .short 21709, 32768, 27439, 5329 + .short 32767, 128, 32767, 128 + .short 32767, 128, 32767, 128 + +.if \fast_ld3 == 1 +asm_function jsimd_\colorid\()_ycc_convert_neon +.else +asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3 +.endif + OUTPUT_WIDTH .req w0 + INPUT_BUF .req x1 + OUTPUT_BUF .req x2 + OUTPUT_ROW .req x3 + NUM_ROWS .req x4 + + OUTPUT_BUF0 .req x5 + OUTPUT_BUF1 .req x6 + OUTPUT_BUF2 .req x2 /* OUTPUT_BUF */ + + RGB .req x7 + Y .req x9 + U .req x10 + V .req x11 + N .req w12 + + /* Load constants to d0, d1, d2, d3 */ + .if \fast_ld3 == 1 + adr x13, Ljsimd_\colorid\()_ycc_neon_consts + .else + adr x13, Ljsimd_\colorid\()_ycc_neon_slowld3_consts + .endif + ld1 {v0.8h, v1.8h}, [x13] + + ldr OUTPUT_BUF0, [OUTPUT_BUF] + ldr OUTPUT_BUF1, [OUTPUT_BUF, #8] + ldr OUTPUT_BUF2, [OUTPUT_BUF, #16] + .unreq OUTPUT_BUF + + /* Save NEON registers */ + sub sp, sp, #64 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + /* Outer loop over scanlines */ + cmp NUM_ROWS, #1 + b.lt 9f +0: + ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #3] + ldr U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #3] + mov N, OUTPUT_WIDTH + ldr V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #3] + add OUTPUT_ROW, OUTPUT_ROW, #1 + ldr RGB, [INPUT_BUF], #8 + + /* Inner loop over pixels */ + subs N, N, #8 + b.lt 3f + do_load \bpp, 8, \fast_ld3 + do_rgb_to_yuv_stage1 + subs N, N, #8 + b.lt 2f +1: + do_rgb_to_yuv_stage2_store_load_stage1 \fast_ld3 + subs N, N, #8 + b.ge 1b +2: + do_rgb_to_yuv_stage2 + do_store 8 + tst N, #7 + b.eq 8f +3: + tbz N, #2, 3f + do_load \bpp, 4, \fast_ld3 +3: + tbz N, #1, 4f + do_load \bpp, 2, \fast_ld3 +4: + tbz N, #0, 5f + do_load \bpp, 1, \fast_ld3 +5: + do_rgb_to_yuv + tbz N, #2, 6f + do_store 4 +6: + tbz N, #1, 7f + do_store 2 +7: + tbz N, #0, 8f + do_store 1 +8: + subs NUM_ROWS, NUM_ROWS, #1 + b.gt 0b +9: + /* Restore all registers and return */ + sub sp, sp, #64 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + br x30 + + .unreq OUTPUT_WIDTH + .unreq OUTPUT_ROW + .unreq INPUT_BUF + .unreq NUM_ROWS + .unreq OUTPUT_BUF0 + .unreq OUTPUT_BUF1 + .unreq OUTPUT_BUF2 + .unreq RGB + .unreq Y + .unreq U + .unreq V + .unreq N + +.purgem do_rgb_to_yuv +.purgem do_rgb_to_yuv_stage1 +.purgem do_rgb_to_yuv_stage2 +.purgem do_rgb_to_yuv_stage2_store_load_stage1 + +.endm + +/*--------------------------------- id ----- bpp R G B Fast LD3 */ +generate_jsimd_rgb_ycc_convert_neon extrgb, 24, 0, 1, 2, 1 +generate_jsimd_rgb_ycc_convert_neon extbgr, 24, 2, 1, 0, 1 +generate_jsimd_rgb_ycc_convert_neon extrgbx, 32, 0, 1, 2, 1 +generate_jsimd_rgb_ycc_convert_neon extbgrx, 32, 2, 1, 0, 1 +generate_jsimd_rgb_ycc_convert_neon extxbgr, 32, 3, 2, 1, 1 +generate_jsimd_rgb_ycc_convert_neon extxrgb, 32, 1, 2, 3, 1 + +generate_jsimd_rgb_ycc_convert_neon extrgb, 24, 0, 1, 2, 0 +generate_jsimd_rgb_ycc_convert_neon extbgr, 24, 2, 1, 0, 0 + +.purgem do_load +.purgem do_store + + +/*****************************************************************************/ + +/* + * Load data into workspace, applying unsigned->signed conversion + * + * TODO: can be combined with 'jsimd_fdct_ifast_neon' to get + * rid of VST1.16 instructions + */ + +asm_function jsimd_convsamp_neon + SAMPLE_DATA .req x0 + START_COL .req x1 + WORKSPACE .req x2 + TMP1 .req x9 + TMP2 .req x10 + TMP3 .req x11 + TMP4 .req x12 + TMP5 .req x13 + TMP6 .req x14 + TMP7 .req x15 + TMP8 .req x4 + TMPDUP .req w3 + + mov TMPDUP, #128 + ldp TMP1, TMP2, [SAMPLE_DATA], 16 + ldp TMP3, TMP4, [SAMPLE_DATA], 16 + dup v0.8b, TMPDUP + add TMP1, TMP1, START_COL + add TMP2, TMP2, START_COL + ldp TMP5, TMP6, [SAMPLE_DATA], 16 + add TMP3, TMP3, START_COL + add TMP4, TMP4, START_COL + ldp TMP7, TMP8, [SAMPLE_DATA], 16 + add TMP5, TMP5, START_COL + add TMP6, TMP6, START_COL + ld1 {v16.8b}, [TMP1] + add TMP7, TMP7, START_COL + add TMP8, TMP8, START_COL + ld1 {v17.8b}, [TMP2] + usubl v16.8h, v16.8b, v0.8b + ld1 {v18.8b}, [TMP3] + usubl v17.8h, v17.8b, v0.8b + ld1 {v19.8b}, [TMP4] + usubl v18.8h, v18.8b, v0.8b + ld1 {v20.8b}, [TMP5] + usubl v19.8h, v19.8b, v0.8b + ld1 {v21.8b}, [TMP6] + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [WORKSPACE], 64 + usubl v20.8h, v20.8b, v0.8b + ld1 {v22.8b}, [TMP7] + usubl v21.8h, v21.8b, v0.8b + ld1 {v23.8b}, [TMP8] + usubl v22.8h, v22.8b, v0.8b + usubl v23.8h, v23.8b, v0.8b + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [WORKSPACE], 64 + + br x30 + + .unreq SAMPLE_DATA + .unreq START_COL + .unreq WORKSPACE + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + .unreq TMPDUP + +/*****************************************************************************/ + +/* + * jsimd_fdct_islow_neon + * + * This file contains a slow-but-accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). The following code is based + * directly on the IJG''s original jfdctint.c; see the jfdctint.c for + * more details. + * + * TODO: can be combined with 'jsimd_convsamp_neon' to get + * rid of a bunch of VLD1.16 instructions + */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define DESCALE_P1 (CONST_BITS-PASS1_BITS) +#define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +.balign 16 +Ljsimd_fdct_islow_neon_consts: + .short F_0_298 + .short -F_0_390 + .short F_0_541 + .short F_0_765 + .short - F_0_899 + .short F_1_175 + .short F_1_501 + .short - F_1_847 + .short - F_1_961 + .short F_2_053 + .short - F_2_562 + .short F_3_072 + .short 0 /* padding */ + .short 0 + .short 0 + .short 0 + +#undef F_0_298 +#undef F_0_390 +#undef F_0_541 +#undef F_0_765 +#undef F_0_899 +#undef F_1_175 +#undef F_1_501 +#undef F_1_847 +#undef F_1_961 +#undef F_2_053 +#undef F_2_562 +#undef F_3_072 +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] + +asm_function jsimd_fdct_islow_neon + + DATA .req x0 + TMP .req x9 + + /* Load constants */ + adr TMP, Ljsimd_fdct_islow_neon_consts + ld1 {v0.8h, v1.8h}, [TMP] + + /* Save NEON registers */ + sub sp, sp, #64 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + /* Load all DATA into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 | v16.8h + * 1 | d18 | d19 | v17.8h + * 2 | d20 | d21 | v18.8h + * 3 | d22 | d23 | v19.8h + * 4 | d24 | d25 | v20.8h + * 5 | d26 | d27 | v21.8h + * 6 | d28 | d29 | v22.8h + * 7 | d30 | d31 | v23.8h + */ + + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + sub DATA, DATA, #64 + + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v31, v2, v3, v4 + /* 1-D FDCT */ + add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */ + sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */ + add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */ + sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */ + add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */ + sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */ + add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */ + sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */ + + /* even part */ + + add v8.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */ + sub v9.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */ + add v10.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */ + sub v11.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */ + + add v16.8h, v8.8h, v10.8h /* tmp10 + tmp11 */ + sub v20.8h, v8.8h, v10.8h /* tmp10 - tmp11 */ + + add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ + + shl v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); */ + shl v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); */ + + smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + mov v22.16b, v18.16b + mov v25.16b, v24.16b + + smlal v18.4s, v9.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal2 v24.4s, v9.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal v22.4s, v11.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + smlal2 v25.4s, v11.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + + rshrn v18.4h, v18.4s, #DESCALE_P1 + rshrn v22.4h, v22.4s, #DESCALE_P1 + rshrn2 v18.8h, v24.4s, #DESCALE_P1 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P1 /* dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + + /* Odd part */ + + add v8.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */ + add v9.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */ + add v10.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */ + add v11.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */ + smull v4.4s, v10.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */ + smull2 v5.4s, v10.8h, XFIX_P_1_175 + smlal v4.4s, v11.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */ + smlal2 v5.4s, v11.8h, XFIX_P_1_175 + + smull2 v24.4s, v28.8h, XFIX_P_0_298 + smull2 v25.4s, v29.8h, XFIX_P_2_053 + smull2 v26.4s, v30.8h, XFIX_P_3_072 + smull2 v27.4s, v31.8h, XFIX_P_1_501 + smull v28.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */ + smull v29.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */ + smull v30.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */ + smull v31.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */ + + smull2 v12.4s, v8.8h, XFIX_N_0_899 + smull2 v13.4s, v9.8h, XFIX_N_2_562 + smull2 v14.4s, v10.8h, XFIX_N_1_961 + smull2 v15.4s, v11.8h, XFIX_N_0_390 + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + + add v10.4s, v10.4s, v4.4s /* z3 += z5 */ + add v14.4s, v14.4s, v5.4s + add v11.4s, v11.4s, v4.4s /* z4 += z5 */ + add v15.4s, v15.4s, v5.4s + + add v28.4s, v28.4s, v8.4s /* tmp4 += z1 */ + add v24.4s, v24.4s, v12.4s + add v29.4s, v29.4s, v9.4s /* tmp5 += z2 */ + add v25.4s, v25.4s, v13.4s + add v30.4s, v30.4s, v10.4s /* tmp6 += z3 */ + add v26.4s, v26.4s, v14.4s + add v31.4s, v31.4s, v11.4s /* tmp7 += z4 */ + add v27.4s, v27.4s, v15.4s + + add v28.4s, v28.4s, v10.4s /* tmp4 += z3 */ + add v24.4s, v24.4s, v14.4s + add v29.4s, v29.4s, v11.4s /* tmp5 += z4 */ + add v25.4s, v25.4s, v15.4s + add v30.4s, v30.4s, v9.4s /* tmp6 += z2 */ + add v26.4s, v26.4s, v13.4s + add v31.4s, v31.4s, v8.4s /* tmp7 += z1 */ + add v27.4s, v27.4s, v12.4s + + rshrn v23.4h, v28.4s, #DESCALE_P1 + rshrn v21.4h, v29.4s, #DESCALE_P1 + rshrn v19.4h, v30.4s, #DESCALE_P1 + rshrn v17.4h, v31.4s, #DESCALE_P1 + rshrn2 v23.8h, v24.4s, #DESCALE_P1 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, #DESCALE_P1 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P1 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P1 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v31, v2, v3, v4 + + /* 1-D FDCT */ + add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */ + sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */ + add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */ + sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */ + add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */ + sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */ + add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */ + sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */ + + /* even part */ + add v8.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */ + sub v9.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */ + add v10.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */ + sub v11.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */ + + add v16.8h, v8.8h, v10.8h /* tmp10 + tmp11 */ + sub v20.8h, v8.8h, v10.8h /* tmp10 - tmp11 */ + + add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ + + srshr v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); */ + srshr v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); */ + + smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + mov v22.16b, v18.16b + mov v25.16b, v24.16b + + smlal v18.4s, v9.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal2 v24.4s, v9.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal v22.4s, v11.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + smlal2 v25.4s, v11.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + + rshrn v18.4h, v18.4s, #DESCALE_P2 + rshrn v22.4h, v22.4s, #DESCALE_P2 + rshrn2 v18.8h, v24.4s, #DESCALE_P2 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P2 /* dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + + /* Odd part */ + add v8.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */ + add v9.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */ + add v10.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */ + add v11.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */ + + smull v4.4s, v10.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */ + smull2 v5.4s, v10.8h, XFIX_P_1_175 + smlal v4.4s, v11.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */ + smlal2 v5.4s, v11.8h, XFIX_P_1_175 + + smull2 v24.4s, v28.8h, XFIX_P_0_298 + smull2 v25.4s, v29.8h, XFIX_P_2_053 + smull2 v26.4s, v30.8h, XFIX_P_3_072 + smull2 v27.4s, v31.8h, XFIX_P_1_501 + smull v28.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */ + smull v29.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */ + smull v30.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */ + smull v31.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */ + + smull2 v12.4s, v8.8h, XFIX_N_0_899 + smull2 v13.4s, v9.8h, XFIX_N_2_562 + smull2 v14.4s, v10.8h, XFIX_N_1_961 + smull2 v15.4s, v11.8h, XFIX_N_0_390 + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + + add v10.4s, v10.4s, v4.4s + add v14.4s, v14.4s, v5.4s + add v11.4s, v11.4s, v4.4s + add v15.4s, v15.4s, v5.4s + + add v28.4s, v28.4s, v8.4s /* tmp4 += z1 */ + add v24.4s, v24.4s, v12.4s + add v29.4s, v29.4s, v9.4s /* tmp5 += z2 */ + add v25.4s, v25.4s, v13.4s + add v30.4s, v30.4s, v10.4s /* tmp6 += z3 */ + add v26.4s, v26.4s, v14.4s + add v31.4s, v31.4s, v11.4s /* tmp7 += z4 */ + add v27.4s, v27.4s, v15.4s + + add v28.4s, v28.4s, v10.4s /* tmp4 += z3 */ + add v24.4s, v24.4s, v14.4s + add v29.4s, v29.4s, v11.4s /* tmp5 += z4 */ + add v25.4s, v25.4s, v15.4s + add v30.4s, v30.4s, v9.4s /* tmp6 += z2 */ + add v26.4s, v26.4s, v13.4s + add v31.4s, v31.4s, v8.4s /* tmp7 += z1 */ + add v27.4s, v27.4s, v12.4s + + rshrn v23.4h, v28.4s, #DESCALE_P2 + rshrn v21.4h, v29.4s, #DESCALE_P2 + rshrn v19.4h, v30.4s, #DESCALE_P2 + rshrn v17.4h, v31.4s, #DESCALE_P2 + rshrn2 v23.8h, v24.4s, #DESCALE_P2 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, #DESCALE_P2 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P2 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P2 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + + /* store results */ + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + + /* Restore NEON registers */ + sub sp, sp, #64 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + br x30 + + .unreq DATA + .unreq TMP + +#undef XFIX_P_0_298 +#undef XFIX_N_0_390 +#undef XFIX_P_0_541 +#undef XFIX_P_0_765 +#undef XFIX_N_0_899 +#undef XFIX_P_1_175 +#undef XFIX_P_1_501 +#undef XFIX_N_1_847 +#undef XFIX_N_1_961 +#undef XFIX_P_2_053 +#undef XFIX_N_2_562 +#undef XFIX_P_3_072 + + +/*****************************************************************************/ + +/* + * jsimd_fdct_ifast_neon + * + * This function contains a fast, not so accurate integer implementation of + * the forward DCT (Discrete Cosine Transform). It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_fdct_ifast' + * function from jfdctfst.c + * + * TODO: can be combined with 'jsimd_convsamp_neon' to get + * rid of a bunch of VLD1.16 instructions + */ + +#undef XFIX_0_541196100 +#define XFIX_0_382683433 v0.h[0] +#define XFIX_0_541196100 v0.h[1] +#define XFIX_0_707106781 v0.h[2] +#define XFIX_1_306562965 v0.h[3] + +.balign 16 +Ljsimd_fdct_ifast_neon_consts: + .short (98 * 128) /* XFIX_0_382683433 */ + .short (139 * 128) /* XFIX_0_541196100 */ + .short (181 * 128) /* XFIX_0_707106781 */ + .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ + +asm_function jsimd_fdct_ifast_neon + + DATA .req x0 + TMP .req x9 + + /* Load constants */ + adr TMP, Ljsimd_fdct_ifast_neon_consts + ld1 {v0.4h}, [TMP] + + /* Load all DATA into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 | v0.8h + * 1 | d18 | d19 | q9 + * 2 | d20 | d21 | q10 + * 3 | d22 | d23 | q11 + * 4 | d24 | d25 | q12 + * 5 | d26 | d27 | q13 + * 6 | d28 | d29 | q14 + * 7 | d30 | d31 | q15 + */ + + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + mov TMP, #2 + sub DATA, DATA, #64 +1: + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v1, v2, v3, v4 + subs TMP, TMP, #1 + /* 1-D FDCT */ + add v4.8h, v19.8h, v20.8h + sub v20.8h, v19.8h, v20.8h + sub v28.8h, v18.8h, v21.8h + add v18.8h, v18.8h, v21.8h + sub v29.8h, v17.8h, v22.8h + add v17.8h, v17.8h, v22.8h + sub v21.8h, v16.8h, v23.8h + add v16.8h, v16.8h, v23.8h + sub v6.8h, v17.8h, v18.8h + sub v7.8h, v16.8h, v4.8h + add v5.8h, v17.8h, v18.8h + add v6.8h, v6.8h, v7.8h + add v4.8h, v16.8h, v4.8h + sqdmulh v6.8h, v6.8h, XFIX_0_707106781 + add v19.8h, v20.8h, v28.8h + add v16.8h, v4.8h, v5.8h + sub v20.8h, v4.8h, v5.8h + add v5.8h, v28.8h, v29.8h + add v29.8h, v29.8h, v21.8h + sqdmulh v5.8h, v5.8h, XFIX_0_707106781 + sub v28.8h, v19.8h, v29.8h + add v18.8h, v7.8h, v6.8h + sqdmulh v28.8h, v28.8h, XFIX_0_382683433 + sub v22.8h, v7.8h, v6.8h + sqdmulh v19.8h, v19.8h, XFIX_0_541196100 + sqdmulh v7.8h, v29.8h, XFIX_1_306562965 + add v6.8h, v21.8h, v5.8h + sub v5.8h, v21.8h, v5.8h + add v29.8h, v29.8h, v28.8h + add v19.8h, v19.8h, v28.8h + add v29.8h, v29.8h, v7.8h + add v21.8h, v5.8h, v19.8h + sub v19.8h, v5.8h, v19.8h + add v17.8h, v6.8h, v29.8h + sub v23.8h, v6.8h, v29.8h + + b.ne 1b + + /* store results */ + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + + br x30 + + .unreq DATA + .unreq TMP +#undef XFIX_0_382683433 +#undef XFIX_0_541196100 +#undef XFIX_0_707106781 +#undef XFIX_1_306562965 + + +/*****************************************************************************/ + +/* + * GLOBAL(void) + * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); + * + */ +asm_function jsimd_quantize_neon + + COEF_BLOCK .req x0 + DIVISORS .req x1 + WORKSPACE .req x2 + + RECIPROCAL .req DIVISORS + CORRECTION .req x9 + SHIFT .req x10 + LOOP_COUNT .req x11 + + mov LOOP_COUNT, #2 + add CORRECTION, DIVISORS, #(64 * 2) + add SHIFT, DIVISORS, #(64 * 6) +1: + subs LOOP_COUNT, LOOP_COUNT, #1 + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [WORKSPACE], 64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [CORRECTION], 64 + abs v20.8h, v0.8h + abs v21.8h, v1.8h + abs v22.8h, v2.8h + abs v23.8h, v3.8h + ld1 {v28.8h, v29.8h, v30.8h, v31.8h}, [RECIPROCAL], 64 + add v20.8h, v20.8h, v4.8h /* add correction */ + add v21.8h, v21.8h, v5.8h + add v22.8h, v22.8h, v6.8h + add v23.8h, v23.8h, v7.8h + umull v4.4s, v20.4h, v28.4h /* multiply by reciprocal */ + umull2 v16.4s, v20.8h, v28.8h + umull v5.4s, v21.4h, v29.4h + umull2 v17.4s, v21.8h, v29.8h + umull v6.4s, v22.4h, v30.4h /* multiply by reciprocal */ + umull2 v18.4s, v22.8h, v30.8h + umull v7.4s, v23.4h, v31.4h + umull2 v19.4s, v23.8h, v31.8h + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [SHIFT], 64 + shrn v4.4h, v4.4s, #16 + shrn v5.4h, v5.4s, #16 + shrn v6.4h, v6.4s, #16 + shrn v7.4h, v7.4s, #16 + shrn2 v4.8h, v16.4s, #16 + shrn2 v5.8h, v17.4s, #16 + shrn2 v6.8h, v18.4s, #16 + shrn2 v7.8h, v19.4s, #16 + neg v24.8h, v24.8h + neg v25.8h, v25.8h + neg v26.8h, v26.8h + neg v27.8h, v27.8h + sshr v0.8h, v0.8h, #15 /* extract sign */ + sshr v1.8h, v1.8h, #15 + sshr v2.8h, v2.8h, #15 + sshr v3.8h, v3.8h, #15 + ushl v4.8h, v4.8h, v24.8h /* shift */ + ushl v5.8h, v5.8h, v25.8h + ushl v6.8h, v6.8h, v26.8h + ushl v7.8h, v7.8h, v27.8h + + eor v4.16b, v4.16b, v0.16b /* restore sign */ + eor v5.16b, v5.16b, v1.16b + eor v6.16b, v6.16b, v2.16b + eor v7.16b, v7.16b, v3.16b + sub v4.8h, v4.8h, v0.8h + sub v5.8h, v5.8h, v1.8h + sub v6.8h, v6.8h, v2.8h + sub v7.8h, v7.8h, v3.8h + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [COEF_BLOCK], 64 + + b.ne 1b + + br x30 /* return */ + + .unreq COEF_BLOCK + .unreq DIVISORS + .unreq WORKSPACE + .unreq RECIPROCAL + .unreq CORRECTION + .unreq SHIFT + .unreq LOOP_COUNT + + +/*****************************************************************************/ + +/* + * Downsample pixel values of a single component. + * This version handles the common case of 2:1 horizontal and 1:1 vertical, + * without smoothing. + * + * GLOBAL(void) + * jsimd_h2v1_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, + * JDIMENSION width_blocks, JSAMPARRAY input_data, + * JSAMPARRAY output_data); + */ + +.balign 16 +Ljsimd_h2_downsample_neon_consts: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \ + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \ + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \ + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \ + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */ + .byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */ + .byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \ + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */ + .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */ + +asm_function jsimd_h2v1_downsample_neon + IMAGE_WIDTH .req x0 + MAX_V_SAMP .req x1 + V_SAMP .req x2 + BLOCK_WIDTH .req x3 + INPUT_DATA .req x4 + OUTPUT_DATA .req x5 + OUTPTR .req x9 + INPTR .req x10 + TMP1 .req x11 + TMP2 .req x12 + TMP3 .req x13 + TMPDUP .req w15 + + mov TMPDUP, #0x10000 + lsl TMP2, BLOCK_WIDTH, #4 + sub TMP2, TMP2, IMAGE_WIDTH + adr TMP3, Ljsimd_h2_downsample_neon_consts + add TMP3, TMP3, TMP2, lsl #4 + dup v16.4s, TMPDUP + ld1 {v18.16b}, [TMP3] + +1: /* row loop */ + ldr INPTR, [INPUT_DATA], #8 + ldr OUTPTR, [OUTPUT_DATA], #8 + subs TMP1, BLOCK_WIDTH, #1 + b.eq 3f +2: /* columns */ + ld1 {v0.16b}, [INPTR], #16 + mov v4.16b, v16.16b + subs TMP1, TMP1, #1 + uadalp v4.8h, v0.16b + shrn v6.8b, v4.8h, #1 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 2b +3: /* last columns */ + ld1 {v0.16b}, [INPTR] + mov v4.16b, v16.16b + subs V_SAMP, V_SAMP, #1 + /* expand right */ + tbl v2.16b, {v0.16b}, v18.16b + uadalp v4.8h, v2.16b + shrn v6.8b, v4.8h, #1 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 1b + + br x30 + + .unreq IMAGE_WIDTH + .unreq MAX_V_SAMP + .unreq V_SAMP + .unreq BLOCK_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA + .unreq OUTPTR + .unreq INPTR + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMPDUP + + +/*****************************************************************************/ + +/* + * Downsample pixel values of a single component. + * This version handles the common case of 2:1 horizontal and 2:1 vertical, + * without smoothing. + * + * GLOBAL(void) + * jsimd_h2v2_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, JDIMENSION width_blocks, + * JSAMPARRAY input_data, JSAMPARRAY output_data); + */ + +.balign 16 +asm_function jsimd_h2v2_downsample_neon + IMAGE_WIDTH .req x0 + MAX_V_SAMP .req x1 + V_SAMP .req x2 + BLOCK_WIDTH .req x3 + INPUT_DATA .req x4 + OUTPUT_DATA .req x5 + OUTPTR .req x9 + INPTR0 .req x10 + INPTR1 .req x14 + TMP1 .req x11 + TMP2 .req x12 + TMP3 .req x13 + TMPDUP .req w15 + + mov TMPDUP, #1 + lsl TMP2, BLOCK_WIDTH, #4 + lsl TMPDUP, TMPDUP, #17 + sub TMP2, TMP2, IMAGE_WIDTH + adr TMP3, Ljsimd_h2_downsample_neon_consts + orr TMPDUP, TMPDUP, #1 + add TMP3, TMP3, TMP2, lsl #4 + dup v16.4s, TMPDUP + ld1 {v18.16b}, [TMP3] + +1: /* row loop */ + ldr INPTR0, [INPUT_DATA], #8 + ldr OUTPTR, [OUTPUT_DATA], #8 + ldr INPTR1, [INPUT_DATA], #8 + subs TMP1, BLOCK_WIDTH, #1 + b.eq 3f +2: /* columns */ + ld1 {v0.16b}, [INPTR0], #16 + ld1 {v1.16b}, [INPTR1], #16 + mov v4.16b, v16.16b + subs TMP1, TMP1, #1 + uadalp v4.8h, v0.16b + uadalp v4.8h, v1.16b + shrn v6.8b, v4.8h, #2 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 2b +3: /* last columns */ + ld1 {v0.16b}, [INPTR0], #16 + ld1 {v1.16b}, [INPTR1], #16 + mov v4.16b, v16.16b + subs V_SAMP, V_SAMP, #1 + /* expand right */ + tbl v2.16b, {v0.16b}, v18.16b + tbl v3.16b, {v1.16b}, v18.16b + uadalp v4.8h, v2.16b + uadalp v4.8h, v3.16b + shrn v6.8b, v4.8h, #2 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 1b + + br x30 + + .unreq IMAGE_WIDTH + .unreq MAX_V_SAMP + .unreq V_SAMP + .unreq BLOCK_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA + .unreq OUTPTR + .unreq INPTR0 + .unreq INPTR1 + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMPDUP + + +/*****************************************************************************/ + +/* + * GLOBAL(JOCTET*) + * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * + */ + + BUFFER .req x1 + PUT_BUFFER .req x6 + PUT_BITS .req x7 + PUT_BITSw .req w7 + +.macro emit_byte + sub PUT_BITS, PUT_BITS, #0x8 + lsr x19, PUT_BUFFER, PUT_BITS + uxtb w19, w19 + strb w19, [BUFFER, #1]! + cmp w19, #0xff + b.ne 14f + strb wzr, [BUFFER, #1]! +14: +.endm +.macro put_bits CODE, SIZE + lsl PUT_BUFFER, PUT_BUFFER, \SIZE + add PUT_BITS, PUT_BITS, \SIZE + orr PUT_BUFFER, PUT_BUFFER, \CODE +.endm +.macro checkbuf31 + cmp PUT_BITS, #0x20 + b.lt 31f + emit_byte + emit_byte + emit_byte + emit_byte +31: +.endm +.macro checkbuf47 + cmp PUT_BITS, #0x30 + b.lt 47f + emit_byte + emit_byte + emit_byte + emit_byte + emit_byte + emit_byte +47: +.endm + +.macro generate_jsimd_huff_encode_one_block fast_tbl + +.balign 16 +.if \fast_tbl == 1 +Ljsimd_huff_encode_one_block_neon_consts: +.else +Ljsimd_huff_encode_one_block_neon_slowtbl_consts: +.endif + .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \ + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 +.if \fast_tbl == 1 + .byte 0, 1, 2, 3, 16, 17, 32, 33, \ + 18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */ + .byte 34, 35, 48, 49, 255, 255, 50, 51, \ + 36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */ + .byte 8, 9, 22, 23, 36, 37, 50, 51, \ + 255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */ + .byte 54, 55, 40, 41, 26, 27, 12, 13, \ + 14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */ + .byte 6, 7, 20, 21, 34, 35, 48, 49, \ + 50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */ + .byte 42, 43, 28, 29, 14, 15, 30, 31, \ + 44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */ + .byte 255, 255, 255, 255, 56, 57, 42, 43, \ + 28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */ + .byte 26, 27, 40, 41, 42, 43, 28, 29, \ + 14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */ + .byte 255, 255, 255, 255, 0, 1, 255, 255, \ + 255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */ + .byte 255, 255, 255, 255, 255, 255, 255, 255, \ + 0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */ + .byte 255, 255, 255, 255, 255, 255, 255, 255, \ + 255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */ + .byte 4, 5, 6, 7, 255, 255, 255, 255, \ + 255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */ +.endif + +.if \fast_tbl == 1 +asm_function jsimd_huff_encode_one_block_neon +.else +asm_function jsimd_huff_encode_one_block_neon_slowtbl +.endif + sub sp, sp, 272 + sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */ + /* Save ARM registers */ + stp x19, x20, [sp], 16 +.if \fast_tbl == 1 + adr x15, Ljsimd_huff_encode_one_block_neon_consts +.else + adr x15, Ljsimd_huff_encode_one_block_neon_slowtbl_consts +.endif + ldr PUT_BUFFER, [x0, #0x10] + ldr PUT_BITSw, [x0, #0x18] + ldrsh w12, [x2] /* load DC coeff in w12 */ + /* prepare data */ +.if \fast_tbl == 1 + ld1 {v23.16b}, [x15], #16 + ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x15], #64 + ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x15], #64 + ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x15], #64 + ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x2], #64 + ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [x2], #64 + sub w12, w12, w3 /* last_dc_val, not used afterwards */ + /* ZigZag 8x8 */ + tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b + tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b + tbl v2.16b, {v25.16b, v26.16b, v27.16b, v28.16b}, v2.16b + tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b + tbl v4.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v4.16b + tbl v5.16b, {v25.16b, v26.16b, v27.16b, v28.16b}, v5.16b + tbl v6.16b, {v27.16b, v28.16b, v29.16b, v30.16b}, v6.16b + tbl v7.16b, {v29.16b, v30.16b, v31.16b}, v7.16b + ins v0.h[0], w12 + tbx v1.16b, {v28.16b}, v16.16b + tbx v2.16b, {v29.16b, v30.16b}, v17.16b + tbx v5.16b, {v29.16b, v30.16b}, v18.16b + tbx v6.16b, {v31.16b}, v19.16b +.else + add x13, x2, #0x22 + sub w12, w12, w3 /* last_dc_val, not used afterwards */ + ld1 {v23.16b}, [x15] + add x14, x2, #0x18 + add x3, x2, #0x36 + ins v0.h[0], w12 + add x9, x2, #0x2 + ld1 {v1.h}[0], [x13] + add x15, x2, #0x30 + ld1 {v2.h}[0], [x14] + add x19, x2, #0x26 + ld1 {v3.h}[0], [x3] + add x20, x2, #0x28 + ld1 {v0.h}[1], [x9] + add x12, x2, #0x10 + ld1 {v1.h}[1], [x15] + add x13, x2, #0x40 + ld1 {v2.h}[1], [x19] + add x14, x2, #0x34 + ld1 {v3.h}[1], [x20] + add x3, x2, #0x1a + ld1 {v0.h}[2], [x12] + add x9, x2, #0x20 + ld1 {v1.h}[2], [x13] + add x15, x2, #0x32 + ld1 {v2.h}[2], [x14] + add x19, x2, #0x42 + ld1 {v3.h}[2], [x3] + add x20, x2, #0xc + ld1 {v0.h}[3], [x9] + add x12, x2, #0x12 + ld1 {v1.h}[3], [x15] + add x13, x2, #0x24 + ld1 {v2.h}[3], [x19] + add x14, x2, #0x50 + ld1 {v3.h}[3], [x20] + add x3, x2, #0xe + ld1 {v0.h}[4], [x12] + add x9, x2, #0x4 + ld1 {v1.h}[4], [x13] + add x15, x2, #0x16 + ld1 {v2.h}[4], [x14] + add x19, x2, #0x60 + ld1 {v3.h}[4], [x3] + add x20, x2, #0x1c + ld1 {v0.h}[5], [x9] + add x12, x2, #0x6 + ld1 {v1.h}[5], [x15] + add x13, x2, #0x8 + ld1 {v2.h}[5], [x19] + add x14, x2, #0x52 + ld1 {v3.h}[5], [x20] + add x3, x2, #0x2a + ld1 {v0.h}[6], [x12] + add x9, x2, #0x14 + ld1 {v1.h}[6], [x13] + add x15, x2, #0xa + ld1 {v2.h}[6], [x14] + add x19, x2, #0x44 + ld1 {v3.h}[6], [x3] + add x20, x2, #0x38 + ld1 {v0.h}[7], [x9] + add x12, x2, #0x46 + ld1 {v1.h}[7], [x15] + add x13, x2, #0x3a + ld1 {v2.h}[7], [x19] + add x14, x2, #0x74 + ld1 {v3.h}[7], [x20] + add x3, x2, #0x6a + ld1 {v4.h}[0], [x12] + add x9, x2, #0x54 + ld1 {v5.h}[0], [x13] + add x15, x2, #0x2c + ld1 {v6.h}[0], [x14] + add x19, x2, #0x76 + ld1 {v7.h}[0], [x3] + add x20, x2, #0x78 + ld1 {v4.h}[1], [x9] + add x12, x2, #0x62 + ld1 {v5.h}[1], [x15] + add x13, x2, #0x1e + ld1 {v6.h}[1], [x19] + add x14, x2, #0x68 + ld1 {v7.h}[1], [x20] + add x3, x2, #0x7a + ld1 {v4.h}[2], [x12] + add x9, x2, #0x70 + ld1 {v5.h}[2], [x13] + add x15, x2, #0x2e + ld1 {v6.h}[2], [x14] + add x19, x2, #0x5a + ld1 {v7.h}[2], [x3] + add x20, x2, #0x6c + ld1 {v4.h}[3], [x9] + add x12, x2, #0x72 + ld1 {v5.h}[3], [x15] + add x13, x2, #0x3c + ld1 {v6.h}[3], [x19] + add x14, x2, #0x4c + ld1 {v7.h}[3], [x20] + add x3, x2, #0x5e + ld1 {v4.h}[4], [x12] + add x9, x2, #0x64 + ld1 {v5.h}[4], [x13] + add x15, x2, #0x4a + ld1 {v6.h}[4], [x14] + add x19, x2, #0x3e + ld1 {v7.h}[4], [x3] + add x20, x2, #0x6e + ld1 {v4.h}[5], [x9] + add x12, x2, #0x56 + ld1 {v5.h}[5], [x15] + add x13, x2, #0x58 + ld1 {v6.h}[5], [x19] + add x14, x2, #0x4e + ld1 {v7.h}[5], [x20] + add x3, x2, #0x7c + ld1 {v4.h}[6], [x12] + add x9, x2, #0x48 + ld1 {v5.h}[6], [x13] + add x15, x2, #0x66 + ld1 {v6.h}[6], [x14] + add x19, x2, #0x5c + ld1 {v7.h}[6], [x3] + add x20, x2, #0x7e + ld1 {v4.h}[7], [x9] + ld1 {v5.h}[7], [x15] + ld1 {v6.h}[7], [x19] + ld1 {v7.h}[7], [x20] +.endif + cmlt v24.8h, v0.8h, #0 + cmlt v25.8h, v1.8h, #0 + cmlt v26.8h, v2.8h, #0 + cmlt v27.8h, v3.8h, #0 + cmlt v28.8h, v4.8h, #0 + cmlt v29.8h, v5.8h, #0 + cmlt v30.8h, v6.8h, #0 + cmlt v31.8h, v7.8h, #0 + abs v0.8h, v0.8h + abs v1.8h, v1.8h + abs v2.8h, v2.8h + abs v3.8h, v3.8h + abs v4.8h, v4.8h + abs v5.8h, v5.8h + abs v6.8h, v6.8h + abs v7.8h, v7.8h + eor v24.16b, v24.16b, v0.16b + eor v25.16b, v25.16b, v1.16b + eor v26.16b, v26.16b, v2.16b + eor v27.16b, v27.16b, v3.16b + eor v28.16b, v28.16b, v4.16b + eor v29.16b, v29.16b, v5.16b + eor v30.16b, v30.16b, v6.16b + eor v31.16b, v31.16b, v7.16b + cmeq v16.8h, v0.8h, #0 + cmeq v17.8h, v1.8h, #0 + cmeq v18.8h, v2.8h, #0 + cmeq v19.8h, v3.8h, #0 + cmeq v20.8h, v4.8h, #0 + cmeq v21.8h, v5.8h, #0 + cmeq v22.8h, v6.8h, #0 + xtn v16.8b, v16.8h + xtn v18.8b, v18.8h + xtn v20.8b, v20.8h + xtn v22.8b, v22.8h + umov w14, v0.h[0] + xtn2 v16.16b, v17.8h + umov w13, v24.h[0] + xtn2 v18.16b, v19.8h + clz w14, w14 + xtn2 v20.16b, v21.8h + lsl w13, w13, w14 + cmeq v17.8h, v7.8h, #0 + sub w12, w14, #32 + xtn2 v22.16b, v17.8h + lsr w13, w13, w14 + and v16.16b, v16.16b, v23.16b + neg w12, w12 + and v18.16b, v18.16b, v23.16b + add x3, x4, #0x400 /* r1 = dctbl->ehufsi */ + and v20.16b, v20.16b, v23.16b + add x15, sp, #0x80 /* x15 = t2 */ + and v22.16b, v22.16b, v23.16b + ldr w10, [x4, x12, lsl #2] + addp v16.16b, v16.16b, v18.16b + ldrb w11, [x3, x12] + addp v20.16b, v20.16b, v22.16b + checkbuf47 + addp v16.16b, v16.16b, v20.16b + put_bits x10, x11 + addp v16.16b, v16.16b, v18.16b + checkbuf47 + umov x9,v16.D[0] + put_bits x13, x12 + cnt v17.8b, v16.8b + mvn x9, x9 + addv B18, v17.8b + add x4, x5, #0x400 /* x4 = actbl->ehufsi */ + umov w12, v18.b[0] + lsr x9, x9, #0x1 /* clear AC coeff */ + ldr w13, [x5, #0x3c0] /* x13 = actbl->ehufco[0xf0] */ + rbit x9, x9 /* x9 = index0 */ + ldrb w14, [x4, #0xf0] /* x14 = actbl->ehufsi[0xf0] */ + cmp w12, #(64-8) + mov x11, sp + b.lt 4f + cbz x9, 6f + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x11], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x11], #64 + st1 {v28.8h, v29.8h, v30.8h, v31.8h}, [x11], #64 +1: + clz x2, x9 + add x15, x15, x2, lsl #1 + lsl x9, x9, x2 + ldrh w20, [x15, #-126] +2: + cmp x2, #0x10 + b.lt 3f + sub x2, x2, #0x10 + checkbuf47 + put_bits x13, x14 + b 2b +3: + clz w20, w20 + ldrh w3, [x15, #2]! + sub w11, w20, #32 + lsl w3, w3, w20 + neg w11, w11 + lsr w3, w3, w20 + add x2, x11, x2, lsl #4 + lsl x9, x9, #0x1 + ldr w12, [x5, x2, lsl #2] + ldrb w10, [x4, x2] + checkbuf31 + put_bits x12, x10 + put_bits x3, x11 + cbnz x9, 1b + b 6f +4: + movi v21.8h, #0x0010 + clz v0.8h, v0.8h + clz v1.8h, v1.8h + clz v2.8h, v2.8h + clz v3.8h, v3.8h + clz v4.8h, v4.8h + clz v5.8h, v5.8h + clz v6.8h, v6.8h + clz v7.8h, v7.8h + ushl v24.8h, v24.8h, v0.8h + ushl v25.8h, v25.8h, v1.8h + ushl v26.8h, v26.8h, v2.8h + ushl v27.8h, v27.8h, v3.8h + ushl v28.8h, v28.8h, v4.8h + ushl v29.8h, v29.8h, v5.8h + ushl v30.8h, v30.8h, v6.8h + ushl v31.8h, v31.8h, v7.8h + neg v0.8h, v0.8h + neg v1.8h, v1.8h + neg v2.8h, v2.8h + neg v3.8h, v3.8h + neg v4.8h, v4.8h + neg v5.8h, v5.8h + neg v6.8h, v6.8h + neg v7.8h, v7.8h + ushl v24.8h, v24.8h, v0.8h + ushl v25.8h, v25.8h, v1.8h + ushl v26.8h, v26.8h, v2.8h + ushl v27.8h, v27.8h, v3.8h + ushl v28.8h, v28.8h, v4.8h + ushl v29.8h, v29.8h, v5.8h + ushl v30.8h, v30.8h, v6.8h + ushl v31.8h, v31.8h, v7.8h + add v0.8h, v21.8h, v0.8h + add v1.8h, v21.8h, v1.8h + add v2.8h, v21.8h, v2.8h + add v3.8h, v21.8h, v3.8h + add v4.8h, v21.8h, v4.8h + add v5.8h, v21.8h, v5.8h + add v6.8h, v21.8h, v6.8h + add v7.8h, v21.8h, v7.8h + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x11], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x11], #64 + st1 {v28.8h, v29.8h, v30.8h, v31.8h}, [x11], #64 +1: + clz x2, x9 + add x15, x15, x2, lsl #1 + lsl x9, x9, x2 + ldrh w11, [x15, #-126] +2: + cmp x2, #0x10 + b.lt 3f + sub x2, x2, #0x10 + checkbuf47 + put_bits x13, x14 + b 2b +3: + ldrh w3, [x15, #2]! + add x2, x11, x2, lsl #4 + lsl x9, x9, #0x1 + ldr w12, [x5, x2, lsl #2] + ldrb w10, [x4, x2] + checkbuf31 + put_bits x12, x10 + put_bits x3, x11 + cbnz x9, 1b +6: + add x13, sp, #0xfe + cmp x15, x13 + b.hs 1f + ldr w12, [x5] + ldrb w14, [x4] + checkbuf47 + put_bits x12, x14 +1: + sub sp, sp, 16 + str PUT_BUFFER, [x0, #0x10] + str PUT_BITSw, [x0, #0x18] + ldp x19, x20, [sp], 16 + add x0, BUFFER, #0x1 + add sp, sp, 256 + br x30 + +.endm + +generate_jsimd_huff_encode_one_block 1 +generate_jsimd_huff_encode_one_block 0 + + .unreq BUFFER + .unreq PUT_BUFFER + .unreq PUT_BITS + .unreq PUT_BITSw + +.purgem emit_byte +.purgem put_bits +.purgem checkbuf31 +.purgem checkbuf47 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm.c 2021-04-25 01:47:22.000000000 +0800 @@ -2,17 +2,16 @@ * jsimd_arm.c * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2009-2011 D. R. Commander - * + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015-2016 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * * This file contains the interface between the "normal" portions - * of the library and the SIMD implementations when running on - * ARM architecture. - * - * Based on the stubs from 'jsimd_none.c' + * of the library and the SIMD implementations when running on a + * 32-bit ARM architecture. */ #define JPEG_INTERNALS @@ -28,8 +27,9 @@ #include static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; -#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) +#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) @@ -100,7 +100,7 @@ init_simd (void) { char *env = NULL; -#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) +#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) int bufsize = 1024; /* an initial guess for the line buffer size limit */ #endif @@ -123,12 +123,15 @@ #endif /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCE_ARM_NEON"); + env = getenv("JSIMD_FORCENEON"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support &= JSIMD_ARM_NEON; - env = getenv("JSIMD_FORCE_NO_SIMD"); + env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; } GLOBAL(int) @@ -170,6 +173,24 @@ return 0; if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (simd_support & JSIMD_ARM_NEON) return 1; @@ -183,8 +204,7 @@ { void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - switch(cinfo->in_color_space) - { + switch(cinfo->in_color_space) { case JCS_EXT_RGB: neonfct=jsimd_extrgb_ycc_convert_neon; break; @@ -212,9 +232,7 @@ break; } - if (simd_support & JSIMD_ARM_NEON) - neonfct(cinfo->image_width, input_buf, - output_buf, output_row, num_rows); + neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) @@ -231,8 +249,7 @@ { void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - switch(cinfo->out_color_space) - { + switch(cinfo->out_color_space) { case JCS_EXT_RGB: neonfct=jsimd_ycc_extrgb_convert_neon; break; @@ -255,14 +272,21 @@ case JCS_EXT_ARGB: neonfct=jsimd_ycc_extxrgb_convert_neon; break; - default: + default: neonfct=jsimd_ycc_extrgb_convert_neon; break; } - if (simd_support & JSIMD_ARM_NEON) - neonfct(cinfo->output_width, input_buf, - input_row, output_buf, num_rows); + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, + output_buf, num_rows); } GLOBAL(int) @@ -282,13 +306,13 @@ } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { } @@ -311,17 +335,17 @@ GLOBAL(void) jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } GLOBAL(void) jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } @@ -338,23 +362,35 @@ { init_simd(); + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + return 0; } GLOBAL(void) jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } GLOBAL(void) jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { + jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); } GLOBAL(int) @@ -420,15 +456,14 @@ GLOBAL(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM * workspace) + DCTELEM *workspace) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_convsamp_neon(sample_data, start_col, workspace); + jsimd_convsamp_neon(sample_data, start_col, workspace); } GLOBAL(void) jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT * workspace) + FAST_FLOAT *workspace) { } @@ -466,19 +501,18 @@ } GLOBAL(void) -jsimd_fdct_islow (DCTELEM * data) +jsimd_fdct_islow (DCTELEM *data) { } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM * data) +jsimd_fdct_ifast (DCTELEM *data) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_fdct_ifast_neon(data); + jsimd_fdct_ifast_neon(data); } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT * data) +jsimd_fdct_float (FAST_FLOAT *data) { } @@ -510,16 +544,15 @@ } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, - DCTELEM * workspace) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_quantize_neon(coef_block, divisors, workspace); + jsimd_quantize_neon(coef_block, divisors, workspace); } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, - FAST_FLOAT * workspace) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { } @@ -540,7 +573,7 @@ if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; @@ -563,28 +596,28 @@ if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(int) @@ -629,7 +662,7 @@ if (IFAST_SCALE_BITS != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; @@ -644,27 +677,51 @@ } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON && simd_huffman) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm_neon.S glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm_neon.S --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_arm_neon.S 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_arm_neon.S 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* - * ARM NEON optimizations for libjpeg-turbo + * ARMv7 NEON optimizations for libjpeg-turbo * * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). * All rights reserved. * Author: Siarhei Siamashka + * Copyright (C) 2014 Siarhei Siamashka. All Rights Reserved. + * Copyright (C) 2014 Linaro Limited. All Rights Reserved. + * Copyright (C) 2015 D. R. Commander. All Rights Reserved. + * Copyright (C) 2015-2016 Matthieu Darbois. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -23,7 +27,7 @@ */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ +.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */ #endif .text @@ -31,20 +35,20 @@ .arch armv7a .object_arch armv4 .arm +.syntax unified #define RESPECT_STRICT_ALIGNMENT 1 + /*****************************************************************************/ /* Supplementary macro for setting function attributes */ .macro asm_function fname #ifdef __APPLE__ - .func _\fname .globl _\fname _\fname: #else - .func \fname .global \fname #ifdef __ELF__ .hidden \fname @@ -56,12 +60,13 @@ /* Transpose a block of 4x4 coefficients in four 64-bit registers */ .macro transpose_4x4 x0, x1, x2, x3 - vtrn.16 \x0, \x1 - vtrn.16 \x2, \x3 - vtrn.32 \x0, \x2 - vtrn.32 \x1, \x3 + vtrn.16 \x0, \x1 + vtrn.16 \x2, \x3 + vtrn.32 \x0, \x2 + vtrn.32 \x1, \x3 .endm + #define CENTERJSAMPLE 128 /*****************************************************************************/ @@ -70,22 +75,22 @@ * Perform dequantization and inverse DCT on one block of coefficients. * * GLOBAL(void) - * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block, + * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, * JSAMPARRAY output_buf, JDIMENSION output_col) */ -#define FIX_0_298631336 (2446) -#define FIX_0_390180644 (3196) -#define FIX_0_541196100 (4433) -#define FIX_0_765366865 (6270) -#define FIX_0_899976223 (7373) -#define FIX_1_175875602 (9633) -#define FIX_1_501321110 (12299) -#define FIX_1_847759065 (15137) -#define FIX_1_961570560 (16069) -#define FIX_2_053119869 (16819) -#define FIX_2_562915447 (20995) -#define FIX_3_072711026 (25172) +#define FIX_0_298631336 (2446) +#define FIX_0_390180644 (3196) +#define FIX_0_541196100 (4433) +#define FIX_0_765366865 (6270) +#define FIX_0_899976223 (7373) +#define FIX_1_175875602 (9633) +#define FIX_1_501321110 (12299) +#define FIX_1_847759065 (15137) +#define FIX_1_961570560 (16069) +#define FIX_2_053119869 (16819) +#define FIX_2_562915447 (20995) +#define FIX_3_072711026 (25172) #define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) #define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) @@ -103,8 +108,8 @@ #define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \ { \ DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ - INT32 q1, q2, q3, q4, q5, q6, q7; \ - INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \ + JLONG q1, q2, q3, q4, q5, q6, q7; \ + JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \ \ /* 1-D iDCT input data */ \ row0 = xrow0; \ @@ -125,7 +130,7 @@ q2 = MULTIPLY(row2, FIX_0_541196100) + \ MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ q4 = q6; \ - q3 = ((INT32) row0 - (INT32) row4) << 13; \ + q3 = ((JLONG) row0 - (JLONG) row4) << 13; \ q6 += MULTIPLY(row5, -FIX_2_562915447) + \ MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ /* now we can use q1 (reloadable constants have been used up) */ \ @@ -152,7 +157,7 @@ /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ tmp11_minus_tmp2 = q1; \ \ - q1 = ((INT32) row0 + (INT32) row4) << 13; \ + q1 = ((JLONG) row0 + (JLONG) row4) << 13; \ q2 = q1 + q6; \ q1 = q1 - q6; \ \ @@ -167,34 +172,34 @@ tmp13 = q1; \ } -#define XFIX_0_899976223 d0[0] -#define XFIX_0_541196100 d0[1] -#define XFIX_2_562915447 d0[2] -#define XFIX_0_298631336_MINUS_0_899976223 d0[3] -#define XFIX_1_501321110_MINUS_0_899976223 d1[0] -#define XFIX_2_053119869_MINUS_2_562915447 d1[1] -#define XFIX_0_541196100_PLUS_0_765366865 d1[2] -#define XFIX_1_175875602 d1[3] -#define XFIX_1_175875602_MINUS_0_390180644 d2[0] -#define XFIX_0_541196100_MINUS_1_847759065 d2[1] -#define XFIX_3_072711026_MINUS_2_562915447 d2[2] -#define XFIX_1_175875602_MINUS_1_961570560 d2[3] +#define XFIX_0_899976223 d0[0] +#define XFIX_0_541196100 d0[1] +#define XFIX_2_562915447 d0[2] +#define XFIX_0_298631336_MINUS_0_899976223 d0[3] +#define XFIX_1_501321110_MINUS_0_899976223 d1[0] +#define XFIX_2_053119869_MINUS_2_562915447 d1[1] +#define XFIX_0_541196100_PLUS_0_765366865 d1[2] +#define XFIX_1_175875602 d1[3] +#define XFIX_1_175875602_MINUS_0_390180644 d2[0] +#define XFIX_0_541196100_MINUS_1_847759065 d2[1] +#define XFIX_3_072711026_MINUS_2_562915447 d2[2] +#define XFIX_1_175875602_MINUS_1_961570560 d2[3] .balign 16 jsimd_idct_islow_neon_consts: - .short FIX_0_899976223 /* d0[0] */ - .short FIX_0_541196100 /* d0[1] */ - .short FIX_2_562915447 /* d0[2] */ - .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ - .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ - .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ - .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ - .short FIX_1_175875602 /* d1[3] */ - /* reloadable constants */ - .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ - .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ - .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ - .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ + .short FIX_0_899976223 /* d0[0] */ + .short FIX_0_541196100 /* d0[1] */ + .short FIX_2_562915447 /* d0[2] */ + .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ + .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ + .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ + .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ + .short FIX_1_175875602 /* d1[3] */ + /* reloadable constants */ + .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ + .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ + .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ + .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ asm_function jsimd_idct_islow_neon @@ -253,140 +258,141 @@ vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! vmul.s16 q14, q14, q2 vmul.s16 q13, q13, q1 - vld1.16 {d0, d1, d2, d3}, [ip, :128] /* load constants */ + vld1.16 {d0, d1, d2, d3}, [ip, :128] /* load constants */ add ip, ip, #16 vmul.s16 q15, q15, q3 - vpush {d8-d15} /* save NEON registers */ + vpush {d8-d15} /* save NEON registers */ /* 1-D IDCT, pass 1, left 4x8 half */ - vadd.s16 d4, ROW7L, ROW3L - vadd.s16 d5, ROW5L, ROW1L - vmull.s16 q6, d4, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, d5, XFIX_1_175875602 - vmull.s16 q7, d4, XFIX_1_175875602 + vadd.s16 d4, ROW7L, ROW3L + vadd.s16 d5, ROW5L, ROW1L + vmull.s16 q6, d4, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, d5, XFIX_1_175875602 + vmull.s16 q7, d4, XFIX_1_175875602 /* Check for the zero coefficients in the right 4x8 half */ push {r4, r5} - vmlal.s16 q7, d5, XFIX_1_175875602_MINUS_0_390180644 - vsubl.s16 q3, ROW0L, ROW4L - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vmlal.s16 q2, ROW6L, XFIX_0_541196100_MINUS_1_847759065 - orr r0, r4, r5 - vmov q4, q6 - vmlsl.s16 q6, ROW5L, XFIX_2_562915447 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vshl.s32 q3, q3, #13 - orr r0, r0, r4 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - orr r0, r0, r5 - vadd.s32 q1, q3, q2 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] - vmov q5, q7 - vadd.s32 q1, q1, q6 - orr r0, r0, r4 - vmlsl.s16 q7, ROW7L, XFIX_0_899976223 - orr r0, r0, r5 - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vrshrn.s32 ROW1L, q1, #11 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5L, XFIX_2_053119869_MINUS_2_562915447 - orr r0, r0, r4 - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - orr r0, r0, r5 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] - vmlal.s16 q6, ROW6L, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - orr r0, r0, r4 - vrshrn.s32 ROW6L, q1, #11 - orr r0, r0, r5 - vadd.s32 q1, q3, q5 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0L, ROW4L - orr r0, r0, r4 - vrshrn.s32 ROW2L, q1, #11 - orr r0, r0, r5 - vrshrn.s32 ROW5L, q3, #11 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7L, XFIX_0_298631336_MINUS_0_899976223 - orr r0, r0, r4 - vadd.s32 q2, q5, q6 - orrs r0, r0, r5 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - orr r0, r4, r5 - vsub.s32 q3, q1, q4 + vmlal.s16 q7, d5, XFIX_1_175875602_MINUS_0_390180644 + vsubl.s16 q3, ROW0L, ROW4L + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vmlal.s16 q2, ROW6L, XFIX_0_541196100_MINUS_1_847759065 + orr r0, r4, r5 + vmov q4, q6 + vmlsl.s16 q6, ROW5L, XFIX_2_562915447 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vshl.s32 q3, q3, #13 + orr r0, r0, r4 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + orr r0, r0, r5 + vadd.s32 q1, q3, q2 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] + vmov q5, q7 + vadd.s32 q1, q1, q6 + orr r0, r0, r4 + vmlsl.s16 q7, ROW7L, XFIX_0_899976223 + orr r0, r0, r5 + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vrshrn.s32 ROW1L, q1, #11 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5L, XFIX_2_053119869_MINUS_2_562915447 + orr r0, r0, r4 + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + orr r0, r0, r5 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] + vmlal.s16 q6, ROW6L, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + orr r0, r0, r4 + vrshrn.s32 ROW6L, q1, #11 + orr r0, r0, r5 + vadd.s32 q1, q3, q5 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0L, ROW4L + orr r0, r0, r4 + vrshrn.s32 ROW2L, q1, #11 + orr r0, r0, r5 + vrshrn.s32 ROW5L, q3, #11 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7L, XFIX_0_298631336_MINUS_0_899976223 + orr r0, r0, r4 + vadd.s32 q2, q5, q6 + orrs r0, r0, r5 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + orr r0, r4, r5 + vsub.s32 q3, q1, q4 pop {r4, r5} - vrshrn.s32 ROW7L, q2, #11 - vrshrn.s32 ROW3L, q5, #11 - vrshrn.s32 ROW0L, q6, #11 - vrshrn.s32 ROW4L, q3, #11 + vrshrn.s32 ROW7L, q2, #11 + vrshrn.s32 ROW3L, q5, #11 + vrshrn.s32 ROW0L, q6, #11 + vrshrn.s32 ROW4L, q3, #11 - beq 3f /* Go to do some special handling for the sparse right 4x8 half */ + beq 3f /* Go to do some special handling for the sparse + right 4x8 half */ /* 1-D IDCT, pass 1, right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vadd.s16 d10, ROW7R, ROW3R - vadd.s16 d8, ROW5R, ROW1R + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vadd.s16 d10, ROW7R, ROW3R + vadd.s16 d8, ROW5R, ROW1R /* Transpose left 4x8 half */ vtrn.16 ROW6L, ROW7L - vmull.s16 q6, d10, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, d8, XFIX_1_175875602 + vmull.s16 q6, d10, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, d8, XFIX_1_175875602 vtrn.16 ROW2L, ROW3L - vmull.s16 q7, d10, XFIX_1_175875602 - vmlal.s16 q7, d8, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q7, d10, XFIX_1_175875602 + vmlal.s16 q7, d8, XFIX_1_175875602_MINUS_0_390180644 vtrn.16 ROW0L, ROW1L - vsubl.s16 q3, ROW0R, ROW4R - vmull.s16 q2, ROW2R, XFIX_0_541196100 - vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 + vsubl.s16 q3, ROW0R, ROW4R + vmull.s16 q2, ROW2R, XFIX_0_541196100 + vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 vtrn.16 ROW4L, ROW5L - vmov q4, q6 - vmlsl.s16 q6, ROW5R, XFIX_2_562915447 - vmlal.s16 q6, ROW3R, XFIX_3_072711026_MINUS_2_562915447 + vmov q4, q6 + vmlsl.s16 q6, ROW5R, XFIX_2_562915447 + vmlal.s16 q6, ROW3R, XFIX_3_072711026_MINUS_2_562915447 vtrn.32 ROW1L, ROW3L - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW1R, XFIX_0_899976223 + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW1R, XFIX_0_899976223 vtrn.32 ROW4L, ROW6L - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 vtrn.32 ROW0L, ROW2L - vmlsl.s16 q7, ROW7R, XFIX_0_899976223 - vmlal.s16 q7, ROW1R, XFIX_1_501321110_MINUS_0_899976223 - vrshrn.s32 ROW1R, q1, #11 + vmlsl.s16 q7, ROW7R, XFIX_0_899976223 + vmlal.s16 q7, ROW1R, XFIX_1_501321110_MINUS_0_899976223 + vrshrn.s32 ROW1R, q1, #11 vtrn.32 ROW5L, ROW7L - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 - vmlsl.s16 q5, ROW3R, XFIX_2_562915447 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2R, XFIX_0_541196100_PLUS_0_765366865 - vmlal.s16 q6, ROW6R, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - vrshrn.s32 ROW6R, q1, #11 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0R, ROW4R - vrshrn.s32 ROW2R, q1, #11 - vrshrn.s32 ROW5R, q3, #11 - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vrshrn.s32 ROW7R, q2, #11 - vrshrn.s32 ROW3R, q5, #11 - vrshrn.s32 ROW0R, q6, #11 - vrshrn.s32 ROW4R, q3, #11 + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 + vmlsl.s16 q5, ROW3R, XFIX_2_562915447 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2R, XFIX_0_541196100_PLUS_0_765366865 + vmlal.s16 q6, ROW6R, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + vrshrn.s32 ROW6R, q1, #11 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0R, ROW4R + vrshrn.s32 ROW2R, q1, #11 + vrshrn.s32 ROW5R, q3, #11 + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vrshrn.s32 ROW7R, q2, #11 + vrshrn.s32 ROW3R, q5, #11 + vrshrn.s32 ROW0R, q6, #11 + vrshrn.s32 ROW4R, q3, #11 /* Transpose right 4x8 half */ vtrn.16 ROW6R, ROW7R vtrn.16 ROW2R, ROW3R @@ -398,122 +404,122 @@ vtrn.32 ROW5R, ROW7R 1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW1R, XFIX_1_175875602 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW1L, XFIX_1_175875602 - vmlal.s16 q6, ROW3R, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ - vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW3R, XFIX_1_175875602 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW3L, XFIX_1_175875602 - vmlal.s16 q7, ROW1R, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ - vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 - vsubl.s16 q3, ROW0L, ROW0R /* ROW4L <-> ROW0R */ - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vmlal.s16 q2, ROW2R, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L <-> ROW2R */ - vmov q4, q6 - vmlsl.s16 q6, ROW1R, XFIX_2_562915447 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 - vmlsl.s16 q7, ROW3R, XFIX_0_899976223 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vshrn.s32 ROW1L, q1, #16 - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW1R, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L <-> ROW1R */ - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - vmlal.s16 q6, ROW2R, XFIX_0_541196100 /* ROW6L <-> ROW2R */ - vsub.s32 q3, q3, q2 - vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0L, ROW0R /* ROW4L <-> ROW0R */ - vshrn.s32 ROW2L, q1, #16 - vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW3R, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L <-> ROW3R */ - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW3L, q5, #16 - vshrn.s32 ROW0L, q6, #16 - vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW1R, XFIX_1_175875602 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW1L, XFIX_1_175875602 + vmlal.s16 q6, ROW3R, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ + vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW3R, XFIX_1_175875602 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW3L, XFIX_1_175875602 + vmlal.s16 q7, ROW1R, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ + vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 + vsubl.s16 q3, ROW0L, ROW0R /* ROW4L <-> ROW0R */ + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vmlal.s16 q2, ROW2R, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L <-> ROW2R */ + vmov q4, q6 + vmlsl.s16 q6, ROW1R, XFIX_2_562915447 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 + vmlsl.s16 q7, ROW3R, XFIX_0_899976223 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vshrn.s32 ROW1L, q1, #16 + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW1R, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L <-> ROW1R */ + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + vmlal.s16 q6, ROW2R, XFIX_0_541196100 /* ROW6L <-> ROW2R */ + vsub.s32 q3, q3, q2 + vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0L, ROW0R /* ROW4L <-> ROW0R */ + vshrn.s32 ROW2L, q1, #16 + vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW3R, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L <-> ROW3R */ + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW3L, q5, #16 + vshrn.s32 ROW0L, q6, #16 + vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ /* 1-D IDCT, pass 2, right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW5R, XFIX_1_175875602 - vmlal.s16 q6, ROW5L, XFIX_1_175875602 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW7R, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ - vmull.s16 q7, ROW7R, XFIX_1_175875602 - vmlal.s16 q7, ROW7L, XFIX_1_175875602 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW5R, XFIX_1_175875602_MINUS_0_390180644 - vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ - vsubl.s16 q3, ROW4L, ROW4R /* ROW4L <-> ROW0R */ - vmull.s16 q2, ROW6L, XFIX_0_541196100 /* ROW6L <-> ROW2R */ - vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 - vmov q4, q6 - vmlsl.s16 q6, ROW5R, XFIX_2_562915447 - vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L <-> ROW3R */ - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW5L, XFIX_0_899976223 /* ROW5L <-> ROW1R */ - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 - vmlsl.s16 q7, ROW7R, XFIX_0_899976223 - vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L <-> ROW1R */ - vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 - vmlsl.s16 q5, ROW7L, XFIX_2_562915447 /* ROW7L <-> ROW3R */ - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L <-> ROW2R */ - vmlal.s16 q6, ROW6R, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW6R, q1, #16 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW4L, ROW4R /* ROW4L <-> ROW0R */ - vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ - vshrn.s32 ROW5R, q3, #16 - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW7R, q2, #16 - vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ - vshrn.s32 ROW4R, q3, #16 + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW5R, XFIX_1_175875602 + vmlal.s16 q6, ROW5L, XFIX_1_175875602 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW7R, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ + vmull.s16 q7, ROW7R, XFIX_1_175875602 + vmlal.s16 q7, ROW7L, XFIX_1_175875602 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW5R, XFIX_1_175875602_MINUS_0_390180644 + vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ + vsubl.s16 q3, ROW4L, ROW4R /* ROW4L <-> ROW0R */ + vmull.s16 q2, ROW6L, XFIX_0_541196100 /* ROW6L <-> ROW2R */ + vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 + vmov q4, q6 + vmlsl.s16 q6, ROW5R, XFIX_2_562915447 + vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L <-> ROW3R */ + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW5L, XFIX_0_899976223 /* ROW5L <-> ROW1R */ + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 + vmlsl.s16 q7, ROW7R, XFIX_0_899976223 + vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L <-> ROW1R */ + vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 + vmlsl.s16 q5, ROW7L, XFIX_2_562915447 /* ROW7L <-> ROW3R */ + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L <-> ROW2R */ + vmlal.s16 q6, ROW6R, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW6R, q1, #16 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW4L, ROW4R /* ROW4L <-> ROW0R */ + vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ + vshrn.s32 ROW5R, q3, #16 + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW7R, q2, #16 + vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ + vshrn.s32 ROW4R, q3, #16 2: /* Descale to 8-bit and range limit */ - vqrshrn.s16 d16, q8, #2 - vqrshrn.s16 d17, q9, #2 - vqrshrn.s16 d18, q10, #2 - vqrshrn.s16 d19, q11, #2 - vpop {d8-d15} /* restore NEON registers */ - vqrshrn.s16 d20, q12, #2 + vqrshrn.s16 d16, q8, #2 + vqrshrn.s16 d17, q9, #2 + vqrshrn.s16 d18, q10, #2 + vqrshrn.s16 d19, q11, #2 + vpop {d8-d15} /* restore NEON registers */ + vqrshrn.s16 d20, q12, #2 /* Transpose the final 8-bit samples and do signed->unsigned conversion */ - vtrn.16 q8, q9 - vqrshrn.s16 d21, q13, #2 - vqrshrn.s16 d22, q14, #2 - vmov.u8 q0, #(CENTERJSAMPLE) - vqrshrn.s16 d23, q15, #2 - vtrn.8 d16, d17 - vtrn.8 d18, d19 - vadd.u8 q8, q8, q0 - vadd.u8 q9, q9, q0 - vtrn.16 q10, q11 + vtrn.16 q8, q9 + vqrshrn.s16 d21, q13, #2 + vqrshrn.s16 d22, q14, #2 + vmov.u8 q0, #(CENTERJSAMPLE) + vqrshrn.s16 d23, q15, #2 + vtrn.8 d16, d17 + vtrn.8 d18, d19 + vadd.u8 q8, q8, q0 + vadd.u8 q9, q9, q0 + vtrn.16 q10, q11 /* Store results to the output buffer */ ldmia OUTPUT_BUF!, {TMP1, TMP2} add TMP1, TMP1, OUTPUT_COL @@ -525,7 +531,7 @@ add TMP1, TMP1, OUTPUT_COL add TMP2, TMP2, OUTPUT_COL vst1.8 {d18}, [TMP1] - vadd.u8 q10, q10, q0 + vadd.u8 q10, q10, q0 vst1.8 {d19}, [TMP2] ldmia OUTPUT_BUF, {TMP1, TMP2, TMP3, TMP4} add TMP1, TMP1, OUTPUT_COL @@ -534,7 +540,7 @@ add TMP4, TMP4, OUTPUT_COL vtrn.8 d22, d23 vst1.8 {d20}, [TMP1] - vadd.u8 q11, q11, q0 + vadd.u8 q11, q11, q0 vst1.8 {d21}, [TMP2] vst1.8 {d22}, [TMP3] vst1.8 {d23}, [TMP4] @@ -547,14 +553,15 @@ vtrn.16 ROW2L, ROW3L vtrn.16 ROW0L, ROW1L vtrn.16 ROW4L, ROW5L - vshl.s16 ROW0R, ROW0R, #2 /* PASS1_BITS */ + vshl.s16 ROW0R, ROW0R, #2 /* PASS1_BITS */ vtrn.32 ROW1L, ROW3L vtrn.32 ROW4L, ROW6L vtrn.32 ROW0L, ROW2L vtrn.32 ROW5L, ROW7L cmp r0, #0 - beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */ + beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second + pass */ /* Only row 0 is non-zero for the right 4x8 half */ vdup.s16 ROW1R, ROW0R[1] @@ -565,83 +572,83 @@ vdup.s16 ROW6R, ROW0R[2] vdup.s16 ROW7R, ROW0R[3] vdup.s16 ROW0R, ROW0R[0] - b 1b /* Go to 'normal' second pass */ + b 1b /* Go to 'normal' second pass */ 4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW1L, XFIX_1_175875602 - vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW3L, XFIX_1_175875602 - vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vshll.s16 q3, ROW0L, #13 - vmov q4, q6 - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vadd.s32 q1, q1, q6 - vadd.s32 q6, q6, q6 - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - vshrn.s32 ROW1L, q1, #16 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vshll.s16 q5, ROW0L, #13 - vshrn.s32 ROW2L, q1, #16 - vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW3L, q5, #16 - vshrn.s32 ROW0L, q6, #16 - vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW1L, XFIX_1_175875602 + vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW3L, XFIX_1_175875602 + vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vshll.s16 q3, ROW0L, #13 + vmov q4, q6 + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vadd.s32 q1, q1, q6 + vadd.s32 q6, q6, q6 + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + vshrn.s32 ROW1L, q1, #16 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vshll.s16 q5, ROW0L, #13 + vshrn.s32 ROW2L, q1, #16 + vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW3L, q5, #16 + vshrn.s32 ROW0L, q6, #16 + vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW5L, XFIX_1_175875602 - vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW7L, XFIX_1_175875602 - vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 - vmull.s16 q2, ROW6L, XFIX_0_541196100 - vshll.s16 q3, ROW4L, #13 - vmov q4, q6 - vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 - vmlsl.s16 q4, ROW5L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 - vadd.s32 q1, q1, q6 - vadd.s32 q6, q6, q6 - vmlsl.s16 q5, ROW7L, XFIX_2_562915447 - vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW6R, q1, #16 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vshll.s16 q5, ROW4L, #13 - vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ - vshrn.s32 ROW5R, q3, #16 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW7R, q2, #16 - vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ - vshrn.s32 ROW4R, q3, #16 - b 2b /* Go to epilogue */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW5L, XFIX_1_175875602 + vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW7L, XFIX_1_175875602 + vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q2, ROW6L, XFIX_0_541196100 + vshll.s16 q3, ROW4L, #13 + vmov q4, q6 + vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 + vmlsl.s16 q4, ROW5L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 + vadd.s32 q1, q1, q6 + vadd.s32 q6, q6, q6 + vmlsl.s16 q5, ROW7L, XFIX_2_562915447 + vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW6R, q1, #16 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vshll.s16 q5, ROW4L, #13 + vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ + vshrn.s32 ROW5R, q3, #16 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW7R, q2, #16 + vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ + vshrn.s32 ROW4R, q3, #16 + b 2b /* Go to epilogue */ .unreq DCT_TABLE .unreq COEF_BLOCK @@ -668,7 +675,7 @@ .unreq ROW6R .unreq ROW7L .unreq ROW7R -.endfunc + /*****************************************************************************/ @@ -695,10 +702,10 @@ .balign 16 jsimd_idct_ifast_neon_consts: - .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ - .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ - .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ - .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ + .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ + .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ + .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ + .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ asm_function jsimd_idct_ifast_neon @@ -728,9 +735,9 @@ vld1.16 {d16, d17, d18, d19}, [COEF_BLOCK, :128]! vld1.16 {d0, d1, d2, d3}, [DCT_TABLE, :128]! vld1.16 {d20, d21, d22, d23}, [COEF_BLOCK, :128]! - vmul.s16 q8, q8, q0 + vmul.s16 q8, q8, q0 vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! - vmul.s16 q9, q9, q1 + vmul.s16 q9, q9, q1 vld1.16 {d24, d25, d26, d27}, [COEF_BLOCK, :128]! vmul.s16 q10, q10, q2 vld1.16 {d0, d1, d2, d3}, [DCT_TABLE, :128]! @@ -740,124 +747,124 @@ vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! vmul.s16 q14, q14, q2 vmul.s16 q13, q13, q1 - vld1.16 {d0}, [ip, :64] /* load constants */ + vld1.16 {d0}, [ip, :64] /* load constants */ vmul.s16 q15, q15, q3 - vpush {d8-d13} /* save NEON registers */ + vpush {d8-d13} /* save NEON registers */ /* 1-D IDCT, pass 1 */ - vsub.s16 q2, q10, q14 + vsub.s16 q2, q10, q14 vadd.s16 q14, q10, q14 - vsub.s16 q1, q11, q13 + vsub.s16 q1, q11, q13 vadd.s16 q13, q11, q13 - vsub.s16 q5, q9, q15 - vadd.s16 q15, q9, q15 - vqdmulh.s16 q4, q2, XFIX_1_414213562 - vqdmulh.s16 q6, q1, XFIX_2_613125930 - vadd.s16 q3, q1, q1 - vsub.s16 q1, q5, q1 - vadd.s16 q10, q2, q4 - vqdmulh.s16 q4, q1, XFIX_1_847759065 - vsub.s16 q2, q15, q13 - vadd.s16 q3, q3, q6 - vqdmulh.s16 q6, q2, XFIX_1_414213562 - vadd.s16 q1, q1, q4 - vqdmulh.s16 q4, q5, XFIX_1_082392200 + vsub.s16 q5, q9, q15 + vadd.s16 q15, q9, q15 + vqdmulh.s16 q4, q2, XFIX_1_414213562 + vqdmulh.s16 q6, q1, XFIX_2_613125930 + vadd.s16 q3, q1, q1 + vsub.s16 q1, q5, q1 + vadd.s16 q10, q2, q4 + vqdmulh.s16 q4, q1, XFIX_1_847759065 + vsub.s16 q2, q15, q13 + vadd.s16 q3, q3, q6 + vqdmulh.s16 q6, q2, XFIX_1_414213562 + vadd.s16 q1, q1, q4 + vqdmulh.s16 q4, q5, XFIX_1_082392200 vsub.s16 q10, q10, q14 - vadd.s16 q2, q2, q6 - vsub.s16 q6, q8, q12 - vadd.s16 q12, q8, q12 - vadd.s16 q9, q5, q4 - vadd.s16 q5, q6, q10 - vsub.s16 q10, q6, q10 - vadd.s16 q6, q15, q13 - vadd.s16 q8, q12, q14 - vsub.s16 q3, q6, q3 + vadd.s16 q2, q2, q6 + vsub.s16 q6, q8, q12 + vadd.s16 q12, q8, q12 + vadd.s16 q9, q5, q4 + vadd.s16 q5, q6, q10 + vsub.s16 q10, q6, q10 + vadd.s16 q6, q15, q13 + vadd.s16 q8, q12, q14 + vsub.s16 q3, q6, q3 vsub.s16 q12, q12, q14 - vsub.s16 q3, q3, q1 - vsub.s16 q1, q9, q1 - vadd.s16 q2, q3, q2 - vsub.s16 q15, q8, q6 - vadd.s16 q1, q1, q2 - vadd.s16 q8, q8, q6 - vadd.s16 q14, q5, q3 - vsub.s16 q9, q5, q3 + vsub.s16 q3, q3, q1 + vsub.s16 q1, q9, q1 + vadd.s16 q2, q3, q2 + vsub.s16 q15, q8, q6 + vadd.s16 q1, q1, q2 + vadd.s16 q8, q8, q6 + vadd.s16 q14, q5, q3 + vsub.s16 q9, q5, q3 vsub.s16 q13, q10, q2 vadd.s16 q10, q10, q2 /* Transpose */ - vtrn.16 q8, q9 + vtrn.16 q8, q9 vsub.s16 q11, q12, q1 vtrn.16 q14, q15 vadd.s16 q12, q12, q1 vtrn.16 q10, q11 vtrn.16 q12, q13 - vtrn.32 q9, q11 + vtrn.32 q9, q11 vtrn.32 q12, q14 - vtrn.32 q8, q10 + vtrn.32 q8, q10 vtrn.32 q13, q15 vswp d28, d21 vswp d26, d19 /* 1-D IDCT, pass 2 */ - vsub.s16 q2, q10, q14 + vsub.s16 q2, q10, q14 vswp d30, d23 vadd.s16 q14, q10, q14 vswp d24, d17 - vsub.s16 q1, q11, q13 + vsub.s16 q1, q11, q13 vadd.s16 q13, q11, q13 - vsub.s16 q5, q9, q15 - vadd.s16 q15, q9, q15 - vqdmulh.s16 q4, q2, XFIX_1_414213562 - vqdmulh.s16 q6, q1, XFIX_2_613125930 - vadd.s16 q3, q1, q1 - vsub.s16 q1, q5, q1 - vadd.s16 q10, q2, q4 - vqdmulh.s16 q4, q1, XFIX_1_847759065 - vsub.s16 q2, q15, q13 - vadd.s16 q3, q3, q6 - vqdmulh.s16 q6, q2, XFIX_1_414213562 - vadd.s16 q1, q1, q4 - vqdmulh.s16 q4, q5, XFIX_1_082392200 + vsub.s16 q5, q9, q15 + vadd.s16 q15, q9, q15 + vqdmulh.s16 q4, q2, XFIX_1_414213562 + vqdmulh.s16 q6, q1, XFIX_2_613125930 + vadd.s16 q3, q1, q1 + vsub.s16 q1, q5, q1 + vadd.s16 q10, q2, q4 + vqdmulh.s16 q4, q1, XFIX_1_847759065 + vsub.s16 q2, q15, q13 + vadd.s16 q3, q3, q6 + vqdmulh.s16 q6, q2, XFIX_1_414213562 + vadd.s16 q1, q1, q4 + vqdmulh.s16 q4, q5, XFIX_1_082392200 vsub.s16 q10, q10, q14 - vadd.s16 q2, q2, q6 - vsub.s16 q6, q8, q12 - vadd.s16 q12, q8, q12 - vadd.s16 q9, q5, q4 - vadd.s16 q5, q6, q10 - vsub.s16 q10, q6, q10 - vadd.s16 q6, q15, q13 - vadd.s16 q8, q12, q14 - vsub.s16 q3, q6, q3 + vadd.s16 q2, q2, q6 + vsub.s16 q6, q8, q12 + vadd.s16 q12, q8, q12 + vadd.s16 q9, q5, q4 + vadd.s16 q5, q6, q10 + vsub.s16 q10, q6, q10 + vadd.s16 q6, q15, q13 + vadd.s16 q8, q12, q14 + vsub.s16 q3, q6, q3 vsub.s16 q12, q12, q14 - vsub.s16 q3, q3, q1 - vsub.s16 q1, q9, q1 - vadd.s16 q2, q3, q2 - vsub.s16 q15, q8, q6 - vadd.s16 q1, q1, q2 - vadd.s16 q8, q8, q6 - vadd.s16 q14, q5, q3 - vsub.s16 q9, q5, q3 + vsub.s16 q3, q3, q1 + vsub.s16 q1, q9, q1 + vadd.s16 q2, q3, q2 + vsub.s16 q15, q8, q6 + vadd.s16 q1, q1, q2 + vadd.s16 q8, q8, q6 + vadd.s16 q14, q5, q3 + vsub.s16 q9, q5, q3 vsub.s16 q13, q10, q2 - vpop {d8-d13} /* restore NEON registers */ + vpop {d8-d13} /* restore NEON registers */ vadd.s16 q10, q10, q2 vsub.s16 q11, q12, q1 vadd.s16 q12, q12, q1 /* Descale to 8-bit and range limit */ - vmov.u8 q0, #0x80 - vqshrn.s16 d16, q8, #5 - vqshrn.s16 d17, q9, #5 + vmov.u8 q0, #0x80 + vqshrn.s16 d16, q8, #5 + vqshrn.s16 d17, q9, #5 vqshrn.s16 d18, q10, #5 vqshrn.s16 d19, q11, #5 vqshrn.s16 d20, q12, #5 vqshrn.s16 d21, q13, #5 vqshrn.s16 d22, q14, #5 vqshrn.s16 d23, q15, #5 - vadd.u8 q8, q8, q0 - vadd.u8 q9, q9, q0 + vadd.u8 q8, q8, q0 + vadd.u8 q9, q9, q0 vadd.u8 q10, q10, q0 vadd.u8 q11, q11, q0 /* Transpose the final 8-bit samples */ - vtrn.16 q8, q9 + vtrn.16 q8, q9 vtrn.16 q10, q11 - vtrn.32 q8, q10 - vtrn.32 q9, q11 + vtrn.32 q8, q10 + vtrn.32 q9, q11 vtrn.8 d16, d17 vtrn.8 d18, d19 /* Store results to the output buffer */ @@ -892,7 +899,7 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc + /*****************************************************************************/ @@ -916,81 +923,80 @@ #define CONST_BITS 13 -#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ .balign 16 jsimd_idct_4x4_neon_consts: - .short FIX_1_847759065 /* d0[0] */ - .short -FIX_0_765366865 /* d0[1] */ - .short -FIX_0_211164243 /* d0[2] */ - .short FIX_1_451774981 /* d0[3] */ - .short -FIX_2_172734803 /* d1[0] */ - .short FIX_1_061594337 /* d1[1] */ - .short -FIX_0_509795579 /* d1[2] */ - .short -FIX_0_601344887 /* d1[3] */ - .short FIX_0_899976223 /* d2[0] */ - .short FIX_2_562915447 /* d2[1] */ - .short 1 << (CONST_BITS+1) /* d2[2] */ - .short 0 /* d2[3] */ + .short FIX_1_847759065 /* d0[0] */ + .short -FIX_0_765366865 /* d0[1] */ + .short -FIX_0_211164243 /* d0[2] */ + .short FIX_1_451774981 /* d0[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* d2[0] */ + .short FIX_2_562915447 /* d2[1] */ + .short 1 << (CONST_BITS+1) /* d2[2] */ + .short 0 /* d2[3] */ .macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 - vmull.s16 q14, \x4, d2[2] - vmlal.s16 q14, \x8, d0[0] + vmull.s16 q14, \x4, d2[2] + vmlal.s16 q14, \x8, d0[0] vmlal.s16 q14, \x14, d0[1] vmull.s16 q13, \x16, d1[2] vmlal.s16 q13, \x12, d1[3] vmlal.s16 q13, \x10, d2[0] - vmlal.s16 q13, \x6, d2[1] + vmlal.s16 q13, \x6, d2[1] - vmull.s16 q15, \x4, d2[2] - vmlsl.s16 q15, \x8, d0[0] + vmull.s16 q15, \x4, d2[2] + vmlsl.s16 q15, \x8, d0[0] vmlsl.s16 q15, \x14, d0[1] vmull.s16 q12, \x16, d0[2] vmlal.s16 q12, \x12, d0[3] vmlal.s16 q12, \x10, d1[0] - vmlal.s16 q12, \x6, d1[1] + vmlal.s16 q12, \x6, d1[1] vadd.s32 q10, q14, q13 vsub.s32 q14, q14, q13 -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q14, q14, #\shift + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q14, q14, #\shift vmovn.s32 \y26, q10 vmovn.s32 \y29, q14 -.else + .else vrshrn.s32 \y26, q10, #\shift vrshrn.s32 \y29, q14, #\shift -.endif + .endif vadd.s32 q10, q15, q12 vsub.s32 q15, q15, q12 -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q15, q15, #\shift + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q15, q15, #\shift vmovn.s32 \y27, q10 vmovn.s32 \y28, q15 -.else + .else vrshrn.s32 \y27, q10, #\shift vrshrn.s32 \y28, q15, #\shift -.endif - + .endif .endm asm_function jsimd_idct_4x4_neon @@ -1104,10 +1110,10 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc .purgem idct_helper + /*****************************************************************************/ /* @@ -1126,31 +1132,30 @@ .balign 8 jsimd_idct_2x2_neon_consts: - .short -FIX_0_720959822 /* d0[0] */ - .short FIX_0_850430095 /* d0[1] */ - .short -FIX_1_272758580 /* d0[2] */ - .short FIX_3_624509785 /* d0[3] */ + .short -FIX_0_720959822 /* d0[0] */ + .short FIX_0_850430095 /* d0[1] */ + .short -FIX_1_272758580 /* d0[2] */ + .short FIX_3_624509785 /* d0[3] */ .macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 - vshll.s16 q14, \x4, #15 - vmull.s16 q13, \x6, d0[3] - vmlal.s16 q13, \x10, d0[2] - vmlal.s16 q13, \x12, d0[1] - vmlal.s16 q13, \x16, d0[0] - - vadd.s32 q10, q14, q13 - vsub.s32 q14, q14, q13 - -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q14, q14, #\shift - vmovn.s32 \y26, q10 - vmovn.s32 \y27, q14 -.else - vrshrn.s32 \y26, q10, #\shift - vrshrn.s32 \y27, q14, #\shift -.endif + vshll.s16 q14, \x4, #15 + vmull.s16 q13, \x6, d0[3] + vmlal.s16 q13, \x10, d0[2] + vmlal.s16 q13, \x12, d0[1] + vmlal.s16 q13, \x16, d0[0] + vadd.s32 q10, q14, q13 + vsub.s32 q14, q14, q13 + + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q14, q14, #\shift + vmovn.s32 \y26, q10 + vmovn.s32 \y27, q14 + .else + vrshrn.s32 \y26, q10, #\shift + vrshrn.s32 \y27, q14, #\shift + .endif .endm asm_function jsimd_idct_2x2_neon @@ -1204,30 +1209,30 @@ /* Pass 1 */ #if 0 idct_helper d4, d6, d10, d12, d16, 13, d4, d6 - transpose_4x4 d4, d6, d8, d10 + transpose_4x4 d4, d6, d8, d10 idct_helper d5, d7, d11, d13, d17, 13, d5, d7 - transpose_4x4 d5, d7, d9, d11 + transpose_4x4 d5, d7, d9, d11 #else - vmull.s16 q13, d6, d0[3] + vmull.s16 q13, d6, d0[3] vmlal.s16 q13, d10, d0[2] vmlal.s16 q13, d12, d0[1] vmlal.s16 q13, d16, d0[0] - vmull.s16 q12, d7, d0[3] + vmull.s16 q12, d7, d0[3] vmlal.s16 q12, d11, d0[2] vmlal.s16 q12, d13, d0[1] vmlal.s16 q12, d17, d0[0] - vshll.s16 q14, d4, #15 - vshll.s16 q15, d5, #15 + vshll.s16 q14, d4, #15 + vshll.s16 q15, d5, #15 vadd.s32 q10, q14, q13 vsub.s32 q14, q14, q13 - vrshrn.s32 d4, q10, #13 - vrshrn.s32 d6, q14, #13 + vrshrn.s32 d4, q10, #13 + vrshrn.s32 d6, q14, #13 vadd.s32 q10, q15, q12 vsub.s32 q14, q15, q12 - vrshrn.s32 d5, q10, #13 - vrshrn.s32 d7, q14, #13 - vtrn.16 q2, q3 - vtrn.32 q3, q5 + vrshrn.s32 d5, q10, #13 + vrshrn.s32 d7, q14, #13 + vtrn.16 q2, q3 + vtrn.32 q3, q5 #endif /* Pass 2 */ @@ -1258,10 +1263,10 @@ .unreq OUTPUT_COL .unreq TMP1 .unreq TMP2 -.endfunc .purgem idct_helper + /*****************************************************************************/ /* @@ -1277,97 +1282,110 @@ .macro do_load size + .if \size == 8 + vld1.8 {d4}, [U, :64]! + vld1.8 {d5}, [V, :64]! + vld1.8 {d0}, [Y, :64]! + pld [U, #64] + pld [V, #64] + pld [Y, #64] + .elseif \size == 4 + vld1.8 {d4[0]}, [U]! + vld1.8 {d4[1]}, [U]! + vld1.8 {d4[2]}, [U]! + vld1.8 {d4[3]}, [U]! + vld1.8 {d5[0]}, [V]! + vld1.8 {d5[1]}, [V]! + vld1.8 {d5[2]}, [V]! + vld1.8 {d5[3]}, [V]! + vld1.8 {d0[0]}, [Y]! + vld1.8 {d0[1]}, [Y]! + vld1.8 {d0[2]}, [Y]! + vld1.8 {d0[3]}, [Y]! + .elseif \size == 2 + vld1.8 {d4[4]}, [U]! + vld1.8 {d4[5]}, [U]! + vld1.8 {d5[4]}, [V]! + vld1.8 {d5[5]}, [V]! + vld1.8 {d0[4]}, [Y]! + vld1.8 {d0[5]}, [Y]! + .elseif \size == 1 + vld1.8 {d4[6]}, [U]! + vld1.8 {d5[6]}, [V]! + vld1.8 {d0[6]}, [Y]! + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_store bpp, size + .if \bpp == 24 .if \size == 8 - vld1.8 {d4}, [U, :64]! - vld1.8 {d5}, [V, :64]! - vld1.8 {d0}, [Y, :64]! - pld [U, #64] - pld [V, #64] - pld [Y, #64] + vst3.8 {d10, d11, d12}, [RGB]! .elseif \size == 4 - vld1.8 {d4[0]}, [U]! - vld1.8 {d4[1]}, [U]! - vld1.8 {d4[2]}, [U]! - vld1.8 {d4[3]}, [U]! - vld1.8 {d5[0]}, [V]! - vld1.8 {d5[1]}, [V]! - vld1.8 {d5[2]}, [V]! - vld1.8 {d5[3]}, [V]! - vld1.8 {d0[0]}, [Y]! - vld1.8 {d0[1]}, [Y]! - vld1.8 {d0[2]}, [Y]! - vld1.8 {d0[3]}, [Y]! + vst3.8 {d10[0], d11[0], d12[0]}, [RGB]! + vst3.8 {d10[1], d11[1], d12[1]}, [RGB]! + vst3.8 {d10[2], d11[2], d12[2]}, [RGB]! + vst3.8 {d10[3], d11[3], d12[3]}, [RGB]! .elseif \size == 2 - vld1.8 {d4[4]}, [U]! - vld1.8 {d4[5]}, [U]! - vld1.8 {d5[4]}, [V]! - vld1.8 {d5[5]}, [V]! - vld1.8 {d0[4]}, [Y]! - vld1.8 {d0[5]}, [Y]! + vst3.8 {d10[4], d11[4], d12[4]}, [RGB]! + vst3.8 {d10[5], d11[5], d12[5]}, [RGB]! .elseif \size == 1 - vld1.8 {d4[6]}, [U]! - vld1.8 {d5[6]}, [V]! - vld1.8 {d0[6]}, [Y]! + vst3.8 {d10[6], d11[6], d12[6]}, [RGB]! .else - .error unsupported macroblock size + .error unsupported macroblock size .endif -.endm - -.macro do_store bpp, size - .if \bpp == 24 - .if \size == 8 - vst3.8 {d10, d11, d12}, [RGB]! - .elseif \size == 4 - vst3.8 {d10[0], d11[0], d12[0]}, [RGB]! - vst3.8 {d10[1], d11[1], d12[1]}, [RGB]! - vst3.8 {d10[2], d11[2], d12[2]}, [RGB]! - vst3.8 {d10[3], d11[3], d12[3]}, [RGB]! - .elseif \size == 2 - vst3.8 {d10[4], d11[4], d12[4]}, [RGB]! - vst3.8 {d10[5], d11[5], d12[5]}, [RGB]! - .elseif \size == 1 - vst3.8 {d10[6], d11[6], d12[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif - .elseif \bpp == 32 - .if \size == 8 - vst4.8 {d10, d11, d12, d13}, [RGB]! - .elseif \size == 4 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! - .elseif \size == 2 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! - vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! - .elseif \size == 1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif + .elseif \bpp == 32 + .if \size == 8 + vst4.8 {d10, d11, d12, d13}, [RGB]! + .elseif \size == 4 + vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! + vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! + vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! + vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! + .elseif \size == 2 + vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! + vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! + .elseif \size == 1 + vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! .else - .error unsupported bpp + .error unsupported macroblock size .endif + .elseif \bpp == 16 + .if \size == 8 + vst1.16 {q15}, [RGB]! + .elseif \size == 4 + vst1.16 {d30}, [RGB]! + .elseif \size == 2 + vst1.16 {d31[0]}, [RGB]! + vst1.16 {d31[1]}, [RGB]! + .elseif \size == 1 + vst1.16 {d31[2]}, [RGB]! + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif .endm .macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs /* - * 2 stage pipelined YCbCr->RGB conversion + * 2-stage pipelined YCbCr->RGB conversion */ .macro do_yuv_to_rgb_stage1 - vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ - vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ - vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ - vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ - vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ - vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ - vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ - vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ - vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ - vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ + vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ + vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ + vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ + vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ + vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ + vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ + vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ + vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ + vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ .endm .macro do_yuv_to_rgb_stage2 @@ -1377,44 +1395,71 @@ vrshrn.s32 d25, q13, #14 vrshrn.s32 d28, q14, #14 vrshrn.s32 d29, q15, #14 - vaddw.u8 q10, q10, d0 + vaddw.u8 q11, q10, d0 vaddw.u8 q12, q12, d0 vaddw.u8 q14, q14, d0 - vqmovun.s16 d1\g_offs, q10 + .if \bpp != 16 + vqmovun.s16 d1\g_offs, q11 vqmovun.s16 d1\r_offs, q12 vqmovun.s16 d1\b_offs, q14 + .else /* rgb565 */ + vqshlu.s16 q13, q11, #8 + vqshlu.s16 q15, q12, #8 + vqshlu.s16 q14, q14, #8 + vsri.u16 q15, q13, #5 + vsri.u16 q15, q14, #11 + .endif .endm .macro do_yuv_to_rgb_stage2_store_load_stage1 + /* "do_yuv_to_rgb_stage2" and "store" */ + vrshrn.s32 d20, q10, #15 + /* "load" and "do_yuv_to_rgb_stage1" */ + pld [U, #64] + vrshrn.s32 d21, q11, #15 + pld [V, #64] + vrshrn.s32 d24, q12, #14 + vrshrn.s32 d25, q13, #14 vld1.8 {d4}, [U, :64]! - vrshrn.s32 d20, q10, #15 - vrshrn.s32 d21, q11, #15 - vrshrn.s32 d24, q12, #14 - vrshrn.s32 d25, q13, #14 - vrshrn.s32 d28, q14, #14 + vrshrn.s32 d28, q14, #14 vld1.8 {d5}, [V, :64]! - vrshrn.s32 d29, q15, #14 - vaddw.u8 q10, q10, d0 - vaddw.u8 q12, q12, d0 - vaddw.u8 q14, q14, d0 - vqmovun.s16 d1\g_offs, q10 + vrshrn.s32 d29, q15, #14 + vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ + vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ + vaddw.u8 q11, q10, d0 + vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ + vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ + vaddw.u8 q12, q12, d0 + vaddw.u8 q14, q14, d0 + .if \bpp != 16 /**************** rgb24/rgb32 ******************************/ + vqmovun.s16 d1\g_offs, q11 + pld [Y, #64] + vqmovun.s16 d1\r_offs, q12 vld1.8 {d0}, [Y, :64]! - vqmovun.s16 d1\r_offs, q12 - pld [U, #64] - pld [V, #64] + vqmovun.s16 d1\b_offs, q14 + vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ + vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ + do_store \bpp, 8 + vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ + vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ + vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ + vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + .else /**************************** rgb565 ********************************/ + vqshlu.s16 q13, q11, #8 pld [Y, #64] - vqmovun.s16 d1\b_offs, q14 - vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ - vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ - do_store \bpp, 8 - vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ - vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ - vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ - vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ - vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ - vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ - vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ - vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + vqshlu.s16 q15, q12, #8 + vqshlu.s16 q14, q14, #8 + vld1.8 {d0}, [Y, :64]! + vmull.s16 q11, d7, d1[1] + vmlal.s16 q11, d9, d1[2] + vsri.u16 q15, q13, #5 + vmull.s16 q12, d8, d1[0] + vsri.u16 q15, q14, #11 + vmull.s16 q13, d9, d1[0] + vmull.s16 q14, d6, d1[3] + do_store \bpp, 8 + vmull.s16 q15, d7, d1[3] + .endif .endm .macro do_yuv_to_rgb @@ -1428,10 +1473,10 @@ .balign 16 jsimd_ycc_\colorid\()_neon_consts: - .short 0, 0, 0, 0 - .short 22971, -11277, -23401, 29033 - .short -128, -128, -128, -128 - .short -128, -128, -128, -128 + .short 0, 0, 0, 0 + .short 22971, -11277, -23401, 29033 + .short -128, -128, -128, -128 + .short -128, -128, -128, -128 asm_function jsimd_ycc_\colorid\()_convert_neon OUTPUT_WIDTH .req r0 @@ -1541,7 +1586,6 @@ .unreq U .unreq V .unreq N -.endfunc .purgem do_yuv_to_rgb .purgem do_yuv_to_rgb_stage1 @@ -1557,10 +1601,12 @@ generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0 generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1 generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3 +generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, 0, 0 .purgem do_load .purgem do_store + /*****************************************************************************/ /* @@ -1575,123 +1621,123 @@ */ .macro do_store size + .if \size == 8 + vst1.8 {d20}, [Y]! + vst1.8 {d21}, [U]! + vst1.8 {d22}, [V]! + .elseif \size == 4 + vst1.8 {d20[0]}, [Y]! + vst1.8 {d20[1]}, [Y]! + vst1.8 {d20[2]}, [Y]! + vst1.8 {d20[3]}, [Y]! + vst1.8 {d21[0]}, [U]! + vst1.8 {d21[1]}, [U]! + vst1.8 {d21[2]}, [U]! + vst1.8 {d21[3]}, [U]! + vst1.8 {d22[0]}, [V]! + vst1.8 {d22[1]}, [V]! + vst1.8 {d22[2]}, [V]! + vst1.8 {d22[3]}, [V]! + .elseif \size == 2 + vst1.8 {d20[4]}, [Y]! + vst1.8 {d20[5]}, [Y]! + vst1.8 {d21[4]}, [U]! + vst1.8 {d21[5]}, [U]! + vst1.8 {d22[4]}, [V]! + vst1.8 {d22[5]}, [V]! + .elseif \size == 1 + vst1.8 {d20[6]}, [Y]! + vst1.8 {d21[6]}, [U]! + vst1.8 {d22[6]}, [V]! + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_load bpp, size + .if \bpp == 24 .if \size == 8 - vst1.8 {d20}, [Y]! - vst1.8 {d21}, [U]! - vst1.8 {d22}, [V]! + vld3.8 {d10, d11, d12}, [RGB]! + pld [RGB, #128] .elseif \size == 4 - vst1.8 {d20[0]}, [Y]! - vst1.8 {d20[1]}, [Y]! - vst1.8 {d20[2]}, [Y]! - vst1.8 {d20[3]}, [Y]! - vst1.8 {d21[0]}, [U]! - vst1.8 {d21[1]}, [U]! - vst1.8 {d21[2]}, [U]! - vst1.8 {d21[3]}, [U]! - vst1.8 {d22[0]}, [V]! - vst1.8 {d22[1]}, [V]! - vst1.8 {d22[2]}, [V]! - vst1.8 {d22[3]}, [V]! + vld3.8 {d10[0], d11[0], d12[0]}, [RGB]! + vld3.8 {d10[1], d11[1], d12[1]}, [RGB]! + vld3.8 {d10[2], d11[2], d12[2]}, [RGB]! + vld3.8 {d10[3], d11[3], d12[3]}, [RGB]! .elseif \size == 2 - vst1.8 {d20[4]}, [Y]! - vst1.8 {d20[5]}, [Y]! - vst1.8 {d21[4]}, [U]! - vst1.8 {d21[5]}, [U]! - vst1.8 {d22[4]}, [V]! - vst1.8 {d22[5]}, [V]! + vld3.8 {d10[4], d11[4], d12[4]}, [RGB]! + vld3.8 {d10[5], d11[5], d12[5]}, [RGB]! .elseif \size == 1 - vst1.8 {d20[6]}, [Y]! - vst1.8 {d21[6]}, [U]! - vst1.8 {d22[6]}, [V]! + vld3.8 {d10[6], d11[6], d12[6]}, [RGB]! .else - .error unsupported macroblock size + .error unsupported macroblock size .endif -.endm - -.macro do_load bpp, size - .if \bpp == 24 - .if \size == 8 - vld3.8 {d10, d11, d12}, [RGB]! - pld [RGB, #128] - .elseif \size == 4 - vld3.8 {d10[0], d11[0], d12[0]}, [RGB]! - vld3.8 {d10[1], d11[1], d12[1]}, [RGB]! - vld3.8 {d10[2], d11[2], d12[2]}, [RGB]! - vld3.8 {d10[3], d11[3], d12[3]}, [RGB]! - .elseif \size == 2 - vld3.8 {d10[4], d11[4], d12[4]}, [RGB]! - vld3.8 {d10[5], d11[5], d12[5]}, [RGB]! - .elseif \size == 1 - vld3.8 {d10[6], d11[6], d12[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif - .elseif \bpp == 32 - .if \size == 8 - vld4.8 {d10, d11, d12, d13}, [RGB]! - pld [RGB, #128] - .elseif \size == 4 - vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! - vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! - vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! - vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! - .elseif \size == 2 - vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! - vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! - .elseif \size == 1 - vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif + .elseif \bpp == 32 + .if \size == 8 + vld4.8 {d10, d11, d12, d13}, [RGB]! + pld [RGB, #128] + .elseif \size == 4 + vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! + vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! + vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! + vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! + .elseif \size == 2 + vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! + vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! + .elseif \size == 1 + vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! .else - .error unsupported bpp + .error unsupported macroblock size .endif + .else + .error unsupported bpp + .endif .endm .macro generate_jsimd_rgb_ycc_convert_neon colorid, bpp, r_offs, g_offs, b_offs /* - * 2 stage pipelined RGB->YCbCr conversion + * 2-stage pipelined RGB->YCbCr conversion */ .macro do_rgb_to_yuv_stage1 - vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ - vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ - vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ - vmull.u16 q7, d4, d0[0] - vmlal.u16 q7, d6, d0[1] - vmlal.u16 q7, d8, d0[2] - vmull.u16 q8, d5, d0[0] - vmlal.u16 q8, d7, d0[1] - vmlal.u16 q8, d9, d0[2] - vrev64.32 q9, q1 - vrev64.32 q13, q1 - vmlsl.u16 q9, d4, d0[3] - vmlsl.u16 q9, d6, d1[0] - vmlal.u16 q9, d8, d1[1] - vmlsl.u16 q13, d5, d0[3] - vmlsl.u16 q13, d7, d1[0] - vmlal.u16 q13, d9, d1[1] - vrev64.32 q14, q1 - vrev64.32 q15, q1 - vmlal.u16 q14, d4, d1[1] - vmlsl.u16 q14, d6, d1[2] - vmlsl.u16 q14, d8, d1[3] - vmlal.u16 q15, d5, d1[1] - vmlsl.u16 q15, d7, d1[2] - vmlsl.u16 q15, d9, d1[3] + vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ + vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ + vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ + vmull.u16 q7, d4, d0[0] + vmlal.u16 q7, d6, d0[1] + vmlal.u16 q7, d8, d0[2] + vmull.u16 q8, d5, d0[0] + vmlal.u16 q8, d7, d0[1] + vmlal.u16 q8, d9, d0[2] + vrev64.32 q9, q1 + vrev64.32 q13, q1 + vmlsl.u16 q9, d4, d0[3] + vmlsl.u16 q9, d6, d1[0] + vmlal.u16 q9, d8, d1[1] + vmlsl.u16 q13, d5, d0[3] + vmlsl.u16 q13, d7, d1[0] + vmlal.u16 q13, d9, d1[1] + vrev64.32 q14, q1 + vrev64.32 q15, q1 + vmlal.u16 q14, d4, d1[1] + vmlsl.u16 q14, d6, d1[2] + vmlsl.u16 q14, d8, d1[3] + vmlal.u16 q15, d5, d1[1] + vmlsl.u16 q15, d7, d1[2] + vmlsl.u16 q15, d9, d1[3] .endm .macro do_rgb_to_yuv_stage2 - vrshrn.u32 d20, q7, #16 - vrshrn.u32 d21, q8, #16 - vshrn.u32 d22, q9, #16 - vshrn.u32 d23, q13, #16 - vshrn.u32 d24, q14, #16 - vshrn.u32 d25, q15, #16 - vmovn.u16 d20, q10 /* d20 = y */ - vmovn.u16 d21, q11 /* d21 = u */ - vmovn.u16 d22, q12 /* d22 = v */ + vrshrn.u32 d20, q7, #16 + vrshrn.u32 d21, q8, #16 + vshrn.u32 d22, q9, #16 + vshrn.u32 d23, q13, #16 + vshrn.u32 d24, q14, #16 + vshrn.u32 d25, q15, #16 + vmovn.u16 d20, q10 /* d20 = y */ + vmovn.u16 d21, q11 /* d21 = u */ + vmovn.u16 d22, q12 /* d22 = v */ .endm .macro do_rgb_to_yuv @@ -1700,52 +1746,52 @@ .endm .macro do_rgb_to_yuv_stage2_store_load_stage1 - vrshrn.u32 d20, q7, #16 - vrshrn.u32 d21, q8, #16 - vshrn.u32 d22, q9, #16 - vrev64.32 q9, q1 - vshrn.u32 d23, q13, #16 - vrev64.32 q13, q1 - vshrn.u32 d24, q14, #16 - vshrn.u32 d25, q15, #16 - do_load \bpp, 8 - vmovn.u16 d20, q10 /* d20 = y */ - vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ - vmovn.u16 d21, q11 /* d21 = u */ - vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ - vmovn.u16 d22, q12 /* d22 = v */ - vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ - vmull.u16 q7, d4, d0[0] - vmlal.u16 q7, d6, d0[1] - vmlal.u16 q7, d8, d0[2] - vst1.8 {d20}, [Y]! - vmull.u16 q8, d5, d0[0] - vmlal.u16 q8, d7, d0[1] - vmlal.u16 q8, d9, d0[2] - vmlsl.u16 q9, d4, d0[3] - vmlsl.u16 q9, d6, d1[0] - vmlal.u16 q9, d8, d1[1] - vst1.8 {d21}, [U]! - vmlsl.u16 q13, d5, d0[3] - vmlsl.u16 q13, d7, d1[0] - vmlal.u16 q13, d9, d1[1] - vrev64.32 q14, q1 - vrev64.32 q15, q1 - vmlal.u16 q14, d4, d1[1] - vmlsl.u16 q14, d6, d1[2] - vmlsl.u16 q14, d8, d1[3] - vst1.8 {d22}, [V]! - vmlal.u16 q15, d5, d1[1] - vmlsl.u16 q15, d7, d1[2] - vmlsl.u16 q15, d9, d1[3] + vrshrn.u32 d20, q7, #16 + vrshrn.u32 d21, q8, #16 + vshrn.u32 d22, q9, #16 + vrev64.32 q9, q1 + vshrn.u32 d23, q13, #16 + vrev64.32 q13, q1 + vshrn.u32 d24, q14, #16 + vshrn.u32 d25, q15, #16 + do_load \bpp, 8 + vmovn.u16 d20, q10 /* d20 = y */ + vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ + vmovn.u16 d21, q11 /* d21 = u */ + vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ + vmovn.u16 d22, q12 /* d22 = v */ + vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ + vmull.u16 q7, d4, d0[0] + vmlal.u16 q7, d6, d0[1] + vmlal.u16 q7, d8, d0[2] + vst1.8 {d20}, [Y]! + vmull.u16 q8, d5, d0[0] + vmlal.u16 q8, d7, d0[1] + vmlal.u16 q8, d9, d0[2] + vmlsl.u16 q9, d4, d0[3] + vmlsl.u16 q9, d6, d1[0] + vmlal.u16 q9, d8, d1[1] + vst1.8 {d21}, [U]! + vmlsl.u16 q13, d5, d0[3] + vmlsl.u16 q13, d7, d1[0] + vmlal.u16 q13, d9, d1[1] + vrev64.32 q14, q1 + vrev64.32 q15, q1 + vmlal.u16 q14, d4, d1[1] + vmlsl.u16 q14, d6, d1[2] + vmlsl.u16 q14, d8, d1[3] + vst1.8 {d22}, [V]! + vmlal.u16 q15, d5, d1[1] + vmlsl.u16 q15, d7, d1[2] + vmlsl.u16 q15, d9, d1[3] .endm .balign 16 jsimd_\colorid\()_ycc_neon_consts: - .short 19595, 38470, 7471, 11059 - .short 21709, 32768, 27439, 5329 - .short 32767, 128, 32767, 128 - .short 32767, 128, 32767, 128 + .short 19595, 38470, 7471, 11059 + .short 21709, 32768, 27439, 5329 + .short 32767, 128, 32767, 128 + .short 32767, 128, 32767, 128 asm_function jsimd_\colorid\()_ycc_convert_neon OUTPUT_WIDTH .req r0 @@ -1851,7 +1897,6 @@ .unreq U .unreq V .unreq N -.endfunc .purgem do_rgb_to_yuv .purgem do_rgb_to_yuv_stage1 @@ -1871,6 +1916,7 @@ .purgem do_load .purgem do_store + /*****************************************************************************/ /* @@ -1932,7 +1978,7 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc + /*****************************************************************************/ @@ -1955,10 +2001,10 @@ .balign 16 jsimd_fdct_ifast_neon_consts: - .short (98 * 128) /* XFIX_0_382683433 */ - .short (139 * 128) /* XFIX_0_541196100 */ - .short (181 * 128) /* XFIX_0_707106781 */ - .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ + .short (98 * 128) /* XFIX_0_382683433 */ + .short (139 * 128) /* XFIX_0_541196100 */ + .short (181 * 128) /* XFIX_0_707106781 */ + .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ asm_function jsimd_fdct_ifast_neon @@ -1995,52 +2041,52 @@ /* Transpose */ vtrn.16 q12, q13 vtrn.16 q10, q11 - vtrn.16 q8, q9 + vtrn.16 q8, q9 vtrn.16 q14, q15 - vtrn.32 q9, q11 + vtrn.32 q9, q11 vtrn.32 q13, q15 - vtrn.32 q8, q10 + vtrn.32 q8, q10 vtrn.32 q12, q14 vswp d30, d23 vswp d24, d17 vswp d26, d19 /* 1-D FDCT */ - vadd.s16 q2, q11, q12 + vadd.s16 q2, q11, q12 vswp d28, d21 vsub.s16 q12, q11, q12 - vsub.s16 q6, q10, q13 + vsub.s16 q6, q10, q13 vadd.s16 q10, q10, q13 - vsub.s16 q7, q9, q14 - vadd.s16 q9, q9, q14 - vsub.s16 q1, q8, q15 - vadd.s16 q8, q8, q15 - vsub.s16 q4, q9, q10 - vsub.s16 q5, q8, q2 - vadd.s16 q3, q9, q10 - vadd.s16 q4, q4, q5 - vadd.s16 q2, q8, q2 - vqdmulh.s16 q4, q4, XFIX_0_707106781 + vsub.s16 q7, q9, q14 + vadd.s16 q9, q9, q14 + vsub.s16 q1, q8, q15 + vadd.s16 q8, q8, q15 + vsub.s16 q4, q9, q10 + vsub.s16 q5, q8, q2 + vadd.s16 q3, q9, q10 + vadd.s16 q4, q4, q5 + vadd.s16 q2, q8, q2 + vqdmulh.s16 q4, q4, XFIX_0_707106781 vadd.s16 q11, q12, q6 - vadd.s16 q8, q2, q3 - vsub.s16 q12, q2, q3 - vadd.s16 q3, q6, q7 - vadd.s16 q7, q7, q1 - vqdmulh.s16 q3, q3, XFIX_0_707106781 - vsub.s16 q6, q11, q7 - vadd.s16 q10, q5, q4 - vqdmulh.s16 q6, q6, XFIX_0_382683433 - vsub.s16 q14, q5, q4 + vadd.s16 q8, q2, q3 + vsub.s16 q12, q2, q3 + vadd.s16 q3, q6, q7 + vadd.s16 q7, q7, q1 + vqdmulh.s16 q3, q3, XFIX_0_707106781 + vsub.s16 q6, q11, q7 + vadd.s16 q10, q5, q4 + vqdmulh.s16 q6, q6, XFIX_0_382683433 + vsub.s16 q14, q5, q4 vqdmulh.s16 q11, q11, XFIX_0_541196100 - vqdmulh.s16 q5, q7, XFIX_1_306562965 - vadd.s16 q4, q1, q3 - vsub.s16 q3, q1, q3 - vadd.s16 q7, q7, q6 + vqdmulh.s16 q5, q7, XFIX_1_306562965 + vadd.s16 q4, q1, q3 + vsub.s16 q3, q1, q3 + vadd.s16 q7, q7, q6 vadd.s16 q11, q11, q6 - vadd.s16 q7, q7, q5 - vadd.s16 q13, q3, q11 - vsub.s16 q11, q3, q11 - vadd.s16 q9, q4, q7 - vsub.s16 q15, q4, q7 + vadd.s16 q7, q7, q5 + vadd.s16 q13, q3, q11 + vsub.s16 q11, q3, q11 + vadd.s16 q9, q4, q7 + vsub.s16 q15, q4, q7 subs TMP, TMP, #1 bne 1b @@ -2055,14 +2101,14 @@ .unreq DATA .unreq TMP -.endfunc + /*****************************************************************************/ /* * GLOBAL(void) - * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM * divisors, - * DCTELEM * workspace); + * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); * * Note: the code uses 2 stage pipelining in order to improve instructions * scheduling and eliminate stalls (this provides ~15% better @@ -2089,22 +2135,22 @@ vld1.16 {d20, d21, d22, d23}, [CORRECTION, :128]! vabs.s16 q13, q1 vld1.16 {d16, d17, d18, d19}, [RECIPROCAL, :128]! - vadd.u16 q12, q12, q10 /* add correction */ + vadd.u16 q12, q12, q10 /* add correction */ vadd.u16 q13, q13, q11 - vmull.u16 q10, d24, d16 /* multiply by reciprocal */ + vmull.u16 q10, d24, d16 /* multiply by reciprocal */ vmull.u16 q11, d25, d17 - vmull.u16 q8, d26, d18 - vmull.u16 q9, d27, d19 + vmull.u16 q8, d26, d18 + vmull.u16 q9, d27, d19 vld1.16 {d24, d25, d26, d27}, [SHIFT, :128]! vshrn.u32 d20, q10, #16 vshrn.u32 d21, q11, #16 - vshrn.u32 d22, q8, #16 - vshrn.u32 d23, q9, #16 + vshrn.u32 d22, q8, #16 + vshrn.u32 d23, q9, #16 vneg.s16 q12, q12 vneg.s16 q13, q13 - vshr.s16 q2, q0, #15 /* extract sign */ - vshr.s16 q3, q1, #15 - vshl.u16 q14, q10, q12 /* shift */ + vshr.s16 q2, q0, #15 /* extract sign */ + vshr.s16 q3, q1, #15 + vshl.u16 q14, q10, q12 /* shift */ vshl.u16 q15, q11, q13 push {r4, r5} @@ -2117,25 +2163,25 @@ vabs.s16 q13, q1 veor.u16 q15, q15, q3 vld1.16 {d16, d17, d18, d19}, [RECIPROCAL, :128]! - vadd.u16 q12, q12, q10 /* add correction */ + vadd.u16 q12, q12, q10 /* add correction */ vadd.u16 q13, q13, q11 - vmull.u16 q10, d24, d16 /* multiply by reciprocal */ + vmull.u16 q10, d24, d16 /* multiply by reciprocal */ vmull.u16 q11, d25, d17 - vmull.u16 q8, d26, d18 - vmull.u16 q9, d27, d19 + vmull.u16 q8, d26, d18 + vmull.u16 q9, d27, d19 vsub.u16 q14, q14, q2 vld1.16 {d24, d25, d26, d27}, [SHIFT, :128]! vsub.u16 q15, q15, q3 vshrn.u32 d20, q10, #16 vshrn.u32 d21, q11, #16 vst1.16 {d28, d29, d30, d31}, [COEF_BLOCK, :128]! - vshrn.u32 d22, q8, #16 - vshrn.u32 d23, q9, #16 + vshrn.u32 d22, q8, #16 + vshrn.u32 d23, q9, #16 vneg.s16 q12, q12 vneg.s16 q13, q13 - vshr.s16 q2, q0, #15 /* extract sign */ - vshr.s16 q3, q1, #15 - vshl.u16 q14, q10, q12 /* shift */ + vshr.s16 q2, q0, #15 /* extract sign */ + vshr.s16 q3, q1, #15 + vshl.u16 q14, q10, q12 /* shift */ vshl.u16 q15, q11, q13 subs LOOP_COUNT, LOOP_COUNT, #1 bne 1b @@ -2147,7 +2193,7 @@ vsub.u16 q15, q15, q3 vst1.16 {d28, d29, d30, d31}, [COEF_BLOCK, :128]! - bx lr /* return */ + bx lr /* return */ .unreq COEF_BLOCK .unreq DIVISORS @@ -2156,4 +2202,677 @@ .unreq CORRECTION .unreq SHIFT .unreq LOOP_COUNT -.endfunc + + +/*****************************************************************************/ + +/* + * GLOBAL(void) + * jsimd_h2v1_fancy_upsample_neon (int max_v_samp_factor, + * JDIMENSION downsampled_width, + * JSAMPARRAY input_data, + * JSAMPARRAY *output_data_ptr); + * + * Note: the use of unaligned writes is the main remaining bottleneck in + * this code, which can be potentially solved to get up to tens + * of percents performance improvement on Cortex-A8/Cortex-A9. + */ + +/* + * Upsample 16 source pixels to 32 destination pixels. The new 16 source + * pixels are loaded to q0. The previous 16 source pixels are in q1. The + * shifted-by-one source pixels are constructed in q2 by using q0 and q1. + * Register d28 is used for multiplication by 3. Register q15 is used + * for adding +1 bias. + */ +.macro upsample16 OUTPTR, INPTR + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vmov q1, q0 /* backup source pixels to q1 */ + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample 32 source pixels to 64 destination pixels. Compared to 'usample16' + * macro, the roles of q0 and q1 registers are reversed for even and odd + * groups of 16 pixels, that's why "vmov q1, q0" instructions are not needed. + * Also this unrolling allows to reorder loads and stores to compensate + * multiplication latency and reduce stalls. + */ +.macro upsample32 OUTPTR, INPTR + /* even 16 pixels group */ + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + /* odd 16 pixels group */ + vld1.8 {q1}, [\INPTR]! + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vmovl.u8 q8, d2 + vext.8 q2, q0, q1, #15 + vmovl.u8 q9, d3 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d2, d28 + vmlal.u8 q11, d3, d28 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample a row of WIDTH pixels from INPTR to OUTPTR. + */ +.macro upsample_row OUTPTR, INPTR, WIDTH, TMP1 + /* special case for the first and last pixels */ + sub \WIDTH, \WIDTH, #1 + add \OUTPTR, \OUTPTR, #1 + ldrb \TMP1, [\INPTR, \WIDTH] + strb \TMP1, [\OUTPTR, \WIDTH, asl #1] + ldrb \TMP1, [\INPTR], #1 + strb \TMP1, [\OUTPTR, #-1] + vmov.8 d3[7], \TMP1 + + subs \WIDTH, \WIDTH, #32 + blt 5f +0: /* process 32 pixels per iteration */ + upsample32 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #32 + bge 0b +5: + adds \WIDTH, \WIDTH, #16 + blt 1f +0: /* process 16 pixels if needed */ + upsample16 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #16 +1: + adds \WIDTH, \WIDTH, #16 + beq 9f + + /* load the remaining 1-15 pixels */ + add \INPTR, \INPTR, \WIDTH + tst \WIDTH, #1 + beq 2f + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #2 + beq 2f + vext.8 d0, d0, d0, #6 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #4 + beq 2f + vrev64.32 d0, d0 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[3]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[2]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #8 + beq 2f + vmov d1, d0 + sub \INPTR, \INPTR, #8 + vld1.8 {d0}, [\INPTR] +2: /* upsample the remaining pixels */ + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vrshrn.u16 d10, q8, #2 + vrshrn.u16 d12, q9, #2 + vshrn.u16 d11, q10, #2 + vshrn.u16 d13, q11, #2 + vzip.8 d10, d11 + vzip.8 d12, d13 + /* store the remaining pixels */ + tst \WIDTH, #8 + beq 2f + vst1.8 {d10, d11}, [\OUTPTR]! + vmov q5, q6 +2: + tst \WIDTH, #4 + beq 2f + vst1.8 {d10}, [\OUTPTR]! + vmov d10, d11 +2: + tst \WIDTH, #2 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! + vst1.8 {d10[2]}, [\OUTPTR]! + vst1.8 {d10[3]}, [\OUTPTR]! + vext.8 d10, d10, d10, #4 +2: + tst \WIDTH, #1 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! +2: +9: +.endm + +asm_function jsimd_h2v1_fancy_upsample_neon + + MAX_V_SAMP_FACTOR .req r0 + DOWNSAMPLED_WIDTH .req r1 + INPUT_DATA .req r2 + OUTPUT_DATA_PTR .req r3 + OUTPUT_DATA .req OUTPUT_DATA_PTR + + OUTPTR .req r4 + INPTR .req r5 + WIDTH .req ip + TMP .req lr + + push {r4, r5, r6, lr} + vpush {d8-d15} + + ldr OUTPUT_DATA, [OUTPUT_DATA_PTR] + cmp MAX_V_SAMP_FACTOR, #0 + ble 99f + + /* initialize constants */ + vmov.u8 d28, #3 + vmov.u16 q15, #1 +11: + ldr INPTR, [INPUT_DATA], #4 + ldr OUTPTR, [OUTPUT_DATA], #4 + mov WIDTH, DOWNSAMPLED_WIDTH + upsample_row OUTPTR, INPTR, WIDTH, TMP + subs MAX_V_SAMP_FACTOR, MAX_V_SAMP_FACTOR, #1 + bgt 11b + +99: + vpop {d8-d15} + pop {r4, r5, r6, pc} + + .unreq MAX_V_SAMP_FACTOR + .unreq DOWNSAMPLED_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA_PTR + .unreq OUTPUT_DATA + + .unreq OUTPTR + .unreq INPTR + .unreq WIDTH + .unreq TMP + +.purgem upsample16 +.purgem upsample32 +.purgem upsample_row + + +/*****************************************************************************/ + +/* + * GLOBAL(JOCTET*) + * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * + */ + +.macro emit_byte BUFFER, PUT_BUFFER, PUT_BITS, ZERO, TMP + sub \PUT_BITS, \PUT_BITS, #0x8 + lsr \TMP, \PUT_BUFFER, \PUT_BITS + uxtb \TMP, \TMP + strb \TMP, [\BUFFER, #1]! + cmp \TMP, #0xff + /*it eq*/ + strbeq \ZERO, [\BUFFER, #1]! +.endm + +.macro put_bits PUT_BUFFER, PUT_BITS, CODE, SIZE + /*lsl \PUT_BUFFER, \PUT_BUFFER, \SIZE*/ + add \PUT_BITS, \SIZE + /*orr \PUT_BUFFER, \PUT_BUFFER, \CODE*/ + orr \PUT_BUFFER, \CODE, \PUT_BUFFER, lsl \SIZE +.endm + +.macro checkbuf15 BUFFER, PUT_BUFFER, PUT_BITS, ZERO, TMP + cmp \PUT_BITS, #0x10 + blt 15f + eor \ZERO, \ZERO, \ZERO + emit_byte \BUFFER, \PUT_BUFFER, \PUT_BITS, \ZERO, \TMP + emit_byte \BUFFER, \PUT_BUFFER, \PUT_BITS, \ZERO, \TMP +15: +.endm + +.balign 16 +jsimd_huff_encode_one_block_neon_consts: + .byte 0x01 + .byte 0x02 + .byte 0x04 + .byte 0x08 + .byte 0x10 + .byte 0x20 + .byte 0x40 + .byte 0x80 + +asm_function jsimd_huff_encode_one_block_neon + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + add r7, sp, #0x1c + sub r4, sp, #0x40 + bfc r4, #0, #5 + mov sp, r4 /* align sp on 32 bytes */ + vst1.64 {d8, d9, d10, d11}, [r4, :128]! + vst1.64 {d12, d13, d14, d15}, [r4, :128] + sub sp, #0x140 /* reserve 320 bytes */ + str r0, [sp, #0x18] /* working state > sp + Ox18 */ + add r4, sp, #0x20 /* r4 = t1 */ + ldr lr, [r7, #0x8] /* lr = dctbl */ + sub r10, r1, #0x1 /* r10=buffer-- */ + ldrsh r1, [r2] + mov r9, #0x10 + mov r8, #0x1 + adr r5, jsimd_huff_encode_one_block_neon_consts + /* prepare data */ + vld1.8 {d26}, [r5, :64] + veor q8, q8, q8 + veor q9, q9, q9 + vdup.16 q14, r9 + vdup.16 q15, r8 + veor q10, q10, q10 + veor q11, q11, q11 + sub r1, r1, r3 + add r9, r2, #0x22 + add r8, r2, #0x18 + add r3, r2, #0x36 + vmov.16 d0[0], r1 + vld1.16 {d2[0]}, [r9, :16] + vld1.16 {d4[0]}, [r8, :16] + vld1.16 {d6[0]}, [r3, :16] + add r1, r2, #0x2 + add r9, r2, #0x30 + add r8, r2, #0x26 + add r3, r2, #0x28 + vld1.16 {d0[1]}, [r1, :16] + vld1.16 {d2[1]}, [r9, :16] + vld1.16 {d4[1]}, [r8, :16] + vld1.16 {d6[1]}, [r3, :16] + add r1, r2, #0x10 + add r9, r2, #0x40 + add r8, r2, #0x34 + add r3, r2, #0x1a + vld1.16 {d0[2]}, [r1, :16] + vld1.16 {d2[2]}, [r9, :16] + vld1.16 {d4[2]}, [r8, :16] + vld1.16 {d6[2]}, [r3, :16] + add r1, r2, #0x20 + add r9, r2, #0x32 + add r8, r2, #0x42 + add r3, r2, #0xc + vld1.16 {d0[3]}, [r1, :16] + vld1.16 {d2[3]}, [r9, :16] + vld1.16 {d4[3]}, [r8, :16] + vld1.16 {d6[3]}, [r3, :16] + add r1, r2, #0x12 + add r9, r2, #0x24 + add r8, r2, #0x50 + add r3, r2, #0xe + vld1.16 {d1[0]}, [r1, :16] + vld1.16 {d3[0]}, [r9, :16] + vld1.16 {d5[0]}, [r8, :16] + vld1.16 {d7[0]}, [r3, :16] + add r1, r2, #0x4 + add r9, r2, #0x16 + add r8, r2, #0x60 + add r3, r2, #0x1c + vld1.16 {d1[1]}, [r1, :16] + vld1.16 {d3[1]}, [r9, :16] + vld1.16 {d5[1]}, [r8, :16] + vld1.16 {d7[1]}, [r3, :16] + add r1, r2, #0x6 + add r9, r2, #0x8 + add r8, r2, #0x52 + add r3, r2, #0x2a + vld1.16 {d1[2]}, [r1, :16] + vld1.16 {d3[2]}, [r9, :16] + vld1.16 {d5[2]}, [r8, :16] + vld1.16 {d7[2]}, [r3, :16] + add r1, r2, #0x14 + add r9, r2, #0xa + add r8, r2, #0x44 + add r3, r2, #0x38 + vld1.16 {d1[3]}, [r1, :16] + vld1.16 {d3[3]}, [r9, :16] + vld1.16 {d5[3]}, [r8, :16] + vld1.16 {d7[3]}, [r3, :16] + vcgt.s16 q8, q8, q0 + vcgt.s16 q9, q9, q1 + vcgt.s16 q10, q10, q2 + vcgt.s16 q11, q11, q3 + vabs.s16 q0, q0 + vabs.s16 q1, q1 + vabs.s16 q2, q2 + vabs.s16 q3, q3 + veor q8, q8, q0 + veor q9, q9, q1 + veor q10, q10, q2 + veor q11, q11, q3 + add r9, r4, #0x20 + add r8, r4, #0x80 + add r3, r4, #0xa0 + vclz.i16 q0, q0 + vclz.i16 q1, q1 + vclz.i16 q2, q2 + vclz.i16 q3, q3 + vsub.i16 q0, q14, q0 + vsub.i16 q1, q14, q1 + vsub.i16 q2, q14, q2 + vsub.i16 q3, q14, q3 + vst1.16 {d0, d1, d2, d3}, [r4, :256] + vst1.16 {d4, d5, d6, d7}, [r9, :256] + vshl.s16 q0, q15, q0 + vshl.s16 q1, q15, q1 + vshl.s16 q2, q15, q2 + vshl.s16 q3, q15, q3 + vsub.i16 q0, q0, q15 + vsub.i16 q1, q1, q15 + vsub.i16 q2, q2, q15 + vsub.i16 q3, q3, q15 + vand q8, q8, q0 + vand q9, q9, q1 + vand q10, q10, q2 + vand q11, q11, q3 + vst1.16 {d16, d17, d18, d19}, [r8, :256] + vst1.16 {d20, d21, d22, d23}, [r3, :256] + add r1, r2, #0x46 + add r9, r2, #0x3a + add r8, r2, #0x74 + add r3, r2, #0x6a + vld1.16 {d8[0]}, [r1, :16] + vld1.16 {d10[0]}, [r9, :16] + vld1.16 {d12[0]}, [r8, :16] + vld1.16 {d14[0]}, [r3, :16] + veor q8, q8, q8 + veor q9, q9, q9 + veor q10, q10, q10 + veor q11, q11, q11 + add r1, r2, #0x54 + add r9, r2, #0x2c + add r8, r2, #0x76 + add r3, r2, #0x78 + vld1.16 {d8[1]}, [r1, :16] + vld1.16 {d10[1]}, [r9, :16] + vld1.16 {d12[1]}, [r8, :16] + vld1.16 {d14[1]}, [r3, :16] + add r1, r2, #0x62 + add r9, r2, #0x1e + add r8, r2, #0x68 + add r3, r2, #0x7a + vld1.16 {d8[2]}, [r1, :16] + vld1.16 {d10[2]}, [r9, :16] + vld1.16 {d12[2]}, [r8, :16] + vld1.16 {d14[2]}, [r3, :16] + add r1, r2, #0x70 + add r9, r2, #0x2e + add r8, r2, #0x5a + add r3, r2, #0x6c + vld1.16 {d8[3]}, [r1, :16] + vld1.16 {d10[3]}, [r9, :16] + vld1.16 {d12[3]}, [r8, :16] + vld1.16 {d14[3]}, [r3, :16] + add r1, r2, #0x72 + add r9, r2, #0x3c + add r8, r2, #0x4c + add r3, r2, #0x5e + vld1.16 {d9[0]}, [r1, :16] + vld1.16 {d11[0]}, [r9, :16] + vld1.16 {d13[0]}, [r8, :16] + vld1.16 {d15[0]}, [r3, :16] + add r1, r2, #0x64 + add r9, r2, #0x4a + add r8, r2, #0x3e + add r3, r2, #0x6e + vld1.16 {d9[1]}, [r1, :16] + vld1.16 {d11[1]}, [r9, :16] + vld1.16 {d13[1]}, [r8, :16] + vld1.16 {d15[1]}, [r3, :16] + add r1, r2, #0x56 + add r9, r2, #0x58 + add r8, r2, #0x4e + add r3, r2, #0x7c + vld1.16 {d9[2]}, [r1, :16] + vld1.16 {d11[2]}, [r9, :16] + vld1.16 {d13[2]}, [r8, :16] + vld1.16 {d15[2]}, [r3, :16] + add r1, r2, #0x48 + add r9, r2, #0x66 + add r8, r2, #0x5c + add r3, r2, #0x7e + vld1.16 {d9[3]}, [r1, :16] + vld1.16 {d11[3]}, [r9, :16] + vld1.16 {d13[3]}, [r8, :16] + vld1.16 {d15[3]}, [r3, :16] + vcgt.s16 q8, q8, q4 + vcgt.s16 q9, q9, q5 + vcgt.s16 q10, q10, q6 + vcgt.s16 q11, q11, q7 + vabs.s16 q4, q4 + vabs.s16 q5, q5 + vabs.s16 q6, q6 + vabs.s16 q7, q7 + veor q8, q8, q4 + veor q9, q9, q5 + veor q10, q10, q6 + veor q11, q11, q7 + add r1, r4, #0x40 + add r9, r4, #0x60 + add r8, r4, #0xc0 + add r3, r4, #0xe0 + vclz.i16 q4, q4 + vclz.i16 q5, q5 + vclz.i16 q6, q6 + vclz.i16 q7, q7 + vsub.i16 q4, q14, q4 + vsub.i16 q5, q14, q5 + vsub.i16 q6, q14, q6 + vsub.i16 q7, q14, q7 + vst1.16 {d8, d9, d10, d11}, [r1, :256] + vst1.16 {d12, d13, d14, d15}, [r9, :256] + vshl.s16 q4, q15, q4 + vshl.s16 q5, q15, q5 + vshl.s16 q6, q15, q6 + vshl.s16 q7, q15, q7 + vsub.i16 q4, q4, q15 + vsub.i16 q5, q5, q15 + vsub.i16 q6, q6, q15 + vsub.i16 q7, q7, q15 + vand q8, q8, q4 + vand q9, q9, q5 + vand q10, q10, q6 + vand q11, q11, q7 + vst1.16 {d16, d17, d18, d19}, [r8, :256] + vst1.16 {d20, d21, d22, d23}, [r3, :256] + ldr r12, [r7, #0xc] /* r12 = actbl */ + add r1, lr, #0x400 /* r1 = dctbl->ehufsi */ + mov r9, r12 /* r9 = actbl */ + add r6, r4, #0x80 /* r6 = t2 */ + ldr r11, [r0, #0x8] /* r11 = put_buffer */ + ldr r4, [r0, #0xc] /* r4 = put_bits */ + ldrh r2, [r6, #-128] /* r2 = nbits */ + ldrh r3, [r6] /* r3 = temp2 & (((JLONG) 1)<ehufsi */ + ldrsb r6, [r5, #0xf0] /* r6 = actbl->ehufsi[0xf0] */ + veor q8, q8, q8 + vceq.i16 q0, q0, q8 + vceq.i16 q1, q1, q8 + vceq.i16 q2, q2, q8 + vceq.i16 q3, q3, q8 + vceq.i16 q4, q4, q8 + vceq.i16 q5, q5, q8 + vceq.i16 q6, q6, q8 + vceq.i16 q7, q7, q8 + vmovn.i16 d0, q0 + vmovn.i16 d2, q1 + vmovn.i16 d4, q2 + vmovn.i16 d6, q3 + vmovn.i16 d8, q4 + vmovn.i16 d10, q5 + vmovn.i16 d12, q6 + vmovn.i16 d14, q7 + vand d0, d0, d26 + vand d2, d2, d26 + vand d4, d4, d26 + vand d6, d6, d26 + vand d8, d8, d26 + vand d10, d10, d26 + vand d12, d12, d26 + vand d14, d14, d26 + vpadd.i8 d0, d0, d2 + vpadd.i8 d4, d4, d6 + vpadd.i8 d8, d8, d10 + vpadd.i8 d12, d12, d14 + vpadd.i8 d0, d0, d4 + vpadd.i8 d8, d8, d12 + vpadd.i8 d0, d0, d8 + vmov.32 r1, d0[1] + vmov.32 r8, d0[0] + mvn r1, r1 + mvn r8, r8 + lsrs r1, r1, #0x1 + rrx r8, r8 /* shift in last r1 bit while shifting out DC bit */ + rbit r1, r1 /* r1 = index1 */ + rbit r8, r8 /* r8 = index0 */ + ldr r0, [r9, #0x3c0] /* r0 = actbl->ehufco[0xf0] */ + str r1, [sp, #0x14] /* index1 > sp + 0x14 */ + cmp r8, #0x0 + beq 6f +1: + clz r2, r8 + add lr, lr, r2, lsl #1 + lsl r8, r8, r2 + ldrh r1, [lr, #-126] +2: + cmp r2, #0x10 + blt 3f + sub r2, r2, #0x10 + put_bits r11, r4, r0, r6 + cmp r4, #0x10 + blt 2b + eor r3, r3, r3 + emit_byte r10, r11, r4, r3, r12 + emit_byte r10, r11, r4, r3, r12 + b 2b +3: + add r2, r1, r2, lsl #4 + ldrh r3, [lr, #2]! + ldr r12, [r9, r2, lsl #2] + ldrb r2, [r5, r2] + put_bits r11, r4, r12, r2 + checkbuf15 r10, r11, r4, r2, r12 + put_bits r11, r4, r3, r1 + checkbuf15 r10, r11, r4, r2, r12 + lsls r8, r8, #0x1 + bne 1b +6: + add r12, sp, #0x20 /* r12 = t1 */ + ldr r8, [sp, #0x14] /* r8 = index1 */ + adds r12, #0xc0 /* r12 = t2 + (DCTSIZE2/2) */ + cmp r8, #0x0 + beq 6f + clz r2, r8 + sub r12, r12, lr + lsl r8, r8, r2 + add r2, r2, r12, lsr #1 + add lr, lr, r2, lsl #1 + b 7f +1: + clz r2, r8 + add lr, lr, r2, lsl #1 + lsl r8, r8, r2 +7: + ldrh r1, [lr, #-126] +2: + cmp r2, #0x10 + blt 3f + sub r2, r2, #0x10 + put_bits r11, r4, r0, r6 + cmp r4, #0x10 + blt 2b + eor r3, r3, r3 + emit_byte r10, r11, r4, r3, r12 + emit_byte r10, r11, r4, r3, r12 + b 2b +3: + add r2, r1, r2, lsl #4 + ldrh r3, [lr, #2]! + ldr r12, [r9, r2, lsl #2] + ldrb r2, [r5, r2] + put_bits r11, r4, r12, r2 + checkbuf15 r10, r11, r4, r2, r12 + put_bits r11, r4, r3, r1 + checkbuf15 r10, r11, r4, r2, r12 + lsls r8, r8, #0x1 + bne 1b +6: + add r0, sp, #0x20 + add r0, #0xfe + cmp lr, r0 + bhs 1f + ldr r1, [r9] + ldrb r0, [r5] + put_bits r11, r4, r1, r0 + checkbuf15 r10, r11, r4, r0, r1 +1: + ldr r12, [sp, #0x18] + str r11, [r12, #0x8] + str r4, [r12, #0xc] + add r0, r10, #0x1 + add r4, sp, #0x140 + vld1.64 {d8, d9, d10, d11}, [r4, :128]! + vld1.64 {d12, d13, d14, d15}, [r4, :128] + sub r4, r7, #0x1c + mov sp, r4 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + +.purgem emit_byte +.purgem put_bits +.purgem checkbuf15 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,92 @@ +; +; Automatically generated include file from jsimdcfg.inc.h +; +; +; -- jpeglib.h +; +%define DCTSIZE 8 +%define DCTSIZE2 64 +; +; -- jmorecfg.h +; +%define RGB_RED 0 +%define RGB_GREEN 1 +%define RGB_BLUE 2 +%define RGB_PIXELSIZE 3 +%define EXT_RGB_RED 0 +%define EXT_RGB_GREEN 1 +%define EXT_RGB_BLUE 2 +%define EXT_RGB_PIXELSIZE 3 +%define EXT_RGBX_RED 0 +%define EXT_RGBX_GREEN 1 +%define EXT_RGBX_BLUE 2 +%define EXT_RGBX_PIXELSIZE 4 +%define EXT_BGR_RED 2 +%define EXT_BGR_GREEN 1 +%define EXT_BGR_BLUE 0 +%define EXT_BGR_PIXELSIZE 3 +%define EXT_BGRX_RED 2 +%define EXT_BGRX_GREEN 1 +%define EXT_BGRX_BLUE 0 +%define EXT_BGRX_PIXELSIZE 4 +%define EXT_XBGR_RED 3 +%define EXT_XBGR_GREEN 2 +%define EXT_XBGR_BLUE 1 +%define EXT_XBGR_PIXELSIZE 4 +%define EXT_XRGB_RED 1 +%define EXT_XRGB_GREEN 2 +%define EXT_XRGB_BLUE 3 +%define EXT_XRGB_PIXELSIZE 4 +%define RGBX_FILLER_0XFF 1 +; Representation of a single sample (pixel element value). +; On this SIMD implementation, this must be 'unsigned char'. +; +%define JSAMPLE byte ; unsigned char +%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) +%define CENTERJSAMPLE 128 +; Representation of a DCT frequency coefficient. +; On this SIMD implementation, this must be 'short'. +; +%define JCOEF word ; short +%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) +; Datatype used for image dimensions. +; On this SIMD implementation, this must be 'unsigned int'. +; +%define JDIMENSION dword ; unsigned int +%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) +%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) +%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) +%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) +%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) +%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) +%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) +%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) +%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) +; +; -- jdct.h +; +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) +%define float FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float) +; To maximize parallelism, Type short is changed to short. +; +%define ISLOW_MULT_TYPE word ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) +%define IFAST_MULT_TYPE word ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors +%define FLOAT_MULT_TYPE FP32 ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) +; +; -- jsimd.h +; +%define JSIMD_NONE 0x00 +%define JSIMD_MMX 0x01 +%define JSIMD_3DNOW 0x02 +%define JSIMD_SSE 0x04 +%define JSIMD_SSE2 0x08 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc.h glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcfg.inc.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,130 @@ +// This file generates the include file for the assembly +// implementations by abusing the C preprocessor. +// +// Note: Some things are manually defined as they need to +// be mapped to NASM types. + +; +; Automatically generated include file from jsimdcfg.inc.h +; + +#define JPEG_INTERNALS + +#include "../jpeglib.h" +#include "../jconfig.h" +#include "../jmorecfg.h" +#include "jsimd.h" + +; +; -- jpeglib.h +; + +%define _cpp_protection_DCTSIZE DCTSIZE +%define _cpp_protection_DCTSIZE2 DCTSIZE2 + +; +; -- jmorecfg.h +; + +%define _cpp_protection_RGB_RED RGB_RED +%define _cpp_protection_RGB_GREEN RGB_GREEN +%define _cpp_protection_RGB_BLUE RGB_BLUE +%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED +%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN +%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE +%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED +%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN +%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE +%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE + +%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED +%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN +%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE +%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE + +%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED +%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN +%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE +%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE + +%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED +%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN +%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE +%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE + +%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED +%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN +%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE +%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE + +%define RGBX_FILLER_0XFF 1 + +; Representation of a single sample (pixel element value). +; On this SIMD implementation, this must be 'unsigned char'. +; + +%define JSAMPLE byte ; unsigned char +%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) + +%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE + +; Representation of a DCT frequency coefficient. +; On this SIMD implementation, this must be 'short'. +; +%define JCOEF word ; short +%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) + +; Datatype used for image dimensions. +; On this SIMD implementation, this must be 'unsigned int'. +; +%define JDIMENSION dword ; unsigned int +%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) + +%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) +%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) +%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) +%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) +%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) +%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) +%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) +%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) + +; +; -- jdct.h +; + +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) + +%define FAST_FLOAT FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT) + +; To maximize parallelism, Type MULTIPLIER is changed to short. +; +%define ISLOW_MULT_TYPE word ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) + +%define IFAST_MULT_TYPE word ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors + +%define FLOAT_MULT_TYPE FP32 ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) + +; +; -- jsimd.h +; + +%define _cpp_protection_JSIMD_NONE JSIMD_NONE +%define _cpp_protection_JSIMD_MMX JSIMD_MMX +%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW +%define _cpp_protection_JSIMD_SSE JSIMD_SSE +%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcpu.asm glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcpu.asm --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdcpu.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdcpu.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,105 @@ +; +; jsimdcpu.asm - SIMD instruction support check +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Check if the CPU supports SIMD instructions +; +; GLOBAL(unsigned int) +; jpeg_simd_cpu_support (void) +; + + align 16 + global EXTN(jpeg_simd_cpu_support) PRIVATE + +EXTN(jpeg_simd_cpu_support): + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused + push edi + + xor edi,edi ; simd support flag + + pushfd + pop eax + mov edx,eax + xor eax, 1<<21 ; flip ID bit in EFLAGS + push eax + popfd + pushfd + pop eax + xor eax,edx + jz short .return ; CPUID is not supported + + ; Check for MMX instruction support + xor eax,eax + cpuid + test eax,eax + jz short .return + + xor eax,eax + inc eax + cpuid + mov eax,edx ; eax = Standard feature flags + + test eax, 1<<23 ; bit23:MMX + jz short .no_mmx + or edi, byte JSIMD_MMX +.no_mmx: + test eax, 1<<25 ; bit25:SSE + jz short .no_sse + or edi, byte JSIMD_SSE +.no_sse: + test eax, 1<<26 ; bit26:SSE2 + jz short .no_sse2 + or edi, byte JSIMD_SSE2 +.no_sse2: + + ; Check for 3DNow! instruction support + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000000 + jbe short .return + + mov eax, 0x80000001 + cpuid + mov eax,edx ; eax = Extended feature flags + + test eax, 1<<31 ; bit31:3DNow!(vendor independent) + jz short .no_3dnow + or edi, byte JSIMD_3DNOW +.no_3dnow: + +.return: + mov eax,edi + + pop edi +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdext.inc glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdext.inc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimdext.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimdext.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,386 @@ +; +; jsimdext.inc - common declarations +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2010 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library - version 1.02 +; +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. +; +; [TAB8] + +; ========================================================================== +; System-dependent configurations + +%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- +; * Microsoft Visual C++ +; * MinGW (Minimalist GNU for Windows) +; * CygWin +; * LCC-Win32 + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=16 +%define SEG_CONST .rdata align=16 +%else +%define SEG_TEXT .text align=16 public use32 class=CODE +%define SEG_CONST .rdata align=16 public use32 class=CONST +%endif + +%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- +; * Microsoft Visual C++ + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=16 +%define SEG_CONST .rdata align=16 +%else +%define SEG_TEXT .text align=16 public use64 class=CODE +%define SEG_CONST .rdata align=16 public use64 class=CONST +%endif +%define EXTN(name) name ; foo() -> foo + +%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- +; * Borland C++ (Win32) + +; -- segment definition -- +; +%define SEG_TEXT _text align=16 public use32 class=CODE +%define SEG_CONST _data align=16 public use32 class=DATA + +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ +; * Linux +; * *BSD family Unix using elf format +; * Unix System V, including Solaris x86, UnixWare and SCO Unix + +; mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits + +; -- segment definition -- +; +%ifdef __x86_64__ +%define SEG_TEXT .text progbits align=16 +%define SEG_CONST .rodata progbits align=16 +%else +%define SEG_TEXT .text progbits alloc exec nowrite align=16 +%define SEG_CONST .rodata progbits alloc noexec nowrite align=16 +%endif + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC +%define EXTN(name) name ; foo() -> foo + +%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- +; * Older Linux using a.out format (nasm -f aout -DAOUT ...) +; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC + +%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) + +; -- segment definition -- +; +%define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? +%define SEG_CONST .rodata align=16 + +; The generation of position-independent code (PIC) is the default on Darwin. +; +%define PIC +%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing + +%else ; ----(Other case)---------------------- + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +%endif ; ---------------------------------------------- + +; ========================================================================== + +; -------------------------------------------------------------------------- +; Common types +; +%ifdef __x86_64__ +%define POINTER qword ; general pointer type +%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) +%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%else +%define POINTER dword ; general pointer type +%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) +%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%endif + +%define INT dword ; signed integer type +%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) +%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT + +%define FP32 dword ; IEEE754 single +%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) +%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT + +%define MMWORD qword ; int64 (MMX register) +%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) +%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT + +; NASM is buggy and doesn't properly handle operand sizes for SSE +; instructions, so for now we have to define XMMWORD as blank. +%define XMMWORD ; int128 (SSE register) +%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) +%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT + +; Similar hacks for when we load a dword or MMWORD into an xmm# register +%define XMM_DWORD +%define XMM_MMWORD + +%define SIZEOF_BYTE 1 ; sizeof(BYTE) +%define SIZEOF_WORD 2 ; sizeof(WORD) +%define SIZEOF_DWORD 4 ; sizeof(DWORD) +%define SIZEOF_QWORD 8 ; sizeof(QWORD) +%define SIZEOF_OWORD 16 ; sizeof(OWORD) + +%define BYTE_BIT 8 ; CHAR_BIT in C +%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT +%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT +%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT +%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT + +; -------------------------------------------------------------------------- +; External Symbol Name +; +%ifndef EXTN +%define EXTN(name) _ %+ name ; foo() -> _foo +%endif + +; -------------------------------------------------------------------------- +; Macros for position-independent code (PIC) support +; +%ifndef GOT_SYMBOL +%undef PIC +%endif + +%ifdef PIC ; ------------------------------------------- + +%ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- + +; At present, nasm doesn't seem to support PIC generation for Mach-O. +; The PIC support code below is a little tricky. + + SECTION SEG_CONST +const_base: + +%define GOTOFF(got,sym) (got) + (sym) - const_base + +%imacro get_GOT 1 + ; NOTE: this macro destroys ecx resister. + call %%geteip + add ecx, byte (%%ref - $) + jmp short %%adjust +%%geteip: + mov ecx, POINTER [esp] + ret +%%adjust: + push ebp + xor ebp,ebp ; ebp = 0 +%ifidni %1,ebx ; (%1 == ebx) + ; db 0x8D,0x9C + jmp near const_base = + ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) + db 0x8D,0x9C ; 8D,9C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: +%else ; (%1 != ebx) + ; db 0x8D,0x8C + jmp near const_base = + ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) + db 0x8D,0x8C ; 8D,8C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: mov %1, ecx +%endif ; (%1 == ebx) + pop ebp +%endmacro + +%else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- + +%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff + +%imacro get_GOT 1 + extern GOT_SYMBOL + call %%geteip + add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc + jmp short %%done +%%geteip: + mov %1, POINTER [esp] + ret +%%done: +%endmacro + +%endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- + +%imacro pushpic 1.nolist + push %1 +%endmacro +%imacro poppic 1.nolist + pop %1 +%endmacro +%imacro movpic 2.nolist + mov %1,%2 +%endmacro + +%else ; !PIC ----------------------------------------- + +%define GOTOFF(got,sym) (sym) + +%imacro get_GOT 1.nolist +%endmacro +%imacro pushpic 1.nolist +%endmacro +%imacro poppic 1.nolist +%endmacro +%imacro movpic 2.nolist +%endmacro + +%endif ; PIC ----------------------------------------- + +; -------------------------------------------------------------------------- +; Align the next instruction on {2,4,8,16,..}-byte boundary. +; ".balign n,,m" in GNU as +; +%define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) +%define FILLB(b,n) (($$-(b)) & ((n)-1)) + +%imacro alignx 1-2.nolist 0xFFFF +%%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ + db 0x90 ; nop + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ + db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ + db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ + db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ + db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ + db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ + db 0x8B,0xED ; mov ebp,ebp + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ + db 0x90 ; nop +%endmacro + +; Align the next data on {2,4,8,16,..}-byte boundary. +; +%imacro alignz 1.nolist + align %1, db 0 ; filling zeros +%endmacro + +%ifdef __x86_64__ + +%ifdef WIN64 + +%imacro collect_args 0 + push r12 + push r13 + push r14 + push r15 + mov r10, rcx + mov r11, rdx + mov r12, r8 + mov r13, r9 + mov r14, [rax+48] + mov r15, [rax+56] + push rsi + push rdi + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm6 + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm7 +%endmacro + +%imacro uncollect_args 0 + movaps xmm7, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD + movaps xmm6, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD + pop rdi + pop rsi + pop r15 + pop r14 + pop r13 + pop r12 +%endmacro + +%else + +%imacro collect_args 0 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + mov r10, rdi + mov r11, rsi + mov r12, rdx + mov r13, rcx + mov r14, r8 + mov r15, r9 +%endmacro + +%imacro uncollect_args 0 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 +%endmacro + +%endif + +%endif + +; Begin chromium edits +%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +%define PRIVATE :private_extern +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ +%define PRIVATE :hidden +%else +%define PRIVATE +%endif +; End chromium edits + +; -------------------------------------------------------------------------- +; Defines picked up from the C headers +; +%include "jsimdcfg.inc" + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd.h glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,8 +2,11 @@ * simd/jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2011 D. R. Commander - * + * Copyright (C) 2011, 2014-2016 D. R. Commander + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California + * Copyright (C) 2014 Linaro Limited + * Copyright (C) 2015-2016 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc @@ -18,649 +21,851 @@ #define JSIMD_SSE 0x04 #define JSIMD_SSE2 0x08 #define JSIMD_ARM_NEON 0x10 - -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_simd_cpu_support jSiCpuSupport -#define jsimd_rgb_ycc_convert_mmx jSRGBYCCM -#define jsimd_extrgb_ycc_convert_mmx jSEXTRGBYCCM -#define jsimd_extrgbx_ycc_convert_mmx jSEXTRGBXYCCM -#define jsimd_extbgr_ycc_convert_mmx jSEXTBGRYCCM -#define jsimd_extbgrx_ycc_convert_mmx jSEXTBGRXYCCM -#define jsimd_extxbgr_ycc_convert_mmx jSEXTXBGRYCCM -#define jsimd_extxrgb_ycc_convert_mmx jSEXTXRGBYCCM -#define jsimd_rgb_gray_convert_mmx jSRGBGRYM -#define jsimd_extrgb_gray_convert_mmx jSEXTRGBGRYM -#define jsimd_extrgbx_gray_convert_mmx jSEXTRGBXGRYM -#define jsimd_extbgr_gray_convert_mmx jSEXTBGRGRYM -#define jsimd_extbgrx_gray_convert_mmx jSEXTBGRXGRYM -#define jsimd_extxbgr_gray_convert_mmx jSEXTXBGRGRYM -#define jsimd_extxrgb_gray_convert_mmx jSEXTXRGBGRYM -#define jsimd_ycc_rgb_convert_mmx jSYCCRGBM -#define jsimd_ycc_extrgb_convert_mmx jSYCCEXTRGBM -#define jsimd_ycc_extrgbx_convert_mmx jSYCCEXTRGBXM -#define jsimd_ycc_extbgr_convert_mmx jSYCCEXTBGRM -#define jsimd_ycc_extbgrx_convert_mmx jSYCCEXTBGRXM -#define jsimd_ycc_extxbgr_convert_mmx jSYCCEXTXBGRM -#define jsimd_ycc_extxrgb_convert_mmx jSYCCEXTXRGBM -#define jconst_rgb_ycc_convert_sse2 jSCRGBYCCS2 -#define jsimd_rgb_ycc_convert_sse2 jSRGBYCCS2 -#define jsimd_extrgb_ycc_convert_sse2 jSEXTRGBYCCS2 -#define jsimd_extrgbx_ycc_convert_sse2 jSEXTRGBXYCCS2 -#define jsimd_extbgr_ycc_convert_sse2 jSEXTBGRYCCS2 -#define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2 -#define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2 -#define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2 -#define jconst_rgb_gray_convert_sse2 jSCRGBGRYS2 -#define jsimd_rgb_gray_convert_sse2 jSRGBGRYS2 -#define jsimd_extrgb_gray_convert_sse2 jSEXTRGBGRYS2 -#define jsimd_extrgbx_gray_convert_sse2 jSEXTRGBXGRYS2 -#define jsimd_extbgr_gray_convert_sse2 jSEXTBGRGRYS2 -#define jsimd_extbgrx_gray_convert_sse2 jSEXTBGRXGRYS2 -#define jsimd_extxbgr_gray_convert_sse2 jSEXTXBGRGRYS2 -#define jsimd_extxrgb_gray_convert_sse2 jSEXTXRGBGRYS2 -#define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2 -#define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2 -#define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2 -#define jsimd_ycc_extrgbx_convert_sse2 jSYCCEXTRGBXS2 -#define jsimd_ycc_extbgr_convert_sse2 jSYCCEXTBGRS2 -#define jsimd_ycc_extbgrx_convert_sse2 jSYCCEXTBGRXS2 -#define jsimd_ycc_extxbgr_convert_sse2 jSYCCEXTXBGRS2 -#define jsimd_ycc_extxrgb_convert_sse2 jSYCCEXTXRGBS2 -#define jsimd_h2v2_downsample_mmx jSDnH2V2M -#define jsimd_h2v1_downsample_mmx jSDnH2V1M -#define jsimd_h2v2_downsample_sse2 jSDnH2V2S2 -#define jsimd_h2v1_downsample_sse2 jSDnH2V1S2 -#define jsimd_h2v2_upsample_mmx jSUpH2V2M -#define jsimd_h2v1_upsample_mmx jSUpH2V1M -#define jsimd_h2v2_fancy_upsample_mmx jSFUpH2V2M -#define jsimd_h2v1_fancy_upsample_mmx jSFUpH2V1M -#define jsimd_h2v2_merged_upsample_mmx jSMUpH2V2M -#define jsimd_h2v2_extrgb_merged_upsample_mmx jSMUpH2V2EXTRGBM -#define jsimd_h2v2_extrgbx_merged_upsample_mmx jSMUpH2V2EXTRGBXM -#define jsimd_h2v2_extbgr_merged_upsample_mmx jSMUpH2V2EXTBGRM -#define jsimd_h2v2_extbgrx_merged_upsample_mmx jSMUpH2V2EXTBGRXM -#define jsimd_h2v2_extxbgr_merged_upsample_mmx jSMUpH2V2EXTXBGRM -#define jsimd_h2v2_extxrgb_merged_upsample_mmx jSMUpH2V2EXTXRGBM -#define jsimd_h2v1_merged_upsample_mmx jSMUpH2V1M -#define jsimd_h2v1_extrgb_merged_upsample_mmx jSMUpH2V1EXTRGBM -#define jsimd_h2v1_extrgbx_merged_upsample_mmx jSMUpH2V1EXTRGBXM -#define jsimd_h2v1_extbgr_merged_upsample_mmx jSMUpH2V1EXTBGRM -#define jsimd_h2v1_extbgrx_merged_upsample_mmx jSMUpH2V1EXTBGRXM -#define jsimd_h2v1_extxbgr_merged_upsample_mmx jSMUpH2V1EXTXBGRM -#define jsimd_h2v1_extxrgb_merged_upsample_mmx jSMUpH2V1EXTXRGBM -#define jsimd_h2v2_upsample_sse2 jSUpH2V2S2 -#define jsimd_h2v1_upsample_sse2 jSUpH2V1S2 -#define jconst_fancy_upsample_sse2 jSCFUpS2 -#define jsimd_h2v2_fancy_upsample_sse2 jSFUpH2V2S2 -#define jsimd_h2v1_fancy_upsample_sse2 jSFUpH2V1S2 -#define jconst_merged_upsample_sse2 jSCMUpS2 -#define jsimd_h2v2_merged_upsample_sse2 jSMUpH2V2S2 -#define jsimd_h2v2_extrgb_merged_upsample_sse2 jSMUpH2V2EXTRGBS2 -#define jsimd_h2v2_extrgbx_merged_upsample_sse2 jSMUpH2V2EXTRGBXS2 -#define jsimd_h2v2_extbgr_merged_upsample_sse2 jSMUpH2V2EXTBGRS2 -#define jsimd_h2v2_extbgrx_merged_upsample_sse2 jSMUpH2V2EXTBGRXS2 -#define jsimd_h2v2_extxbgr_merged_upsample_sse2 jSMUpH2V2EXTXBGRS2 -#define jsimd_h2v2_extxrgb_merged_upsample_sse2 jSMUpH2V2EXTXRGBS2 -#define jsimd_h2v1_merged_upsample_sse2 jSMUpH2V1S2 -#define jsimd_h2v1_extrgb_merged_upsample_sse2 jSMUpH2V1EXTRGBS2 -#define jsimd_h2v1_extrgbx_merged_upsample_sse2 jSMUpH2V1EXTRGBXS2 -#define jsimd_h2v1_extbgr_merged_upsample_sse2 jSMUpH2V1EXTBGRS2 -#define jsimd_h2v1_extbgrx_merged_upsample_sse2 jSMUpH2V1EXTBGRXS2 -#define jsimd_h2v1_extxbgr_merged_upsample_sse2 jSMUpH2V1EXTXBGRS2 -#define jsimd_h2v1_extxrgb_merged_upsample_sse2 jSMUpH2V1EXTXRGBS2 -#define jsimd_convsamp_mmx jSConvM -#define jsimd_convsamp_sse2 jSConvS2 -#define jsimd_convsamp_float_3dnow jSConvF3D -#define jsimd_convsamp_float_sse jSConvFS -#define jsimd_convsamp_float_sse2 jSConvFS2 -#define jsimd_fdct_islow_mmx jSFDMIS -#define jsimd_fdct_ifast_mmx jSFDMIF -#define jconst_fdct_islow_sse2 jSCFDS2IS -#define jsimd_fdct_islow_sse2 jSFDS2IS -#define jconst_fdct_ifast_sse2 jSCFDS2IF -#define jsimd_fdct_ifast_sse2 jSFDS2IF -#define jsimd_fdct_float_3dnow jSFD3DF -#define jconst_fdct_float_sse jSCFDSF -#define jsimd_fdct_float_sse jSFDSF -#define jsimd_quantize_mmx jSQuantM -#define jsimd_quantize_sse2 jSQuantS2 -#define jsimd_quantize_float_3dnow jSQuantF3D -#define jsimd_quantize_float_sse jSQuantFS -#define jsimd_quantize_float_sse2 jSQuantFS2 -#define jsimd_idct_2x2_mmx jSIDM22 -#define jsimd_idct_4x4_mmx jSIDM44 -#define jconst_idct_red_sse2 jSCIDS2R -#define jsimd_idct_2x2_sse2 jSIDS222 -#define jsimd_idct_4x4_sse2 jSIDS244 -#define jsimd_idct_islow_mmx jSIDMIS -#define jsimd_idct_ifast_mmx jSIDMIF -#define jconst_idct_islow_sse2 jSCIDS2IS -#define jsimd_idct_islow_sse2 jSIDS2IS -#define jconst_idct_ifast_sse2 jSCIDS2IF -#define jsimd_idct_ifast_sse2 jSIDS2IF -#define jsimd_idct_float_3dnow jSID3DF -#define jconst_fdct_float_sse jSCIDSF -#define jsimd_idct_float_sse jSIDSF -#define jconst_fdct_float_sse2 jSCIDS2F -#define jsimd_idct_float_sse2 jSIDS2F -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#define JSIMD_MIPS_DSPR2 0x20 +#define JSIMD_ALTIVEC 0x40 /* SIMD Ext: retrieve SIMD/CPU information */ -EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void)); +EXTERN(unsigned int) jpeg_simd_cpu_support (void); -/* SIMD Color Space Conversion */ +/* RGB & extended RGB --> YCC Colorspace Conversion */ EXTERN(void) jsimd_rgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); - -EXTERN(void) jsimd_rgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgbx_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgr_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgrx_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxbgr_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxrgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); - -EXTERN(void) jsimd_ycc_rgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extrgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extrgbx_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extbgr_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extbgrx_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extxbgr_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extxrgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_ycc_convert_sse2[]; EXTERN(void) jsimd_rgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_extrgb_ycc_convert_neon_slowld3 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_neon_slowld3 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +/* RGB & extended RGB --> Grayscale Colorspace Conversion */ +EXTERN(void) jsimd_rgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_gray_convert_sse2[]; EXTERN(void) jsimd_rgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +/* YCC --> RGB & extended RGB Colorspace Conversion */ +EXTERN(void) jsimd_ycc_rgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); extern const int jconst_ycc_rgb_convert_sse2[]; EXTERN(void) jsimd_ycc_rgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); - -EXTERN(void) jsimd_rgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgbx_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgr_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgrx_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxbgr_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxrgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_rgb565_convert_neon + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_extrgb_convert_neon_slowst3 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_neon_slowst3 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +/* NULL Colorspace Conversion */ +EXTERN(void) jsimd_c_null_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows, int num_components); -/* SIMD Downsample */ -EXTERN(void) jsimd_h2v2_downsample_mmx - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); +/* h2v1 Downsampling */ EXTERN(void) jsimd_h2v1_downsample_mmx - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(void) jsimd_h2v2_downsample_sse2 - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); EXTERN(void) jsimd_h2v1_downsample_sse2 - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_neon + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_mips_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); -/* SIMD Upsample */ -EXTERN(void) jsimd_h2v2_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +/* h2v2 Downsampling */ +EXTERN(void) jsimd_h2v2_downsample_mmx + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_sse2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_neon + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_mips_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +/* h2v2 Smooth Downsampling */ +EXTERN(void) jsimd_h2v2_smooth_downsample_mips_dspr2 + (JSAMPARRAY input_data, JSAMPARRAY output_data, + JDIMENSION v_samp_factor, int max_v_samp_factor, + int smoothing_factor, JDIMENSION width_blocks, + JDIMENSION image_width); + + +/* Upsampling */ EXTERN(void) jsimd_h2v1_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_mmx + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_fancy_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jsimd_h2v1_upsample_sse2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_sse2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_int_upsample_mips_dspr2 + (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr, JDIMENSION output_width, + int max_v_samp_factor); + +EXTERN(void) jsimd_h2v1_upsample_altivec + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_altivec + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +/* Fancy Upsampling */ EXTERN(void) jsimd_h2v1_fancy_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_mmx + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +extern const int jconst_fancy_upsample_sse2[]; +EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_neon + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_altivec + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_altivec + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +/* Merged Upsampling */ EXTERN(void) jsimd_h2v1_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); - -EXTERN(void) jsimd_h2v2_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); -EXTERN(void) jsimd_h2v1_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); -extern const int jconst_fancy_upsample_sse2[]; -EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); -EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jsimd_h2v2_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); extern const int jconst_merged_upsample_sse2[]; -EXTERN(void) jsimd_h2v2_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); EXTERN(void) jsimd_h2v1_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v2_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); -/* SIMD Sample Conversion */ -EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_sse2 JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_convsamp_float_sse2 JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -/* SIMD Forward DCT */ -EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data)); +EXTERN(void) jsimd_h2v1_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); + +EXTERN(void) jsimd_h2v2_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); + +EXTERN(void) jsimd_h2v1_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v2_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +/* Sample Conversion */ +EXTERN(void) jsimd_convsamp_mmx + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_sse2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_neon + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_mips_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_altivec + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +/* Floating Point Sample Conversion */ +EXTERN(void) jsimd_convsamp_float_3dnow + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_sse + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_sse2 + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_mips_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +/* Slow Integer Forward DCT */ +EXTERN(void) jsimd_fdct_islow_mmx (DCTELEM *data); -extern const int jconst_fdct_ifast_sse2[]; -EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM * data)); extern const int jconst_fdct_islow_sse2[]; -EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data)); +EXTERN(void) jsimd_fdct_islow_sse2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_neon (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_altivec (DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data)); +/* Fast Integer Forward DCT */ +EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM *data); -EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data)); +extern const int jconst_fdct_ifast_sse2[]; +EXTERN(void) jsimd_fdct_ifast_sse2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_neon (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_mips_dspr2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_altivec (DCTELEM *data); + +/* Floating Point Forward DCT */ +EXTERN(void) jsimd_fdct_float_3dnow (FAST_FLOAT *data); extern const int jconst_fdct_float_sse[]; -EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data)); +EXTERN(void) jsimd_fdct_float_sse (FAST_FLOAT *data); + +/* Quantization */ +EXTERN(void) jsimd_quantize_mmx + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_sse2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_neon + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); -/* SIMD Quantization */ -EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_sse2 JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_neon JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_quantize_float_sse2 JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -/* SIMD Reduced Inverse DCT */ -EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_quantize_mips_dspr2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_altivec + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +/* Floating Point Quantization */ +EXTERN(void) jsimd_quantize_float_3dnow + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_sse + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_sse2 + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_mips_dspr2 + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +/* Scaled Inverse DCT */ +EXTERN(void) jsimd_idct_2x2_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_red_sse2[]; -EXTERN(void) jsimd_idct_2x2_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_2x2_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -/* SIMD Inverse DCT */ -EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_2x2_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_2x2_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_2x2_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col, int *workspace); +EXTERN(void) jsimd_idct_6x6_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12_pass1_mips_dspr2 + (JCOEFPTR coef_block, void *dct_table, int *workspace); +EXTERN(void) jsimd_idct_12x12_pass2_mips_dspr2 + (int *workspace, int *output); + +/* Slow Integer Inverse DCT */ +EXTERN(void) jsimd_idct_islow_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_islow_sse2[]; -EXTERN(void) jsimd_idct_islow_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_islow_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_islow_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_islow_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, int *output_buf, + JSAMPLE *output_col); + +EXTERN(void) jsimd_idct_islow_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Fast Integer Inverse DCT */ +EXTERN(void) jsimd_idct_ifast_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + extern const int jconst_idct_ifast_sse2[]; -EXTERN(void) jsimd_idct_ifast_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_islow_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_ifast_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_ifast_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_ifast_cols_mips_dspr2 + (JCOEF *inptr, IFAST_MULT_TYPE *quantptr, DCTELEM *wsptr, + const int *idct_coefs); +EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2 + (DCTELEM *wsptr, JSAMPARRAY output_buf, JDIMENSION output_col, + const int *idct_coefs); + +EXTERN(void) jsimd_idct_ifast_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Floating Point Inverse DCT */ +EXTERN(void) jsimd_idct_float_3dnow + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse[]; -EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_float_sse + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse2[]; -EXTERN(void) jsimd_idct_float_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - +EXTERN(void) jsimd_idct_float_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Huffman coding */ +extern const int jconst_huff_encode_one_block[]; +EXTERN(JOCTET*) jsimd_huff_encode_one_block_sse2 + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon_slowtbl + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_i386.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_i386.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_i386.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_i386.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1091 @@ +/* + * jsimd_i386.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 32-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. This macro allows us to verify it at runtime. + */ +#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = jpeg_simd_cpu_support(); + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEMMX"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_MMX; + env = getenv("JSIMD_FORCE3DNOW"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_3DNOW|JSIMD_MMX; + env = getenv("JSIMD_FORCESSE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE|JSIMD_MMX; + env = getenv("JSIMD_FORCESSE2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_ycc_convert_sse2; + mmxfct=jsimd_extrgb_ycc_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_ycc_convert_sse2; + mmxfct=jsimd_extrgbx_ycc_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_ycc_convert_sse2; + mmxfct=jsimd_extbgr_ycc_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_ycc_convert_sse2; + mmxfct=jsimd_extbgrx_ycc_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_ycc_convert_sse2; + mmxfct=jsimd_extxbgr_ycc_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_ycc_convert_sse2; + mmxfct=jsimd_extxrgb_ycc_convert_mmx; + break; + default: + sse2fct=jsimd_rgb_ycc_convert_sse2; + mmxfct=jsimd_rgb_ycc_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_gray_convert_sse2; + mmxfct=jsimd_extrgb_gray_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_gray_convert_sse2; + mmxfct=jsimd_extrgbx_gray_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_gray_convert_sse2; + mmxfct=jsimd_extbgr_gray_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_gray_convert_sse2; + mmxfct=jsimd_extbgrx_gray_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_gray_convert_sse2; + mmxfct=jsimd_extxbgr_gray_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_gray_convert_sse2; + mmxfct=jsimd_extxrgb_gray_convert_mmx; + break; + default: + sse2fct=jsimd_rgb_gray_convert_sse2; + mmxfct=jsimd_rgb_gray_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_ycc_extrgb_convert_sse2; + mmxfct=jsimd_ycc_extrgb_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_ycc_extrgbx_convert_sse2; + mmxfct=jsimd_ycc_extrgbx_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_ycc_extbgr_convert_sse2; + mmxfct=jsimd_ycc_extbgr_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_ycc_extbgrx_convert_sse2; + mmxfct=jsimd_ycc_extbgrx_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_ycc_extxbgr_convert_sse2; + mmxfct=jsimd_ycc_extxbgr_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_ycc_extxrgb_convert_sse2; + mmxfct=jsimd_ycc_extxrgb_convert_mmx; + break; + default: + sse2fct=jsimd_ycc_rgb_convert_sse2; + mmxfct=jsimd_ycc_rgb_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; + break; + default: + sse2fct=jsimd_h2v2_merged_upsample_sse2; + mmxfct=jsimd_h2v2_merged_upsample_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; + break; + default: + sse2fct=jsimd_h2v1_merged_upsample_sse2; + mmxfct=jsimd_h2v1_merged_upsample_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_MMX) + jsimd_convsamp_mmx(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_convsamp_float_sse(sample_data, start_col, workspace); + else if (simd_support & JSIMD_3DNOW) + jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + jsimd_fdct_islow_sse2(data); + else if (simd_support & JSIMD_MMX) + jsimd_fdct_islow_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + jsimd_fdct_ifast_sse2(data); + else if (simd_support & JSIMD_MMX) + jsimd_fdct_ifast_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + jsimd_fdct_float_sse(data); + else if (simd_support & JSIMD_3DNOW) + jsimd_fdct_float_3dnow(data); +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_MMX) + jsimd_quantize_mmx(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_float_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_quantize_float_sse(coef_block, divisors, workspace); + else if (simd_support & JSIMD_3DNOW) + jsimd_quantize_float_3dnow(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + if (sizeof(FLOAT_MULT_TYPE) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_3DNOW) + jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1130 @@ +/* + * jsimd_mips.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014 D. R. Commander + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * MIPS architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +#include +#include +#include + +static unsigned int simd_support = ~0; + +#if defined(__linux__) + +LOCAL(int) +parse_proc_cpuinfo(const char* search_string) +{ + const char* file_name = "/proc/cpuinfo"; + char cpuinfo_line[256]; + FILE* f = NULL; + simd_support = 0; + + if ((f = fopen(file_name, "r")) != NULL) { + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) { + if (strstr(cpuinfo_line, search_string) != NULL) { + fclose(f); + simd_support |= JSIMD_MIPS_DSPR2; + return 1; + } + } + fclose(f); + } + /* Did not find string in the proc file, or not Linux ELF. */ + return 0; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + if (simd_support != ~0U) + return; + + simd_support = 0; + +#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) + simd_support |= JSIMD_MIPS_DSPR2; +#elif defined(__linux__) + /* We still have a chance to use MIPS DSPR2 regardless of globally used + * -mdspr2 options passed to gcc by performing runtime detection via + * /proc/cpuinfo parsing on linux */ + if (!parse_proc_cpuinfo("MIPS 74K")) + return; +#endif +} + +static const int mips_idct_ifast_coefs[4] = { + 0x45404540, // FIX( 1.082392200 / 2) = 17734 = 0x4546 + 0x5A805A80, // FIX( 1.414213562 / 2) = 23170 = 0x5A82 + 0x76407640, // FIX( 1.847759065 / 2) = 30274 = 0x7642 + 0xAC60AC60 // FIX(-2.613125930 / 4) = -21407 = 0xAC61 +}; + +/* The following struct is borrowed from jdsample.c */ +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +typedef struct { + struct jpeg_upsampler pub; + JSAMPARRAY color_buf[MAX_COMPONENTS]; + upsample1_ptr methods[MAX_COMPONENTS]; + int next_row_out; + JDIMENSION rows_to_go; + int rowgroup_height[MAX_COMPONENTS]; + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; +} my_upsampler; + +typedef my_upsampler *my_upsample_ptr; + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2; + + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, + num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, + num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->output_width, input_buf, input_row, output_buf, + num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_c_null_convert_mips_dspr2(cinfo->image_width, input_buf, + output_buf, output_row, num_rows, + cinfo->num_components); +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if(DCTSIZE != 8) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width, + cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_smooth_downsample_mips_dspr2(input_data, output_data, + compptr->v_samp_factor, + cinfo->max_v_samp_factor, + cinfo->smoothing_factor, + compptr->width_in_blocks, + cinfo->image_width); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width, + cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor, + cinfo->output_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor, + cinfo->output_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + + jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index], + upsample->v_expand[compptr->component_index], + input_data, output_data_ptr, + cinfo->output_width, + cinfo->max_v_samp_factor); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, + compptr->downsampled_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, + compptr->downsampled_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, + JSAMPLE *); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_h2v2_extbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; + break; + } + + mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, + JSAMPLE *); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_h2v1_extbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; + break; + } + + mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + if ((simd_support & JSIMD_MIPS_DSPR2)) + jsimd_convsamp_float_mips_dspr2(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_fdct_islow_mips_dspr2(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_fdct_ifast_mips_dspr2(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_quantize_mips_dspr2(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_quantize_float_mips_dspr2(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12 (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int workspace[DCTSIZE*4]; /* buffers data between passes */ + jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col, workspace); + } +} + +GLOBAL(void) +jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int workspace[96]; + int output[12] = { + (int)(output_buf[0] + output_col), + (int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col), + (int)(output_buf[8] + output_col), + (int)(output_buf[9] + output_col), + (int)(output_buf[10] + output_col), + (int)(output_buf[11] + output_col), + }; + jsimd_idct_12x12_pass1_mips_dspr2(coef_block, compptr->dct_table, + workspace); + jsimd_idct_12x12_pass2_mips_dspr2(workspace, output); + } +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int output[8] = { + (int)(output_buf[0] + output_col), + (int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col), + }; + + jsimd_idct_islow_mips_dspr2(coef_block, compptr->dct_table, + output, IDCT_range_limit(cinfo)); + } +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + JCOEFPTR inptr; + IFAST_MULT_TYPE *quantptr; + DCTELEM workspace[DCTSIZE2]; /* buffers data between passes */ + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; + + jsimd_idct_ifast_cols_mips_dspr2(inptr, quantptr, + workspace, mips_idct_ifast_coefs); + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + jsimd_idct_ifast_rows_mips_dspr2(workspace, output_buf, + output_col, mips_idct_ifast_coefs); + } +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,285 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013, MIPS Technologies, Inc., California. + * All rights reserved. + * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) + * Darko Laus (darko.laus@imgtec.com) + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +#define f0 $f0 +#define f1 $f1 +#define f2 $f2 +#define f3 $f3 +#define f4 $f4 +#define f5 $f5 +#define f6 $f6 +#define f7 $f7 +#define f8 $f8 +#define f9 $f9 +#define f10 $f10 +#define f11 $f11 +#define f12 $f12 +#define f13 $f13 +#define f14 $f14 +#define f15 $f15 +#define f16 $f16 +#define f17 $f17 +#define f18 $f18 +#define f19 $f19 +#define f20 $f20 +#define f21 $f21 +#define f22 $f22 +#define f23 $f23 +#define f24 $f24 +#define f25 $f25 +#define f26 $f26 +#define f27 $f27 +#define f28 $f28 +#define f29 $f29 +#define f30 $f30 +#define f31 $f31 + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 + */ +#define LEAF_MIPS32R2(symbol) \ + .globl symbol; \ + .align 2; \ + .type symbol, @function; \ + .ent symbol, 0; \ +symbol: .frame sp, 0, ra; \ + .set push; \ + .set arch=mips32r2; \ + .set noreorder; \ + .set noat; + +/* + * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2 + */ +#define LEAF_MIPS_DSPR2(symbol) \ +LEAF_MIPS32R2(symbol) \ + .set dspr2; + +/* + * END - mark end of function + */ +#define END(function) \ + .set pop; \ + .end function; \ + .size function,.-function + +/* + * Checks if stack offset is big enough for storing/restoring regs_num + * number of register to/from stack. Stack offset must be greater than + * or equal to the number of bytes needed for storing registers (regs_num*4). + * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is + * preserved for input arguments of the functions, already stored in a0-a3), + * stack size can be further optimized by utilizing this space. + */ +.macro CHECK_STACK_OFFSET regs_num, stack_offset +.if \stack_offset < \regs_num * 4 - 16 +.error "Stack offset too small." +.endif +.endm + +/* + * Saves set of registers on stack. Maximum number of registers that + * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * before registers are pushed in order to provide enough space on stack + * (offset must be multiple of 4, and must be big enough, as described by + * CHECK_STACK_OFFSET macro). This macro is intended to be used in + * combination with RESTORE_REGS_FROM_STACK macro. Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + .if \stack_offset != 0 + addiu sp, sp, -\stack_offset + .endif + sw \r1, 0(sp) + .if \r2 != 0 + sw \r2, 4(sp) + .endif + .if \r3 != 0 + sw \r3, 8(sp) + .endif + .if \r4 != 0 + sw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + sw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + sw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + sw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + sw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + sw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + sw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + sw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + sw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + sw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + sw \r14, 52(sp) + .endif +.endm + +/* + * Restores set of registers from stack. Maximum number of registers that + * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * after registers are restored (offset must be multiple of 4, and must + * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is + * intended to be used in combination with RESTORE_REGS_FROM_STACK macro. + * Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + lw \r1, 0(sp) + .if \r2 != 0 + lw \r2, 4(sp) + .endif + .if \r3 != 0 + lw \r3, 8(sp) + .endif + .if \r4 != 0 + lw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + lw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + lw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + lw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + lw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + lw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + lw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + lw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + lw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + lw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + lw \r14, 52(sp) + .endif + .if \stack_offset != 0 + addiu sp, sp, \stack_offset + .endif +.endm + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4487 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. + * All rights reserved. + * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) + * Darko Laus (darko.laus@imgtec.com) + * Copyright (C) 2015, D. R. Commander. All Rights Reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#include "jsimd_mips_dspr2_asm.h" + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + * 20(sp) - cinfo->num_components + * + * Null conversion for compression + */ + + SAVE_REGS_ON_STACK 8, s0, s1 + + lw t9, 24(sp) // t9 = num_rows + lw s0, 28(sp) // s0 = cinfo->num_components + andi t0, a0, 3 // t0 = cinfo->image_width & 3 + beqz t0, 4f // no residual + nop +0: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +1: + sll t3, t1, 2 + lwx t5, t3(a2) // t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +2: + lbu t3, 0(t2) + addiu t5, t5, 1 + sb t3, -1(t5) + bne t6, t5, 2b + addu t2, t2, s0 +3: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 3b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 1b + nop + addiu a1, a1, 4 + bgez t9, 0b + addiu a3, a3, 1 + b 7f + nop +4: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +5: + sll t3, t1, 2 + lwx t5, t3(a2) // t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +6: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 6b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 5b + nop + addiu a1, a1, 4 + bgez t9, 4b + addiu a3, a3, 1 +7: + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop + +END(jsimd_c_null_convert_mips_dspr2) + +/*****************************************************************************/ +/* + * jsimd_extrgb_ycc_convert_mips_dspr2 + * jsimd_extbgr_ycc_convert_mips_dspr2 + * jsimd_extrgbx_ycc_convert_mips_dspr2 + * jsimd_extbgrx_ycc_convert_mips_dspr2 + * jsimd_extxbgr_ycc_convert_mips_dspr2 + * jsimd_extxrgb_ycc_convert_mips_dspr2 + * + * Colorspace conversion RGB -> YCbCr + */ + +.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs + +.macro DO_RGB_TO_YCC r, \ + g, \ + b, \ + inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw t7, 48(sp) // t7 = num_rows + li s0, 0x4c8b // FIX(0.29900) + li s1, 0x9646 // FIX(0.58700) + li s2, 0x1d2f // FIX(0.11400) + li s3, 0xffffd4cd // -FIX(0.16874) + li s4, 0xffffab33 // -FIX(0.33126) + li s5, 0x8000 // FIX(0.50000) + li s6, 0xffff94d1 // -FIX(0.41869) + li s7, 0xffffeb2f // -FIX(0.08131) + li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1 + +0: + addiu t7, -1 // --num_rows + lw t6, 0(a1) // t6 = input_buf[0] + lw t0, 0(a2) + lw t1, 4(a2) + lw t2, 8(a2) + sll t3, a3, 2 + lwx t0, t3(t0) // t0 = output_buf[0][output_row] + lwx t1, t3(t1) // t1 = output_buf[1][output_row] + lwx t2, t3(t2) // t2 = output_buf[2][output_row] + + addu t9, t2, a0 // t9 = end address + addiu a3, 1 + +1: + DO_RGB_TO_YCC t3, t4, t5, t6 + + mtlo s5, $ac0 + mtlo t8, $ac1 + mtlo t8, $ac2 + maddu $ac0, s2, t5 + maddu $ac1, s5, t5 + maddu $ac2, s5, t3 + maddu $ac0, s0, t3 + maddu $ac1, s3, t3 + maddu $ac2, s6, t4 + maddu $ac0, s1, t4 + maddu $ac1, s4, t4 + maddu $ac2, s7, t5 + extr.w t3, $ac0, 16 + extr.w t4, $ac1, 16 + extr.w t5, $ac2, 16 + sb t3, 0(t0) + sb t4, 0(t1) + sb t5, 0(t2) + addiu t0, 1 + addiu t2, 1 + bne t2, t9, 1b + addiu t1, 1 + bgtz t7, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_ycc_convert_mips_dspr2) + +.purgem DO_RGB_TO_YCC + +.endm + +/*------------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 + +/*****************************************************************************/ +/* + * jsimd_ycc_extrgb_convert_mips_dspr2 + * jsimd_ycc_extbgr_convert_mips_dspr2 + * jsimd_ycc_extrgbx_convert_mips_dspr2 + * jsimd_ycc_extbgrx_convert_mips_dspr2 + * jsimd_ycc_extxbgr_convert_mips_dspr2 + * jsimd_ycc_extxrgb_convert_mips_dspr2 + * + * Colorspace conversion YCbCr -> RGB + */ + +.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs + +.macro STORE_YCC_TO_RGB scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r_offs(\outptr) + sb \scratch1, \g_offs(\outptr) + sb \scratch2, \b_offs(\outptr) +.if (\pixel_size == 4) + li t0, 0xFF + sb t0, \a_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - input_row + * a3 - output_buf + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s1, 48(sp) + li t3, 0x8000 + li t4, 0x166e9 // FIX(1.40200) + li t5, 0x1c5a2 // FIX(1.77200) + li t6, 0xffff492e // -FIX(0.71414) + li t7, 0xffffa7e6 // -FIX(0.34414) + repl.ph t8, 128 + +0: + lw s0, 0(a3) + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + sll s5, a2, 2 + addiu s1, -1 + lwx s2, s5(t0) + lwx s3, s5(t1) + lwx s4, s5(t2) + addu t9, s2, a0 + addiu a2, 1 + +1: + lbu s7, 0(s4) // cr + lbu s6, 0(s3) // cb + lbu s5, 0(s2) // y + addiu s2, 1 + addiu s4, 1 + addiu s7, -128 + addiu s6, -128 + mul t2, t7, s6 + mul t0, t6, s7 // Crgtab[cr] + sll s7, 15 + mulq_rs.w t1, t4, s7 // Crrtab[cr] + sll s6, 15 + addu t2, t3 // Cbgtab[cb] + addu t2, t0 + + mulq_rs.w t0, t5, s6 // Cbbtab[cb] + sra t2, 16 + addu t1, s5 + addu t2, s5 // add y + ins t2, t1, 16, 16 + subu.ph t2, t2, t8 + addu t0, s5 + shll_s.ph t2, t2, 8 + subu t0, 128 + shra.ph t2, t2, 8 + shll_s.w t0, t0, 24 + addu.ph t2, t2, t8 // clip & store + sra t0, t0, 24 + sra t1, t2, 16 + addiu t0, 128 + + STORE_YCC_TO_RGB t1, t2, t0, s0 + + bne s2, t9, 1b + addiu s3, 1 + bgtz s1, 0b + addiu a3, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_ycc_\colorid\()_convert_mips_dspr2) + +.purgem STORE_YCC_TO_RGB + +.endm + +/*------------------------------------------id -- pix R G B A */ +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0 + +/*****************************************************************************/ +/* + * jsimd_extrgb_gray_convert_mips_dspr2 + * jsimd_extbgr_gray_convert_mips_dspr2 + * jsimd_extrgbx_gray_convert_mips_dspr2 + * jsimd_extbgrx_gray_convert_mips_dspr2 + * jsimd_extxbgr_gray_convert_mips_dspr2 + * jsimd_extxrgb_gray_convert_mips_dspr2 + * + * Colorspace conversion RGB -> GRAY + */ + +.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs + +.macro DO_RGB_TO_GRAY r, \ + g, \ + b, \ + inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + li s0, 0x4c8b // s0 = FIX(0.29900) + li s1, 0x9646 // s1 = FIX(0.58700) + li s2, 0x1d2f // s2 = FIX(0.11400) + li s7, 0x8000 // s7 = FIX(0.50000) + lw s6, 48(sp) + andi t7, a0, 3 + +0: + addiu s6, -1 // s6 = num_rows + lw t0, 0(a1) + lw t1, 0(a2) + sll t3, a3, 2 + lwx t1, t3(t1) + addiu a3, 1 + addu t9, t1, a0 + subu t8, t9, t7 + beq t1, t8, 2f + nop + +1: + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t6, $ac0, 16 + + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + extr.w t2, $ac1, 16 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t5, $ac0, 16 + sb t6, 0(t1) + sb t2, 1(t1) + extr.w t3, $ac1, 16 + addiu t1, 4 + sb t5, -2(t1) + sb t3, -1(t1) + bne t1, t8, 1b + nop + +2: + beqz t7, 4f + nop + +3: + DO_RGB_TO_GRAY t3, t4, t5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + extr.w t6, $ac0, 16 + sb t6, 0(t1) + addiu t1, 1 + bne t1, t9, 3b + nop + +4: + bgtz s6, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_gray_convert_mips_dspr2) + +.purgem DO_RGB_TO_GRAY + +.endm + +/*------------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 +/*****************************************************************************/ +/* + * jsimd_h2v2_merged_upsample_mips_dspr2 + * jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 + * jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 + * jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 + * jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 + * jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 + * jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 + * + * Merged h2v2 upsample routines + */ +.macro GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ + pixel_size, \ + r1_offs, \ + g1_offs, \ + b1_offs, \ + a1_offs, \ + r2_offs, \ + g2_offs, \ + b2_offs, \ + a2_offs + +.macro STORE_H2V2_2_PIXELS scratch0 \ + scratch1 \ + scratch2 \ + scratch3 \ + scratch4 \ + scratch5 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li \scratch0, 0xFF + sb \scratch0, \a1_offs(\outptr) + sb \scratch0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V2_1_PIXEL scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_MIPS_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) +/* + * a0 - cinfo->output_width + * a1 - input_buf + * a2 - in_row_group_ctr + * a3 - output_buf + * 16(sp) - cinfo->sample_range_limit + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + lw t9, 56(sp) // cinfo->sample_range_limit + lw v0, 0(a1) + lw v1, 4(a1) + lw t0, 8(a1) + sll t1, a2, 3 + addiu t2, t1, 4 + sll t3, a2, 2 + lw t4, 0(a3) // t4 = output_buf[0] + lwx t1, t1(v0) // t1 = input_buf[0][in_row_group_ctr*2] + lwx t2, t2(v0) // t2 = input_buf[0][in_row_group_ctr*2 + 1] + lwx t5, t3(v1) // t5 = input_buf[1][in_row_group_ctr] + lwx t6, t3(t0) // t6 = input_buf[2][in_row_group_ctr] + lw t7, 4(a3) // t7 = output_buf[1] + li s1, 0xe6ea + addiu t8, s1, 0x7fff // t8 = 0x166e9 [FIX(1.40200)] + addiu s0, t8, 0x5eb9 // s0 = 0x1c5a2 [FIX(1.77200)] + addiu s1, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] + xori s2, s1, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] + srl t3, a0, 1 + blez t3, 2f + addu t0, t5, t3 // t0 = end address + 1: + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t5, t5, 1 + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + extr_r.w s5, $ac1, 16 + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + lbu v0, 0(t1) + addiu t6, t6, 1 + addiu t1, t1, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, -1(t1) + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2) + + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, 1(t2) + addiu t2, t2, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7 + + bne t0, t5, 1b + nop +2: + andi t0, a0, 1 + beqz t0, 4f + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + lbu v0, 0(t1) + extr_r.w s5, $ac1, 16 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2) + + STORE_H2V2_1_PIXEL t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_1_PIXEL t3, s3, v1, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) + +.purgem STORE_H2V2_1_PIXEL +.purgem STORE_H2V2_2_PIXELS +.endm + +/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 +/*****************************************************************************/ +/* + * jsimd_h2v1_merged_upsample_mips_dspr2 + * jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 + * jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 + * jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 + * jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 + * jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 + * jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 + * + * Merged h2v1 upsample routines + */ + +.macro GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ + pixel_size, \ + r1_offs, \ + g1_offs, \ + b1_offs, \ + a1_offs, \ + r2_offs, \ + g2_offs, \ + b2_offs, \ + a2_offs + +.macro STORE_H2V1_2_PIXELS scratch0 \ + scratch1 \ + scratch2 \ + scratch3 \ + scratch4 \ + scratch5 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) + sb t0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V1_1_PIXEL scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_MIPS_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) +/* + * a0 - cinfo->output_width + * a1 - input_buf + * a2 - in_row_group_ctr + * a3 - output_buf + * 16(sp) - range_limit + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + li t0, 0xe6ea + lw t1, 0(a1) // t1 = input_buf[0] + lw t2, 4(a1) // t2 = input_buf[1] + lw t3, 8(a1) // t3 = input_buf[2] + lw t8, 56(sp) // t8 = range_limit + addiu s1, t0, 0x7fff // s1 = 0x166e9 [FIX(1.40200)] + addiu s2, s1, 0x5eb9 // s2 = 0x1c5a2 [FIX(1.77200)] + addiu s0, t0, 0x9916 // s0 = 0x8000 + addiu s4, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] + xori s3, s4, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] + srl t0, a0, 1 + sll t4, a2, 2 + lwx s5, t4(t1) // s5 = inptr0 + lwx s6, t4(t2) // s6 = inptr1 + lwx s7, t4(t3) // s7 = inptr2 + lw t7, 0(a3) // t7 = outptr + blez t0, 2f + addu t9, s6, t0 // t9 = end address +1: + lbu t2, 0(s6) // t2 = cb + lbu t0, 0(s7) // t0 = cr + lbu t1, 0(s5) // t1 = y + addiu t2, t2, -128 // t2 = cb - 128 + addiu t0, t0, -128 // t0 = cr - 128 + mult $ac1, s4, t2 + madd $ac1, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // t0 = (C1*cr + ONE_HALF)>> SCALEBITS + extr_r.w t5, $ac1, 16 + mulq_rs.w t6, s2, t2 // t6 = (C2*cb + ONE_HALF)>> SCALEBITS + addiu s7, s7, 1 + addiu s6, s6, 1 + addu t2, t1, t0 // t2 = y + cred + addu t3, t1, t5 // t3 = y + cgreen + addu t4, t1, t6 // t4 = y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t1, 1(s5) + lbu v0, 0(t2) + lbu v1, 0(t3) + lbu ra, 0(t4) + addu t2, t1, t0 + addu t3, t1, t5 + addu t4, t1, t6 + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7 + + bne t9, s6, 1b + addiu s5, s5, 2 +2: + andi t0, a0, 1 + beqz t0, 4f + nop +3: + lbu t2, 0(s6) + lbu t0, 0(s7) + lbu t1, 0(s5) + addiu t2, t2, -128 //(cb - 128) + addiu t0, t0, -128 //(cr - 128) + mul t3, s4, t2 + mul t4, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // (C1*cr + ONE_HALF)>> SCALEBITS + mulq_rs.w t6, s2, t2 // (C2*cb + ONE_HALF)>> SCALEBITS + addu t3, t3, s0 + addu t3, t4, t3 + sra t5, t3, 16 // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS + addu t2, t1, t0 // y + cred + addu t3, t1, t5 // y + cgreen + addu t4, t1, t6 // y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_1_PIXEL t2, t3, t4, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) + +.purgem STORE_H2V1_1_PIXEL +.purgem STORE_H2V1_2_PIXELS +.endm + +/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 +/*****************************************************************************/ +/* + * jsimd_h2v2_fancy_upsample_mips_dspr2 + * + * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. + */ +LEAF_MIPS_DSPR2(jsimd_h2v2_fancy_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - downsampled_width + * a2 - input_data + * a3 - output_data_ptr + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + li s4, 0 + lw s2, 0(a3) // s2 = *output_data_ptr +0: + li t9, 2 + lw s1, -4(a2) // s1 = inptr1 + +1: + lw s0, 0(a2) // s0 = inptr0 + lwx s3, s4(s2) + addiu s5, a1, -2 // s5 = downsampled_width - 2 + srl t4, s5, 1 + sll t4, t4, 1 + lbu t0, 0(s0) + lbu t1, 1(s0) + lbu t2, 0(s1) + lbu t3, 1(s1) + addiu s0, 2 + addiu s1, 2 + addu t8, s0, t4 // t8 = end address + andi s5, s5, 1 // s5 = residual + sll t4, t0, 1 + sll t6, t1, 1 + addu t0, t0, t4 // t0 = (*inptr0++) * 3 + addu t1, t1, t6 // t1 = (*inptr0++) * 3 + addu t7, t0, t2 // t7 = thiscolsum + addu t6, t1, t3 // t5 = nextcolsum + sll t0, t7, 2 // t0 = thiscolsum * 4 + subu t1, t0, t7 // t1 = thiscolsum * 3 + shra_r.w t0, t0, 4 + addiu t1, 7 + addu t1, t1, t6 + srl t1, t1, 4 + sb t0, 0(s3) + sb t1, 1(s3) + beq t8, s0, 22f // skip to final iteration if width == 3 + addiu s3, 2 +2: + lh t0, 0(s0) // t0 = A3|A2 + lh t2, 0(s1) // t2 = B3|B2 + addiu s0, 2 + addiu s1, 2 + preceu.ph.qbr t0, t0 // t0 = 0|A3|0|A2 + preceu.ph.qbr t2, t2 // t2 = 0|B3|0|B2 + shll.ph t1, t0, 1 + sll t3, t6, 1 + addu.ph t0, t1, t0 // t0 = A3*3|A2*3 + addu t3, t3, t6 // t3 = this * 3 + addu.ph t0, t0, t2 // t0 = next2|next1 + addu t1, t3, t7 + andi t7, t0, 0xFFFF // t7 = next1 + sll t2, t7, 1 + addu t2, t7, t2 // t2 = next1*3 + addu t4, t2, t6 + srl t6, t0, 16 // t6 = next2 + shra_r.w t1, t1, 4 // t1 = (this*3 + last + 8) >> 4 + addu t0, t3, t7 + addiu t0, 7 + srl t0, t0, 4 // t0 = (this*3 + next1 + 7) >> 4 + shra_r.w t4, t4, 4 // t3 = (next1*3 + this + 8) >> 4 + addu t2, t2, t6 + addiu t2, 7 + srl t2, t2, 4 // t2 = (next1*3 + next2 + 7) >> 4 + sb t1, 0(s3) + sb t0, 1(s3) + sb t4, 2(s3) + sb t2, 3(s3) + bne t8, s0, 2b + addiu s3, 4 +22: + beqz s5, 4f + addu t8, s0, s5 +3: + lbu t0, 0(s0) + lbu t2, 0(s1) + addiu s0, 1 + addiu s1, 1 + sll t3, t6, 1 + sll t1, t0, 1 + addu t1, t0, t1 // t1 = inptr0 * 3 + addu t3, t3, t6 // t3 = thiscolsum * 3 + addu t5, t1, t2 + addu t1, t3, t7 + shra_r.w t1, t1, 4 + addu t0, t3, t5 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu s3, 2 + move t7, t6 + bne t8, s0, 3b + move t6, t5 +4: + sll t0, t6, 2 // t0 = thiscolsum * 4 + subu t1, t0, t6 // t1 = thiscolsum * 3 + addu t1, t1, t7 + addiu s4, 4 + shra_r.w t1, t1, 4 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu t9, -1 + addiu s3, 2 + bnez t9, 1b + lw s1, 4(a2) + srl t0, s4, 2 + subu t0, a0, t0 + bgtz t0, 0b + addiu a2, 4 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop +END(jsimd_h2v2_fancy_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_fancy_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - downsampled_width + * a2 - input_data + * a3 - output_data_ptr + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + .set at + + beqz a0, 3f + sll t0, a0, 2 + lw s1, 0(a3) + li s3, 0x10001 + addu s0, s1, t0 +0: + addiu t8, a1, -2 + srl t9, t8, 2 + lw t7, 0(a2) + lw s2, 0(s1) + lbu t0, 0(t7) + lbu t1, 1(t7) // t1 = inptr[1] + sll t2, t0, 1 + addu t2, t2, t0 // t2 = invalue*3 + addu t2, t2, t1 + shra_r.w t2, t2, 2 + sb t0, 0(s2) + sb t2, 1(s2) + beqz t9, 11f + addiu s2, 2 +1: + ulw t0, 0(t7) // t0 = |P3|P2|P1|P0| + ulw t1, 1(t7) + ulh t2, 4(t7) // t2 = |0|0|P5|P4| + preceu.ph.qbl t3, t0 // t3 = |0|P3|0|P2| + preceu.ph.qbr t0, t0 // t0 = |0|P1|0|P0| + preceu.ph.qbr t2, t2 // t2 = |0|P5|0|P4| + preceu.ph.qbl t4, t1 // t4 = |0|P4|0|P3| + preceu.ph.qbr t1, t1 // t1 = |0|P2|0|P1| + shll.ph t5, t4, 1 + shll.ph t6, t1, 1 + addu.ph t5, t5, t4 // t5 = |P4*3|P3*3| + addu.ph t6, t6, t1 // t6 = |P2*3|P1*3| + addu.ph t4, t3, s3 + addu.ph t0, t0, s3 + addu.ph t4, t4, t5 + addu.ph t0, t0, t6 + shrl.ph t4, t4, 2 // t4 = |0|P3|0|P2| + shrl.ph t0, t0, 2 // t0 = |0|P1|0|P0| + addu.ph t2, t2, t5 + addu.ph t3, t3, t6 + shra_r.ph t2, t2, 2 // t2 = |0|P5|0|P4| + shra_r.ph t3, t3, 2 // t3 = |0|P3|0|P2| + shll.ph t2, t2, 8 + shll.ph t3, t3, 8 + or t2, t4, t2 + or t3, t3, t0 + addiu t9, -1 + usw t3, 0(s2) + usw t2, 4(s2) + addiu s2, 8 + bgtz t9, 1b + addiu t7, 4 +11: + andi t8, 3 + beqz t8, 22f + addiu t7, 1 + +2: + lbu t0, 0(t7) + addiu t7, 1 + sll t1, t0, 1 + addu t2, t0, t1 // t2 = invalue + lbu t3, -2(t7) + lbu t4, 0(t7) + addiu t3, 1 + addiu t4, 2 + addu t3, t3, t2 + addu t4, t4, t2 + srl t3, 2 + srl t4, 2 + sb t3, 0(s2) + sb t4, 1(s2) + addiu t8, -1 + bgtz t8, 2b + addiu s2, 2 + +22: + lbu t0, 0(t7) + lbu t2, -1(t7) + sll t1, t0, 1 + addu t1, t1, t0 // t1 = invalue * 3 + addu t1, t1, t2 + addiu t1, 1 + srl t1, t1, 2 + sb t1, 0(s2) + sb t0, 1(s2) + addiu s1, 4 + bne s1, s0, 0b + addiu a2, 4 +3: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_h2v1_fancy_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_downsample_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - cinfo->max_v_samp_factor + * a2 - compptr->v_samp_factor + * a3 - compptr->width_in_blocks + * 16(sp) - input_data + * 20(sp) - output_data + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4 + + beqz a2, 7f + lw s1, 44(sp) // s1 = output_data + lw s0, 40(sp) // s0 = input_data + srl s2, a0, 2 + andi t9, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // t0 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + li s3, 0 // s3 = bias + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 +1: + ulhu t0, 0(t5) + ulhu t1, 2(t5) + ulhu t2, 4(t5) + ulhu t3, 6(t5) + raddu.w.qb t0, t0 + raddu.w.qb t1, t1 + raddu.w.qb t2, t2 + raddu.w.qb t3, t3 + shra.ph t0, t0, 1 + shra_r.ph t1, t1, 1 + shra.ph t2, t2, 1 + shra_r.ph t3, t3, 1 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t3, 3(t4) + addiu s4, -1 + addiu t4, 4 + bgtz s4, 1b + addiu t5, 8 + beqz t8, 3f + addu s4, t4, t8 +2: + ulhu t0, 0(t5) + raddu.w.qb t0, t0 + addqh.w t0, t0, s3 + xori s3, s3, 1 + sb t0, 0(t4) + addiu t4, 1 + bne t4, s4, 2b + addiu t5, 2 +3: + lbux t1, t6(t5) + sll t1, 1 + addqh.w t2, t1, s3 // t2 = pixval1 + xori s3, s3, 1 + addqh.w t3, t1, s3 // t3 = pixval2 + blez s2, 5f + append t3, t2, 8 + addu t5, t4, s2 // t5 = loop_end2 +4: + ush t3, 0(t4) + addiu s2, -1 + bgtz s2, 4b + addiu t4, 2 +5: + beqz t9, 6f + nop + sb t2, 0(t4) +6: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 4 +7: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4 + + j ra + nop +END(jsimd_h2v1_downsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2) + +/* + * a0 - cinfo->image_width + * a1 - cinfo->max_v_samp_factor + * a2 - compptr->v_samp_factor + * a3 - compptr->width_in_blocks + * 16(sp) - input_data + * 20(sp) - output_data + */ + .set at + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + beqz a2, 8f + lw s1, 52(sp) // s1 = output_data + lw s0, 48(sp) // s0 = input_data + + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 + andi t9, a0, 2 + srl s2, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // s2 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + lw s7, 4(s0) // s7 = inptr1 + li s6, 1 // s6 = bias +2: + ulw t0, 0(t5) // t0 = |P3|P2|P1|P0| + ulw t1, 0(s7) // t1 = |Q3|Q2|Q1|Q0| + ulw t2, 4(t5) + ulw t3, 4(s7) + precrq.ph.w t7, t0, t1 // t2 = |P3|P2|Q3|Q2| + ins t0, t1, 16, 16 // t0 = |Q1|Q0|P1|P0| + raddu.w.qb t1, t7 + raddu.w.qb t0, t0 + shra_r.w t1, t1, 2 + addiu t0, 1 + srl t0, 2 + precrq.ph.w t7, t2, t3 + ins t2, t3, 16, 16 + raddu.w.qb t7, t7 + raddu.w.qb t2, t2 + shra_r.w t7, t7, 2 + addiu t2, 1 + srl t2, 2 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t7, 3(t4) + addiu t4, 4 + addiu t5, 8 + addiu s4, s4, -1 + bgtz s4, 2b + addiu s7, 8 + beqz t8, 4f + addu t8, t4, t8 +3: + ulhu t0, 0(t5) + ulhu t1, 0(s7) + ins t0, t1, 16, 16 + raddu.w.qb t0, t0 + addu t0, t0, s6 + srl t0, 2 + xori s6, s6, 3 + sb t0, 0(t4) + addiu t5, 2 + addiu t4, 1 + bne t8, t4, 3b + addiu s7, 2 +4: + lbux t1, t6(t5) + sll t1, 1 + lbux t0, t6(s7) + sll t0, 1 + addu t1, t1, t0 + addu t3, t1, s6 + srl t0, t3, 2 // t2 = pixval1 + xori s6, s6, 3 + addu t2, t1, s6 + srl t1, t2, 2 // t3 = pixval2 + blez s2, 6f + append t1, t0, 8 +5: + ush t1, 0(t4) + addiu s2, -1 + bgtz s2, 5b + addiu t4, 2 +6: + beqz t9, 7f + nop + sb t0, 0(t4) +7: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 8 +8: + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_h2v2_downsample_mips_dspr2) +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2) +/* + * a0 - input_data + * a1 - output_data + * a2 - compptr->v_samp_factor + * a3 - cinfo->max_v_samp_factor + * 16(sp) - cinfo->smoothing_factor + * 20(sp) - compptr->width_in_blocks + * 24(sp) - cinfo->image_width + */ + + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s7, 52(sp) // compptr->width_in_blocks + lw s0, 56(sp) // cinfo->image_width + lw s6, 48(sp) // cinfo->smoothing_factor + sll s7, 3 // output_cols = width_in_blocks * DCTSIZE + sll v0, s7, 1 + subu v0, v0, s0 + blez v0, 2f + move v1, zero + addiu t0, a3, 2 // t0 = cinfo->max_v_samp_factor + 2 +0: + addiu t1, a0, -4 + sll t2, v1, 2 + lwx t1, t2(t1) + move t3, v0 + addu t1, t1, s0 + lbu t2, -1(t1) +1: + addiu t3, t3, -1 + sb t2, 0(t1) + bgtz t3, 1b + addiu t1, t1, 1 + addiu v1, v1, 1 + bne v1, t0, 0b + nop +2: + li v0, 80 + mul v0, s6, v0 + li v1, 16384 + move t4, zero + move t5, zero + subu t6, v1, v0 // t6 = 16384 - tmp_smoot_f * 80 + sll t7, s6, 4 // t7 = tmp_smoot_f * 16 +3: +/* Special case for first column: pretend column -1 is same as column 0 */ + sll v0, t4, 2 + lwx t8, v0(a1) // outptr = output_data[outrow] + sll v1, t5, 2 + addiu t9, v1, 4 + addiu s0, v1, -4 + addiu s1, v1, 8 + lwx s2, v1(a0) // inptr0 = input_data[inrow] + lwx t9, t9(a0) // inptr1 = input_data[inrow+1] + lwx s0, s0(a0) // above_ptr = input_data[inrow-1] + lwx s1, s1(a0) // below_ptr = input_data[inrow+2] + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, 0(s2) + lbu v1, 2(s2) + lbu t0, 0(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1,t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, 0(s0) + lbu t0, 0(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1,s3, t7 + extr_r.w v0, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + addiu s1, s1, 2 + sb v0, -1(t8) + addiu s4, s7, -2 + and s4, s4, 3 + addu s5, s4, t8 //end adress +4: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + sb t2, -1(t8) + bne s5, t8, 4b + addiu s1, s1, 2 + addiu s5, s7, -2 + subu s5, s5, s4 + addu s5, s5, t8 //end adress +5: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + lh v1, 2(t9) + addu t0, t0, v0 + lh v0, 2(s2) + addu s3, t0, s3 + lh t0, 2(s0) + lh t1, 2(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 4(s2) + lbu t0, 1(t9) + lbu t1, 4(t9) + sb t2, 0(t8) + raddu.w.qb t3, v0 + lbu v0, 1(s2) + addu t0, t0, t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 4(s0) + addu t0, t0, v0 + lbu v0, 1(s0) + addu s3, t0, s3 + lbu t0, 1(s1) + lbu t3, 4(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 4(t9) + addu t0, t0, v0 + lh v0, 4(s2) + addu s3, t0, s3 + lh t0, 4(s0) + lh t1, 4(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 6(s2) + lbu t0, 3(t9) + lbu t1, 6(t9) + sb t2, 1(t8) + raddu.w.qb t3, v0 + lbu v0, 3(s2) + addu t0, t0,t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 6(s0) + addu t0, t0, v0 + lbu v0, 3(s0) + addu s3, t0, s3 + lbu t0, 3(s1) + lbu t3, 6(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 6(t9) + addu t0, t0, v0 + lh v0, 6(s2) + addu s3, t0, s3 + lh t0, 6(s0) + lh t1, 6(s1) + madd $ac1, s3, t7 + extr_r.w t3, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 8(s2) + lbu t0, 5(t9) + lbu t1, 8(t9) + sb t3, 2(t8) + raddu.w.qb t2, v0 + lbu v0, 5(s2) + addu t0, t0, t1 + mult $ac1, t2, t6 + addu v0, v0, v1 + lbu t2, 8(s0) + addu t0, t0, v0 + lbu v0, 5(s0) + addu s3, t0, s3 + lbu t0, 5(s1) + lbu t3, 8(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + addiu t8, t8, 4 + addu t0, t0, v0 + addiu s2, s2, 8 + addu s3, t0, s3 + addiu t9, t9, 8 + madd $ac1, s3, t7 + extr_r.w t1, $ac1, 16 + addiu s0, s0, 8 + addiu s1, s1, 8 + bne s5, t8, 5b + sb t1, -1(t8) +/* Special case for last column */ + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 1(s2) + lbu t0, -1(t9) + lbu t1, 1(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 1(s0) + addu t0, t0, v0 + lbu t3, 1(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t0, $ac1, 16 + addiu t5, t5, 2 + sb t0, 0(t8) + addiu t4, t4, 1 + bne t4, a2, 3b + addiu t5, t5, 2 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_h2v2_smooth_downsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2) +/* + * a0 - upsample->h_expand[compptr->component_index] + * a1 - upsample->v_expand[compptr->component_index] + * a2 - input_data + * a3 - output_data_ptr + * 16(sp) - cinfo->output_width + * 20(sp) - cinfo->max_v_samp_factor + */ + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + lw s0, 0(a3) // s0 = output_data + lw s1, 32(sp) // s1 = cinfo->output_width + lw s2, 36(sp) // s2 = cinfo->max_v_samp_factor + li t6, 0 // t6 = inrow + beqz s2, 10f + li s3, 0 // s3 = outrow +0: + addu t0, a2, t6 + addu t7, s0, s3 + lw t3, 0(t0) // t3 = inptr + lw t8, 0(t7) // t8 = outptr + beqz s1, 4f + addu t5, t8, s1 // t5 = outend +1: + lb t2, 0(t3) // t2 = invalue = *inptr++ + addiu t3, 1 + beqz a0, 3f + move t0, a0 // t0 = h_expand +2: + sb t2, 0(t8) + addiu t0, -1 + bgtz t0, 2b + addiu t8, 1 +3: + bgt t5, t8, 1b + nop +4: + addiu t9, a1, -1 // t9 = v_expand - 1 + blez t9, 9f + nop +5: + lw t3, 0(s0) + lw t4, 4(s0) + subu t0, s1, 0xF + blez t0, 7f + addu t5, t3, s1 // t5 = end address + andi t7, s1, 0xF // t7 = residual + subu t8, t5, t7 +6: + ulw t0, 0(t3) + ulw t1, 4(t3) + ulw t2, 8(t3) + usw t0, 0(t4) + ulw t0, 12(t3) + usw t1, 4(t4) + usw t2, 8(t4) + usw t0, 12(t4) + addiu t3, 16 + bne t3, t8, 6b + addiu t4, 16 + beqz t7, 8f + nop +7: + lbu t0, 0(t3) + sb t0, 0(t4) + addiu t3, 1 + bne t3, t5, 7b + addiu t4, 1 +8: + addiu t9, -1 + bgtz t9, 5b + addiu s0, 8 +9: + addu s3, s3, a1 + bne s3, s2, 0b + addiu t6, 1 +10: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_int_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - cinfo->output_width + * a2 - input_data + * a3 - output_data_ptr + */ + lw t7, 0(a3) // t7 = output_data + andi t8, a1, 0xf // t8 = residual + sll t0, a0, 2 + blez a0, 4f + addu t9, t7, t0 // t9 = output_data end address +0: + lw t5, 0(t7) // t5 = outptr + lw t6, 0(a2) // t6 = inptr + addu t3, t5, a1 // t3 = outptr + output_width (end address) + subu t3, t8 // t3 = end address - residual + beq t5, t3, 2f + move t4, t8 +1: + ulw t0, 0(t6) // t0 = |P3|P2|P1|P0| + ulw t2, 4(t6) // t2 = |P7|P6|P5|P4| + srl t1, t0, 16 // t1 = |X|X|P3|P2| + ins t0, t0, 16, 16 // t0 = |P1|P0|P1|P0| + ins t1, t1, 16, 16 // t1 = |P3|P2|P3|P2| + ins t0, t0, 8, 16 // t0 = |P1|P1|P0|P0| + ins t1, t1, 8, 16 // t1 = |P3|P3|P2|P2| + usw t0, 0(t5) + usw t1, 4(t5) + srl t0, t2, 16 // t0 = |X|X|P7|P6| + ins t2, t2, 16, 16 // t2 = |P5|P4|P5|P4| + ins t0, t0, 16, 16 // t0 = |P7|P6|P7|P6| + ins t2, t2, 8, 16 // t2 = |P5|P5|P4|P4| + ins t0, t0, 8, 16 // t0 = |P7|P7|P6|P6| + usw t2, 8(t5) + usw t0, 12(t5) + addiu t5, 16 + bne t5, t3, 1b + addiu t6, 8 + beqz t8, 3f + move t4, t8 +2: + lbu t1, 0(t6) + sb t1, 0(t5) + sb t1, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + addiu t7, 4 + bne t9, t7, 0b + addiu a2, 4 +4: + j ra + nop +END(jsimd_h2v1_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - cinfo->output_width + * a2 - input_data + * a3 - output_data_ptr + */ + lw t7, 0(a3) + blez a0, 7f + andi t9, a1, 0xf // t9 = residual +0: + lw t6, 0(a2) // t6 = inptr + lw t5, 0(t7) // t5 = outptr + addu t8, t5, a1 // t8 = outptr end address + subu t8, t9 // t8 = end address - residual + beq t5, t8, 2f + move t4, t9 +1: + ulw t0, 0(t6) + srl t1, t0, 16 + ins t0, t0, 16, 16 + ins t0, t0, 8, 16 + ins t1, t1, 16, 16 + ins t1, t1, 8, 16 + ulw t2, 4(t6) + usw t0, 0(t5) + usw t1, 4(t5) + srl t3, t2, 16 + ins t2, t2, 16, 16 + ins t2, t2, 8, 16 + ins t3, t3, 16, 16 + ins t3, t3, 8, 16 + usw t2, 8(t5) + usw t3, 12(t5) + addiu t5, 16 + bne t5, t8, 1b + addiu t6, 8 + beqz t9, 3f + move t4, t9 +2: + lbu t0, 0(t6) + sb t0, 0(t5) + sb t0, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + lw t6, 0(t7) // t6 = outptr[0] + lw t5, 4(t7) // t5 = outptr[1] + addu t4, t6, a1 // t4 = new end address + beq a1, t9, 5f + subu t8, t4, t9 +4: + ulw t0, 0(t6) + ulw t1, 4(t6) + ulw t2, 8(t6) + usw t0, 0(t5) + ulw t0, 12(t6) + usw t1, 4(t5) + usw t2, 8(t5) + usw t0, 12(t5) + addiu t6, 16 + bne t6, t8, 4b + addiu t5, 16 + beqz t9, 6f + nop +5: + lbu t0, 0(t6) + sb t0, 0(t5) + addiu t6, 1 + bne t6, t4, 5b + addiu t5, 1 +6: + addiu t7, 8 + addiu a0, -2 + bgtz a0, 0b + addiu a2, 4 +7: + j ra + nop +END(jsimd_h2v2_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_islow_mips_dspr2) +/* + * a0 - coef_block + * a1 - compptr->dcttable + * a2 - output + * a3 - range_limit + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -256 + move v0, sp + addiu v1, zero, 8 // v1 = DCTSIZE = 8 +1: + lh s4, 32(a0) // s4 = inptr[16] + lh s5, 64(a0) // s5 = inptr[32] + lh s6, 96(a0) // s6 = inptr[48] + lh t1, 112(a0) // t1 = inptr[56] + lh t7, 16(a0) // t7 = inptr[8] + lh t5, 80(a0) // t5 = inptr[40] + lh t3, 48(a0) // t3 = inptr[24] + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, t5 + or s4, s4, t7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 2f + addiu v1, v1, -1 + lh s5, 0(a1) // quantptr[DCTSIZE*0] + lh s6, 0(a0) // inptr[DCTSIZE*0] + mul s5, s5, s6 // DEQUANTIZE(inptr[0], quantptr[0]) + sll s5, s5, 2 + sw s5, 0(v0) + sw s5, 32(v0) + sw s5, 64(v0) + sw s5, 96(v0) + sw s5, 128(v0) + sw s5, 160(v0) + sw s5, 192(v0) + b 3f + sw s5, 224(v0) +2: + lh t0, 112(a1) + lh t2, 48(a1) + lh t4, 80(a1) + lh t6, 16(a1) + mul t0, t0, t1 // DEQUANTIZE(inptr[DCTSIZE*7],quant[DCTSIZE*7]) + mul t1, t2, t3 // DEQUANTIZE(inptr[DCTSIZE*3],quant[DCTSIZE*3]) + mul t2, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*5],quant[DCTSIZE*5]) + mul t3, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*1],quant[DCTSIZE*1]) + lh t4, 32(a1) + lh t5, 32(a0) + lh t6, 96(a1) + lh t7, 96(a0) + addu s0, t0, t1 // z3 = tmp0 + tmp2 + addu s1, t1, t2 // z2 = tmp1 + tmp2 + addu s2, t2, t3 // z4 = tmp1 + tmp3 + addu s3, s0, s2 // z3 + z4 + addiu t9, zero, 9633 // FIX_1_175875602 + mul s3, s3, t9 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu t8, t0, t3 // z1 = tmp0 + tmp3 + addiu t9, zero, 2446 // FIX_0_298631336 + mul t0, t0, t9 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t9, zero, 16819 // FIX_2_053119869 + mul t2, t2, t9 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu t9, zero, 25172 // FIX_3_072711026 + mul t1, t1, t9 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t9, zero, 12299 // FIX_1_501321110 + mul t3, t3, t9 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + addiu t9, zero, 16069 // FIX_1_961570560 + mul s0, s0, t9 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t9, zero, 3196 // FIX_0_390180644 + mul s2, s2, t9 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t9, zero, 7373 // FIX_0_899976223 + mul t8, t8, t9 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t9, zero, 20995 // FIX_2_562915447 + mul s1, s1, t9 // -z2 = MULTIPLY(z2, FIX_2_562915447) + subu s0, s3, s0 // z3 += z5 + addu t0, t0, s0 // tmp0 += z3 + addu t1, t1, s0 // tmp2 += z3 + subu s2, s3, s2 // z4 += z5 + addu t2, t2, s2 // tmp1 += z4 + addu t3, t3, s2 // tmp3 += z4 + subu t0, t0, t8 // tmp0 += z1 + subu t1, t1, s1 // tmp2 += z2 + subu t2, t2, s1 // tmp1 += z2 + subu t3, t3, t8 // tmp3 += z1 + mul s0, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*2],quant[DCTSIZE*2]) + addiu t9, zero, 6270 // FIX_0_765366865 + mul s1, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*6],quant[DCTSIZE*6]) + lh t4, 0(a1) + lh t5, 0(a0) + lh t6, 64(a1) + lh t7, 64(a0) + mul s2, t9, s0 // MULTIPLY(z2, FIX_0_765366865) + mul t5, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*0],quant[DCTSIZE*0]) + mul t6, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*4],quant[DCTSIZE*4]) + addiu t9, zero, 4433 // FIX_0_541196100 + addu s3, s0, s1 // z2 + z3 + mul s3, s3, t9 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t9, zero, 15137 // FIX_1_847759065 + mul t8, s1, t9 // MULTIPLY(z3, FIX_1_847759065) + addu t4, t5, t6 + subu t5, t5, t6 + sll t4, t4, 13 // tmp0 = (z2 + z3) << CONST_BITS + sll t5, t5, 13 // tmp1 = (z2 - z3) << CONST_BITS + addu t7, s3, s2 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t6, s3, t8 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) + addu s0, t4, t7 + subu s1, t4, t7 + addu s2, t5, t6 + subu s3, t5, t6 + addu t4, s0, t3 + subu s0, s0, t3 + addu t3, s2, t1 + subu s2, s2, t1 + addu t1, s3, t2 + subu s3, s3, t2 + addu t2, s1, t0 + subu s1, s1, t0 + shra_r.w t4, t4, 11 + shra_r.w t3, t3, 11 + shra_r.w t1, t1, 11 + shra_r.w t2, t2, 11 + shra_r.w s1, s1, 11 + shra_r.w s3, s3, 11 + shra_r.w s2, s2, 11 + shra_r.w s0, s0, 11 + sw t4, 0(v0) + sw t3, 32(v0) + sw t1, 64(v0) + sw t2, 96(v0) + sw s1, 128(v0) + sw s3, 160(v0) + sw s2, 192(v0) + sw s0, 224(v0) +3: + addiu a1, a1, 2 + addiu a0, a0, 2 + bgtz v1, 1b + addiu v0, v0, 4 + move v0, sp + addiu v1, zero, 8 +4: + lw t0, 8(v0) // z2 = (JLONG) wsptr[2] + lw t1, 24(v0) // z3 = (JLONG) wsptr[6] + lw t2, 0(v0) // (JLONG) wsptr[0] + lw t3, 16(v0) // (JLONG) wsptr[4] + lw s4, 4(v0) // (JLONG) wsptr[1] + lw s5, 12(v0) // (JLONG) wsptr[3] + lw s6, 20(v0) // (JLONG) wsptr[5] + lw s7, 28(v0) // (JLONG) wsptr[7] + or s4, s4, t0 + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, s7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 5f + addiu v1, v1, -1 + shra_r.w s5, t2, 5 + andi s5, s5, 0x3ff + lbux s5, s5(a3) + lw s1, 0(a2) + replv.qb s5, s5 + usw s5, 0(s1) + usw s5, 4(s1) + b 6f + nop +5: + addu t4, t0, t1 // z2 + z3 + addiu t8, zero, 4433 // FIX_0_541196100 + mul t5, t4, t8 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t8, zero, 15137 // FIX_1_847759065 + mul t1, t1, t8 // MULTIPLY(z3, FIX_1_847759065) + addiu t8, zero, 6270 // FIX_0_765366865 + mul t0, t0, t8 // MULTIPLY(z2, FIX_0_765366865) + addu t4, t2, t3 // (JLONG) wsptr[0] + (JLONG) wsptr[4] + subu t2, t2, t3 // (JLONG) wsptr[0] - (JLONG) wsptr[4] + sll t4, t4, 13 // tmp0 = ((wsptr[0] + wsptr[4]) << CONST_BITS + sll t2, t2, 13 // tmp1 = ((wsptr[0] - wsptr[4]) << CONST_BITS + subu t1, t5, t1 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) + subu t3, t2, t1 // tmp12 = tmp1 - tmp2 + addu t2, t2, t1 // tmp11 = tmp1 + tmp2 + addu t5, t5, t0 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t1, t4, t5 // tmp13 = tmp0 - tmp3 + addu t0, t4, t5 // tmp10 = tmp0 + tmp3 + lw t4, 28(v0) // tmp0 = (JLONG) wsptr[7] + lw t6, 12(v0) // tmp2 = (JLONG) wsptr[3] + lw t5, 20(v0) // tmp1 = (JLONG) wsptr[5] + lw t7, 4(v0) // tmp3 = (JLONG) wsptr[1] + addu s0, t4, t6 // z3 = tmp0 + tmp2 + addiu t8, zero, 9633 // FIX_1_175875602 + addu s1, t5, t7 // z4 = tmp1 + tmp3 + addu s2, s0, s1 // z3 + z4 + mul s2, s2, t8 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu s3, t4, t7 // z1 = tmp0 + tmp3 + addu t9, t5, t6 // z2 = tmp1 + tmp2 + addiu t8, zero, 16069 // FIX_1_961570560 + mul s0, s0, t8 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t8, zero, 3196 // FIX_0_390180644 + mul s1, s1, t8 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t8, zero, 2446 // FIX_0_298631336 + mul t4, t4, t8 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t8, zero, 7373 // FIX_0_899976223 + mul s3, s3, t8 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t8, zero, 16819 // FIX_2_053119869 + mul t5, t5, t8 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu t8, zero, 20995 // FIX_2_562915447 + mul t9, t9, t8 // -z2 = MULTIPLY(z2, FIX_2_562915447) + addiu t8, zero, 25172 // FIX_3_072711026 + mul t6, t6, t8 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t8, zero, 12299 // FIX_1_501321110 + mul t7, t7, t8 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + subu s0, s2, s0 // z3 += z5 + subu s1, s2, s1 // z4 += z5 + addu t4, t4, s0 + subu t4, t4, s3 // tmp0 + addu t5, t5, s1 + subu t5, t5, t9 // tmp1 + addu t6, t6, s0 + subu t6, t6, t9 // tmp2 + addu t7, t7, s1 + subu t7, t7, s3 // tmp3 + addu s0, t0, t7 + subu t0, t0, t7 + addu t7, t2, t6 + subu t2, t2, t6 + addu t6, t3, t5 + subu t3, t3, t5 + addu t5, t1, t4 + subu t1, t1, t4 + shra_r.w s0, s0, 18 + shra_r.w t7, t7, 18 + shra_r.w t6, t6, 18 + shra_r.w t5, t5, 18 + shra_r.w t1, t1, 18 + shra_r.w t3, t3, 18 + shra_r.w t2, t2, 18 + shra_r.w t0, t0, 18 + andi s0, s0, 0x3ff + andi t7, t7, 0x3ff + andi t6, t6, 0x3ff + andi t5, t5, 0x3ff + andi t1, t1, 0x3ff + andi t3, t3, 0x3ff + andi t2, t2, 0x3ff + andi t0, t0, 0x3ff + lw s1, 0(a2) + lbux s0, s0(a3) + lbux t7, t7(a3) + lbux t6, t6(a3) + lbux t5, t5(a3) + lbux t1, t1(a3) + lbux t3, t3(a3) + lbux t2, t2(a3) + lbux t0, t0(a3) + sb s0, 0(s1) + sb t7, 1(s1) + sb t6, 2(s1) + sb t5, 3(s1) + sb t1, 4(s1) + sb t3, 5(s1) + sb t2, 6(s1) + sb t0, 7(s1) +6: + addiu v0, v0, 32 + bgtz v1, 4b + addiu a2, a2, 4 + addiu sp, sp, 256 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_islow_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_ifast_cols_mips_dspr2) +/* + * a0 - inptr + * a1 - quantptr + * a2 - wsptr + * a3 - mips_idct_ifast_coefs + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu t9, a0, 16 // end address + or AT, a3, zero + +0: + lw s0, 0(a1) // quantptr[DCTSIZE*0] + lw t0, 0(a0) // inptr[DCTSIZE*0] + lw t1, 16(a0) // inptr[DCTSIZE*1] + muleq_s.w.phl v0, t0, s0 // tmp0 ... + lw t2, 32(a0) // inptr[DCTSIZE*2] + lw t3, 48(a0) // inptr[DCTSIZE*3] + lw t4, 64(a0) // inptr[DCTSIZE*4] + lw t5, 80(a0) // inptr[DCTSIZE*5] + muleq_s.w.phr t0, t0, s0 // ... tmp0 ... + lw t6, 96(a0) // inptr[DCTSIZE*6] + lw t7, 112(a0) // inptr[DCTSIZE*7] + or s4, t1, t2 + or s5, t3, t4 + bnez s4, 1f + ins t0, v0, 16, 16 // ... tmp0 + bnez s5, 1f + or s6, t5, t6 + or s6, s6, t7 + bnez s6, 1f + sw t0, 0(a2) // wsptr[DCTSIZE*0] + sw t0, 16(a2) // wsptr[DCTSIZE*1] + sw t0, 32(a2) // wsptr[DCTSIZE*2] + sw t0, 48(a2) // wsptr[DCTSIZE*3] + sw t0, 64(a2) // wsptr[DCTSIZE*4] + sw t0, 80(a2) // wsptr[DCTSIZE*5] + sw t0, 96(a2) // wsptr[DCTSIZE*6] + sw t0, 112(a2) // wsptr[DCTSIZE*7] + addiu a0, a0, 4 + b 2f + addiu a1, a1, 4 + +1: + lw s1, 32(a1) // quantptr[DCTSIZE*2] + lw s2, 64(a1) // quantptr[DCTSIZE*4] + muleq_s.w.phl v0, t2, s1 // tmp1 ... + muleq_s.w.phr t2, t2, s1 // ... tmp1 ... + lw s0, 16(a1) // quantptr[DCTSIZE*1] + lw s1, 48(a1) // quantptr[DCTSIZE*3] + lw s3, 96(a1) // quantptr[DCTSIZE*6] + muleq_s.w.phl v1, t4, s2 // tmp2 ... + muleq_s.w.phr t4, t4, s2 // ... tmp2 ... + lw s2, 80(a1) // quantptr[DCTSIZE*5] + lw t8, 4(AT) // FIX(1.414213562) + ins t2, v0, 16, 16 // ... tmp1 + muleq_s.w.phl v0, t6, s3 // tmp3 ... + muleq_s.w.phr t6, t6, s3 // ... tmp3 ... + ins t4, v1, 16, 16 // ... tmp2 + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + ins t6, v0, 16, 16 // ... tmp3 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + muleq_s.w.phl v0, t1, s0 // tmp4 ... + muleq_s.w.phr t1, t1, s0 // ... tmp4 ... + shll_s.ph s6, s6, 1 // x2 + lw s3, 112(a1) // quantptr[DCTSIZE*7] + subq.ph s6, s6, s7 // ... tmp12 + muleq_s.w.phl v1, t7, s3 // tmp7 ... + muleq_s.w.phr t7, t7, s3 // ... tmp7 ... + ins t1, v0, 16, 16 // ... tmp4 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + muleq_s.w.phl v0, t5, s2 // tmp6 ... + muleq_s.w.phr t5, t5, s2 // ... tmp6 ... + ins t7, v1, 16, 16 // ... tmp7 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + muleq_s.w.phl v1, t3, s1 // tmp5 ... + muleq_s.w.phr t3, t3, s1 // ... tmp5 ... + ins t5, v0, 16, 16 // ... tmp6 + ins t3, v1, 16, 16 // ... tmp5 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 + subq.ph s1, t0, t7 + mulq_s.ph v1, v1, t8 // ... z5 + shll_s.ph s5, s5, 1 // x2 + lw t8, 12(AT) // FIX(-2.613125930) + sw s0, 0(a2) // wsptr[DCTSIZE*0] + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 4 + addiu a1, a1, 4 + sw s1, 112(a2) // wsptr[DCTSIZE*7] + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... tmp10 + subq.ph t3, s5, t5 // tmp5 + addq.ph s2, t2, t5 + addq.ph t1, s4, t3 // tmp4 + subq.ph s3, t2, t5 + sw s2, 16(a2) // wsptr[DCTSIZE*1] + sw s3, 96(a2) // wsptr[DCTSIZE*6] + addq.ph v0, t4, t3 + subq.ph v1, t4, t3 + sw v0, 32(a2) // wsptr[DCTSIZE*2] + sw v1, 80(a2) // wsptr[DCTSIZE*5] + addq.ph v0, t6, t1 + subq.ph v1, t6, t1 + sw v0, 64(a2) // wsptr[DCTSIZE*4] + sw v1, 48(a2) // wsptr[DCTSIZE*3] + +2: + bne a0, t9, 0b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_ifast_cols_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_ifast_rows_mips_dspr2) +/* + * a0 - wsptr + * a1 - output_buf + * a2 - output_col + * a3 - mips_idct_ifast_coefs + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + addiu t9, a0, 128 // end address + lui s8, 0x8080 + ori s8, s8, 0x8080 + +0: + lw AT, 36(sp) // restore $a3 (mips_idct_ifast_coefs) + lw t0, 0(a0) // wsptr[DCTSIZE*0+0/1] b a + lw s0, 16(a0) // wsptr[DCTSIZE*1+0/1] B A + lw t2, 4(a0) // wsptr[DCTSIZE*0+2/3] d c + lw s2, 20(a0) // wsptr[DCTSIZE*1+2/3] D C + lw t4, 8(a0) // wsptr[DCTSIZE*0+4/5] f e + lw s4, 24(a0) // wsptr[DCTSIZE*1+4/5] F E + lw t6, 12(a0) // wsptr[DCTSIZE*0+6/7] h g + lw s6, 28(a0) // wsptr[DCTSIZE*1+6/7] H G + precrq.ph.w t1, s0, t0 // B b + ins t0, s0, 16, 16 // A a + bnez t1, 1f + or s0, t2, s2 + bnez s0, 1f + or s0, t4, s4 + bnez s0, 1f + or s0, t6, s6 + bnez s0, 1f + shll_s.ph s0, t0, 2 // A a + lw a3, 0(a1) + lw AT, 4(a1) + precrq.ph.w t0, s0, s0 // A A + ins s0, s0, 16, 16 // a a + addu a3, a3, a2 + addu AT, AT, a2 + precrq.qb.ph t0, t0, t0 // A A A A + precrq.qb.ph s0, s0, s0 // a a a a + addu.qb s0, s0, s8 + addu.qb t0, t0, s8 + sw s0, 0(a3) + sw s0, 4(a3) + sw t0, 0(AT) + sw t0, 4(AT) + addiu a0, a0, 32 + bne a0, t9, 0b + addiu a1, a1, 8 + b 2f + nop + +1: + precrq.ph.w t3, s2, t2 + ins t2, s2, 16, 16 + precrq.ph.w t5, s4, t4 + ins t4, s4, 16, 16 + precrq.ph.w t7, s6, t6 + ins t6, s6, 16, 16 + lw t8, 4(AT) // FIX(1.414213562) + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + shll_s.ph s6, s6, 1 // x2 + subq.ph s6, s6, s7 // ... tmp12 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 // tmp0 + tmp7 + subq.ph s7, t0, t7 // tmp0 - tmp7 + mulq_s.ph v1, v1, t8 // ... z5 + lw a3, 0(a1) + lw t8, 12(AT) // FIX(-2.613125930) + shll_s.ph s5, s5, 1 // x2 + addu a3, a3, a2 + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 32 + addiu a1, a1, 8 + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + shll_s.ph s0, s0, 2 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... tmp10 + subq.ph t3, s5, t5 // tmp5 + shll_s.ph s7, s7, 2 + addq.ph t1, s4, t3 // tmp4 + addq.ph s1, t2, t5 // tmp1 + tmp6 + subq.ph s6, t2, t5 // tmp1 - tmp6 + addq.ph s2, t4, t3 // tmp2 + tmp5 + subq.ph s5, t4, t3 // tmp2 - tmp5 + addq.ph s4, t6, t1 // tmp3 + tmp4 + subq.ph s3, t6, t1 // tmp3 - tmp4 + shll_s.ph s1, s1, 2 + shll_s.ph s2, s2, 2 + shll_s.ph s3, s3, 2 + shll_s.ph s4, s4, 2 + shll_s.ph s5, s5, 2 + shll_s.ph s6, s6, 2 + precrq.ph.w t0, s1, s0 // B A + ins s0, s1, 16, 16 // b a + precrq.ph.w t2, s3, s2 // D C + ins s2, s3, 16, 16 // d c + precrq.ph.w t4, s5, s4 // F E + ins s4, s5, 16, 16 // f e + precrq.ph.w t6, s7, s6 // H G + ins s6, s7, 16, 16 // h g + precrq.qb.ph t0, t2, t0 // D C B A + precrq.qb.ph s0, s2, s0 // d c b a + precrq.qb.ph t4, t6, t4 // H G F E + precrq.qb.ph s4, s6, s4 // h g f e + addu.qb s0, s0, s8 + addu.qb s4, s4, s8 + sw s0, 0(a3) // outptr[0/1/2/3] d c b a + sw s4, 4(a3) // outptr[4/5/6/7] h g f e + lw a3, -4(a1) + addu.qb t0, t0, s8 + addu a3, a3, a2 + addu.qb t4, t4, s8 + sw t0, 0(a3) // outptr[0/1/2/3] D C B A + bne a0, t9, 0b + sw t4, 4(a3) // outptr[4/5/6/7] H G F E + +2: + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + j ra + nop + +END(jsimd_idct_ifast_rows_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_fdct_islow_mips_dspr2) +/* + * a0 - data + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lui t0, 6437 + ori t0, 2260 + lui t1, 9633 + ori t1, 11363 + lui t2, 0xd39e + ori t2, 0xe6dc + lui t3, 0xf72d + ori t3, 9633 + lui t4, 2261 + ori t4, 9633 + lui t5, 0xd39e + ori t5, 6437 + lui t6, 9633 + ori t6, 0xd39d + lui t7, 0xe6dc + ori t7, 2260 + lui t8, 4433 + ori t8, 10703 + lui t9, 0xd630 + ori t9, 4433 + li s8, 8 + move a1, a0 +1: + lw s0, 0(a1) // tmp0 = 1|0 + lw s1, 4(a1) // tmp1 = 3|2 + lw s2, 8(a1) // tmp2 = 5|4 + lw s3, 12(a1) // tmp3 = 7|6 + packrl.ph s1, s1, s1 // tmp1 = 2|3 + packrl.ph s3, s3, s3 // tmp3 = 6|7 + subq.ph s7, s1, s2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph s5, s0, s3 // tmp5 = 1-6|0-7 = t6|t7 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t0 // ac0 += t5* 6437 + t4* 2260 + dpa.w.ph $ac0, s5, t1 // ac0 += t6* 9633 + t7* 11363 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t2 // ac1 += t5*-11362 + t4* -6436 + dpa.w.ph $ac1, s5, t3 // ac1 += t6* -2259 + t7* 9633 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, s7, t4 // ac2 += t5* 2261 + t4* 9633 + dpa.w.ph $ac2, s5, t5 // ac2 += t6*-11362 + t7* 6437 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, s7, t6 // ac3 += t5* 9633 + t4*-11363 + dpa.w.ph $ac3, s5, t7 // ac3 += t6* -6436 + t7* 2260 + addq.ph s6, s1, s2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph s4, s0, s3 // tmp4 = 1+6|0+7 = t1|t0 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + extr_r.w s2, $ac2, 11 // tmp2 = (ac2 + 1024) >> 11 + extr_r.w s3, $ac3, 11 // tmp3 = (ac3 + 1024) >> 11 + addq.ph s5, s4, s6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph s7, s4, s6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sh s0, 2(a1) + sh s1, 6(a1) + sh s2, 10(a1) + sh s3, 14(a1) + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t8 // ac0 += t12* 4433 + t13* 10703 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t9 // ac1 += t12*-10704 + t13* 4433 + sra s4, s5, 16 // tmp4 = t11 + addiu a1, a1, 16 + addiu s8, s8, -1 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + addu s2, s5, s4 // tmp2 = t10 + t11 + subu s3, s5, s4 // tmp3 = t10 - t11 + sll s2, s2, 2 // tmp2 = (t10 + t11) << 2 + sll s3, s3, 2 // tmp3 = (t10 - t11) << 2 + sh s2, -16(a1) + sh s3, -8(a1) + sh s0, -12(a1) + bgtz s8, 1b + sh s1, -4(a1) + li t0, 2260 + li t1, 11363 + li t2, 9633 + li t3, 6436 + li t4, 6437 + li t5, 2261 + li t6, 11362 + li t7, 2259 + li t8, 4433 + li t9, 10703 + li a1, 10704 + li s8, 8 + +2: + lh a2, 0(a0) // 0 + lh a3, 16(a0) // 8 + lh v0, 32(a0) // 16 + lh v1, 48(a0) // 24 + lh s4, 64(a0) // 32 + lh s5, 80(a0) // 40 + lh s6, 96(a0) // 48 + lh s7, 112(a0) // 56 + addu s2, v0, s5 // tmp2 = 16 + 40 + subu s5, v0, s5 // tmp5 = 16 - 40 + addu s3, v1, s4 // tmp3 = 24 + 32 + subu s4, v1, s4 // tmp4 = 24 - 32 + addu s0, a2, s7 // tmp0 = 0 + 56 + subu s7, a2, s7 // tmp7 = 0 - 56 + addu s1, a3, s6 // tmp1 = 8 + 48 + subu s6, a3, s6 // tmp6 = 8 - 48 + addu a2, s0, s3 // tmp10 = tmp0 + tmp3 + subu v1, s0, s3 // tmp13 = tmp0 - tmp3 + addu a3, s1, s2 // tmp11 = tmp1 + tmp2 + subu v0, s1, s2 // tmp12 = tmp1 - tmp2 + mult s7, t1 // ac0 = tmp7 * c1 + madd s4, t0 // ac0 += tmp4 * c0 + madd s5, t4 // ac0 += tmp5 * c4 + madd s6, t2 // ac0 += tmp6 * c2 + mult $ac1, s7, t2 // ac1 = tmp7 * c2 + msub $ac1, s4, t3 // ac1 -= tmp4 * c3 + msub $ac1, s5, t6 // ac1 -= tmp5 * c6 + msub $ac1, s6, t7 // ac1 -= tmp6 * c7 + mult $ac2, s7, t4 // ac2 = tmp7 * c4 + madd $ac2, s4, t2 // ac2 += tmp4 * c2 + madd $ac2, s5, t5 // ac2 += tmp5 * c5 + msub $ac2, s6, t6 // ac2 -= tmp6 * c6 + mult $ac3, s7, t0 // ac3 = tmp7 * c0 + msub $ac3, s4, t1 // ac3 -= tmp4 * c1 + madd $ac3, s5, t2 // ac3 += tmp5 * c2 + msub $ac3, s6, t3 // ac3 -= tmp6 * c3 + extr_r.w s0, $ac0, 15 // tmp0 = (ac0 + 16384) >> 15 + extr_r.w s1, $ac1, 15 // tmp1 = (ac1 + 16384) >> 15 + extr_r.w s2, $ac2, 15 // tmp2 = (ac2 + 16384) >> 15 + extr_r.w s3, $ac3, 15 // tmp3 = (ac3 + 16384) >> 15 + addiu s8, s8, -1 + addu s4, a2, a3 // tmp4 = tmp10 + tmp11 + subu s5, a2, a3 // tmp5 = tmp10 - tmp11 + sh s0, 16(a0) + sh s1, 48(a0) + sh s2, 80(a0) + sh s3, 112(a0) + mult v0, t8 // ac0 = tmp12 * c8 + madd v1, t9 // ac0 += tmp13 * c9 + mult $ac1, v1, t8 // ac1 = tmp13 * c8 + msub $ac1, v0, a1 // ac1 -= tmp12 * c10 + addiu a0, a0, 2 + extr_r.w s6, $ac0, 15 // tmp6 = (ac0 + 16384) >> 15 + extr_r.w s7, $ac1, 15 // tmp7 = (ac1 + 16384) >> 15 + shra_r.w s4, s4, 2 // tmp4 = (tmp4 + 2) >> 2 + shra_r.w s5, s5, 2 // tmp5 = (tmp5 + 2) >> 2 + sh s4, -2(a0) + sh s5, 62(a0) + sh s6, 30(a0) + bgtz s8, 2b + sh s7, 94(a0) + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + jr ra + nop + +END(jsimd_fdct_islow_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_fdct_ifast_mips_dspr2) +/* + * a0 - data + */ + .set at + SAVE_REGS_ON_STACK 8, s0, s1 + li a1, 0x014e014e // FIX_1_306562965 (334 << 16)|(334 & 0xffff) + li a2, 0x008b008b // FIX_0_541196100 (139 << 16)|(139 & 0xffff) + li a3, 0x00620062 // FIX_0_382683433 (98 << 16) |(98 & 0xffff) + li s1, 0x00b500b5 // FIX_0_707106781 (181 << 16)|(181 & 0xffff) + + move v0, a0 + addiu v1, v0, 128 // end address + +0: + lw t0, 0(v0) // tmp0 = 1|0 + lw t1, 4(v0) // tmp1 = 3|2 + lw t2, 8(v0) // tmp2 = 5|4 + lw t3, 12(v0) // tmp3 = 7|6 + packrl.ph t1, t1, t1 // tmp1 = 2|3 + packrl.ph t3, t3, t3 // tmp3 = 6|7 + subq.ph t7, t1, t2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph t5, t0, t3 // tmp5 = 1-6|0-7 = t6|t7 + addq.ph t6, t1, t2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph t4, t0, t3 // tmp4 = 1+6|0+7 = t1|t0 + addq.ph t8, t4, t6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph t9, t4, t6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sra t4, t8, 16 // tmp4 = t11 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t9, s1 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, t7, a3 // ac1 += t4*98 + t5*98 + dpsx.w.ph $ac1, t5, a3 // ac1 += t6*98 + t7*98 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, t7, a2 // ac2 += t4*139 + t5*139 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, t5, a1 // ac3 += t6*334 + t7*334 + precrq.ph.w t0, t5, t7 // t0 = t5|t6 + addq.ph t2, t8, t4 // tmp2 = t10 + t11 + subq.ph t3, t8, t4 // tmp3 = t10 - t11 + extr.w t4, $ac0, 8 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t0, s1 // ac0 += t5*181 + t6*181 + extr.w t0, $ac1, 8 // t0 = z5 + extr.w t1, $ac2, 8 // t1 = MULTIPLY(tmp10, 139) + extr.w t7, $ac3, 8 // t2 = MULTIPLY(tmp12, 334) + extr.w t8, $ac0, 8 // t8 = z3 = MULTIPLY(tmp11, 181) + add t6, t1, t0 // t6 = z2 + add t7, t7, t0 // t7 = z4 + subq.ph t0, t5, t8 // t0 = z13 = tmp7 - z3 + addq.ph t8, t5, t8 // t9 = z11 = tmp7 + z3 + addq.ph t1, t0, t6 // t1 = z13 + z2 + subq.ph t6, t0, t6 // t6 = z13 - z2 + addq.ph t0, t8, t7 // t0 = z11 + z4 + subq.ph t7, t8, t7 // t7 = z11 - z4 + addq.ph t5, t4, t9 + subq.ph t4, t9, t4 + sh t2, 0(v0) + sh t5, 4(v0) + sh t3, 8(v0) + sh t4, 12(v0) + sh t1, 10(v0) + sh t6, 6(v0) + sh t0, 2(v0) + sh t7, 14(v0) + addiu v0, 16 + bne v1, v0, 0b + nop + move v0, a0 + addiu v1, v0, 16 + +1: + lh t0, 0(v0) // 0 + lh t1, 16(v0) // 8 + lh t2, 32(v0) // 16 + lh t3, 48(v0) // 24 + lh t4, 64(v0) // 32 + lh t5, 80(v0) // 40 + lh t6, 96(v0) // 48 + lh t7, 112(v0) // 56 + add t8, t0, t7 // t8 = tmp0 + sub t7, t0, t7 // t7 = tmp7 + add t0, t1, t6 // t0 = tmp1 + sub t1, t1, t6 // t1 = tmp6 + add t6, t2, t5 // t6 = tmp2 + sub t5, t2, t5 // t5 = tmp5 + add t2, t3, t4 // t2 = tmp3 + sub t3, t3, t4 // t3 = tmp4 + add t4, t8, t2 // t4 = tmp10 = tmp0 + tmp3 + sub t8, t8, t2 // t8 = tmp13 = tmp0 - tmp3 + sub s0, t0, t6 // s0 = tmp12 = tmp1 - tmp2 + ins t8, s0, 16, 16 // t8 = tmp12|tmp13 + add t2, t0, t6 // t2 = tmp11 = tmp1 + tmp2 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t8, s1 // ac0 += t12*181 + t13*181 + add s0, t4, t2 // t8 = tmp10+tmp11 + sub t4, t4, t2 // t4 = tmp10-tmp11 + sh s0, 0(v0) + sh t4, 64(v0) + extr.w t2, $ac0, 8 // z1 = MULTIPLY(tmp12+tmp13,FIX_0_707106781) + addq.ph t4, t8, t2 // t9 = tmp13 + z1 + subq.ph t8, t8, t2 // t2 = tmp13 - z1 + sh t4, 32(v0) + sh t8, 96(v0) + add t3, t3, t5 // t3 = tmp10 = tmp4 + tmp5 + add t0, t5, t1 // t0 = tmp11 = tmp5 + tmp6 + add t1, t1, t7 // t1 = tmp12 = tmp6 + tmp7 + andi t4, a1, 0xffff + mul s0, t1, t4 + sra s0, s0, 8 // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965) + ins t1, t3, 16, 16 // t1 = tmp10|tmp12 + mult $0, $0 // ac0 = 0 + mulsa.w.ph $ac0, t1, a3 // ac0 += t10*98 - t12*98 + extr.w t8, $ac0, 8 // z5 = MULTIPLY(tmp10-tmp12,FIX_0_382683433) + add t2, t7, t8 // t2 = tmp7 + z5 + sub t7, t7, t8 // t7 = tmp7 - z5 + andi t4, a2, 0xffff + mul t8, t3, t4 + sra t8, t8, 8 // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100) + andi t4, s1, 0xffff + mul t6, t0, t4 + sra t6, t6, 8 // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781) + add t0, t6, t8 // t0 = z3 + z2 + sub t1, t6, t8 // t1 = z3 - z2 + add t3, t6, s0 // t3 = z3 + z4 + sub t4, t6, s0 // t4 = z3 - z4 + sub t5, t2, t1 // t5 = dataptr[5] + sub t6, t7, t0 // t6 = dataptr[3] + add t3, t2, t3 // t3 = dataptr[1] + add t4, t7, t4 // t4 = dataptr[7] + sh t5, 80(v0) + sh t6, 48(v0) + sh t3, 16(v0) + sh t4, 112(v0) + addiu v0, 2 + bne v0, v1, 1b + nop + + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop +END(jsimd_fdct_ifast_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_quantize_mips_dspr2) +/* + * a0 - coef_block + * a1 - divisors + * a2 - workspace + */ + + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2 + + addiu v0, a2, 124 // v0 = workspace_end + lh t0, 0(a2) + lh t1, 0(a1) + lh t2, 128(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t4, 384(a1) + lh t5, 130(a1) + lh t6, 2(a2) + lh t7, 2(a1) + lh t8, 386(a1) + +1: + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + addiu a2, a2, 4 + addiu a1, a1, 4 + add s1, t6, t5 + andi s1, 0xffff + sh v1, 0(a0) + + mul s2, s1, t7 + addiu s1, t8, 16 + srav s2, s2, s1 + mul s2,s2, s0 + lh t0, 0(a2) + lh t1, 0(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t2, 128(a1) + lh t4, 384(a1) + lh t5, 130(a1) + lh t8, 386(a1) + lh t6, 2(a2) + lh t7, 2(a1) + sh s2, 2(a0) + lh t0, 0(a2) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0,t3 + bne a2, v0, 1b + addiu a0, a0, 4 + + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + sh v1, 0(a0) + add s1, t6, t5 + andi s1, 0xffff + mul s2, s1, t7 + addiu s1, t8, 16 + addiu a2, a2, 4 + addiu a1, a1, 4 + srav s2, s2, s1 + mul s2, s2, s0 + sh s2, 2(a0) + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2 + + j ra + nop + +END(jsimd_quantize_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_quantize_float_mips_dspr2) +/* + * a0 - coef_block + * a1 - divisors + * a2 - workspace + */ + + .set at + + li t1, 0x46800100 //integer representation 16384.5 + mtc1 t1, f0 + li t0, 63 +0: + lwc1 f2, 0(a2) + lwc1 f10, 0(a1) + lwc1 f4, 4(a2) + lwc1 f12, 4(a1) + lwc1 f6, 8(a2) + lwc1 f14, 8(a1) + lwc1 f8, 12(a2) + lwc1 f16, 12(a1) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + lwc1 f10, 16(a1) + lwc1 f12, 20(a1) + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + lwc1 f14, 24(a1) + lwc1 f16, 28(a1) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + lwc1 f2, 16(a2) + lwc1 f4, 20(a2) + lwc1 f6, 24(a2) + lwc1 f8, 28(a2) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + sh t1, 0(a0) + sh t2, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + addiu t0, t0, -8 + addiu a2, a2, 32 + addiu a1, a1, 32 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + sh t1, 8(a0) + sh t2, 10(a0) + sh t3, 12(a0) + sh t4, 14(a0) + bgez t0, 0b + addiu a0, a0, 16 + + j ra + nop + +END(jsimd_quantize_float_mips_dspr2) +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_2x2_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + addiu sp, sp, -40 + move v0, sp + addiu s2, zero, 29692 + addiu s3, zero, -10426 + addiu s4, zero, 6967 + addiu s5, zero, -5906 + lh t0, 0(a1) // t0 = inptr[DCTSIZE*0] + lh t5, 0(a0) // t5 = quantptr[DCTSIZE*0] + lh t1, 48(a1) // t1 = inptr[DCTSIZE*3] + lh t6, 48(a0) // t6 = quantptr[DCTSIZE*3] + mul t4, t5, t0 + lh t0, 16(a1) // t0 = inptr[DCTSIZE*1] + lh t5, 16(a0) // t5 = quantptr[DCTSIZE*1] + mul t6, t6, t1 + mul t5, t5, t0 + lh t2, 80(a1) // t2 = inptr[DCTSIZE*5] + lh t7, 80(a0) // t7 = quantptr[DCTSIZE*5] + lh t3, 112(a1) // t3 = inptr[DCTSIZE*7] + lh t8, 112(a0) // t8 = quantptr[DCTSIZE*7] + mul t7, t7, t2 + mult zero, zero + mul t8, t8, t3 + li s0, 0x73FCD746 // s0 = (29692 << 16) | (-10426 & 0xffff) + li s1, 0x1B37E8EE // s1 = (6967 << 16) | (-5906 & 0xffff) + ins t6, t5, 16, 16 // t6 = t5|t6 + sll t4, t4, 15 + dpa.w.ph $ac0, t6, s0 + lh t1, 2(a1) + lh t6, 2(a0) + ins t8, t7, 16, 16 // t8 = t7|t8 + dpa.w.ph $ac0, t8, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 18(a1) + lh t6, 18(a0) + lh t2, 50(a1) + lh t7, 50(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 82(a1) + lh t2, 82(a0) + lh t3, 114(a1) + lh t4, 114(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 0(v0) + sw t8, 20(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 6(a1) + lh t6, 6(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 22(a1) + lh t6, 22(a0) + lh t2, 54(a1) + lh t7, 54(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 86(a1) + lh t2, 86(a0) + lh t3, 118(a1) + lh t4, 118(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 4(v0) + sw t8, 24(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 10(a1) + lh t6, 10(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 26(a1) + lh t6, 26(a0) + lh t2, 58(a1) + lh t7, 58(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 90(a1) + lh t2, 90(a0) + lh t3, 122(a1) + lh t4, 122(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 8(v0) + sw t8, 28(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 14(a1) + lh t6, 14(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 30(a1) + lh t6, 30(a0) + lh t2, 62(a1) + lh t7, 62(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 94(a1) + lh t2, 94(a0) + lh t3, 126(a1) + lh t4, 126(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 12(v0) + sw t8, 32(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + lw t9, 0(a2) + lw t3, 0(v0) + lw t7, 4(v0) + lw t1, 8(v0) + addu t9, t9, a3 + sll t3, t3, 15 + subu t8, t4, t0 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + shra_r.w t8, t8, 13 + sw t0, 16(v0) + sw t8, 36(v0) + lw t5, 12(v0) + lw t6, 16(v0) + mult t7, s2 + madd t1, s3 + madd t5, s4 + madd t6, s5 + lw t5, 24(v0) + lw t7, 28(v0) + mflo t0, $ac0 + lw t8, 32(v0) + lw t2, 36(v0) + mult $ac1, t5, s2 + madd $ac1, t7, s3 + madd $ac1, t8, s4 + madd $ac1, t2, s5 + addu t1, t3, t0 + subu t6, t3, t0 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + mflo t4, $ac1 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + lw t0, 20(v0) + sb t1, 0(t9) + sb t6, 1(t9) + sll t0, t0, 15 + lw t9, 4(a2) + addu t1, t0, t4 + subu t6, t0, t4 + addu t9, t9, a3 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + sb t1, 0(t9) + sb t6, 1(t9) + addiu sp, sp, 40 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop + +END(jsimd_idct_2x2_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_4x4_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + * 16(sp) - workspace[DCTSIZE*4]; // buffers data between passes + */ + + .set at + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw v1, 48(sp) + move t0, a1 + move t1, v1 + li t9, 4 + li s0, 0x2e75f93e + li s1, 0x21f9ba79 + li s2, 0xecc2efb0 + li s3, 0x52031ccd + +0: + lh s6, 32(t0) // inptr[DCTSIZE*2] + lh t6, 32(a0) // quantptr[DCTSIZE*2] + lh s7, 96(t0) // inptr[DCTSIZE*6] + lh t7, 96(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 0(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 0(a0) // quantptr[0] + li s6, 15137 + li s7, 6270 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh t5, 112(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s4, 112(a0) // quantptr[DCTSIZE*7] + lh v0, 80(t0) // inptr[DCTSIZE*5] + lh s5, 80(a0) // quantptr[DCTSIZE*5] + lh s6, 48(a0) // quantptr[DCTSIZE*3] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s7, 16(a0) // quantptr[DCTSIZE*1] + lh t8, 16(t0) // inptr[DCTSIZE*1] + subu t6, t6, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) + lh t7, 48(t0) // inptr[DCTSIZE*3] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul v0, s5, v0 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, t6 // tmp10 = tmp0 + z2 + subu t4, t2, t6 // tmp10 = tmp0 - z2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, v0, 16, 16 + ins t7, t8, 16, 16 + addiu t9, t9, -1 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo s4, $ac0 + mflo s5, $ac1 + addiu a0, a0, 2 + addiu t1, t1, 4 + addiu t0, t0, 2 + addu t6, t4, s4 + subu t5, t4, s4 + addu s6, t3, s5 + subu s7, t3, s5 + shra_r.w t6, t6, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w t5, t5, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw t6, 28(t1) + sw t5, 60(t1) + sw s6, -4(t1) + bgtz t9, 0b + sw s7, 92(t1) + // second loop three pass + li t9, 3 +1: + lh s6, 34(t0) // inptr[DCTSIZE*2] + lh t6, 34(a0) // quantptr[DCTSIZE*2] + lh s7, 98(t0) // inptr[DCTSIZE*6] + lh t7, 98(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 2(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 2(a0) // quantptr[DCTSIZE*0] + li s6, 15137 + li s7, 6270 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul v0, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh t5, 114(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s4, 114(a0) // quantptr[DCTSIZE*7] + lh s5, 82(a0) // quantptr[DCTSIZE*5] + lh t6, 82(t0) // inptr[DCTSIZE*5] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s6, 50(a0) // quantptr[DCTSIZE*3] + lh t8, 18(t0) // inptr[DCTSIZE*1] + subu v0, v0, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) + lh t7, 50(t0) // inptr[DCTSIZE*3] + lh s7, 18(a0) // quantptr[DCTSIZE*1] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul t6, s5, t6 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, v0 // tmp10 = tmp0 + z2 + subu t4, t2, v0 // tmp10 = tmp0 - z2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo t5, $ac0 + mflo t6, $ac1 + addiu t9, t9, -1 + addiu t0, t0, 2 + addiu a0, a0, 2 + addiu t1, t1, 4 + addu s5, t4, t5 + subu s4, t4, t5 + addu s6, t3, t6 + subu s7, t3, t6 + shra_r.w s5, s5, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w s4, s4, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw s5, 32(t1) + sw s4, 64(t1) + sw s6, 0(t1) + bgtz t9, 1b + sw s7, 96(t1) + move t1, v1 + li s4, 15137 + lw s6, 8(t1) // wsptr[2] + li s5, 6270 + lw s7, 24(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 0(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 28(t1) // wsptr[7] + lh t6, 20(t1) // wsptr[5] + lh t7, 12(t1) // wsptr[3] + lh t8, 4(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 0(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + // 2 + li s4, 15137 + lw s6, 40(t1) // wsptr[2] + li s5, 6270 + lw s7, 56(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 32(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 60(t1) // wsptr[7] + lh t6, 52(t1) // wsptr[5] + lh t7, 44(t1) // wsptr[3] + lh t8, 36(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, CONST_BITS-PASS1_BITS+1) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, CONST_BITS-PASS1_BITS+1) + sll s4, t9, 2 + lw v0, 4(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + // 3 + li s4, 15137 + lw s6, 72(t1) // wsptr[2] + li s5, 6270 + lw s7, 88(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 64(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 92(t1) // wsptr[7] + lh t6, 84(t1) // wsptr[5] + lh t7, 76(t1) // wsptr[3] + lh t8, 68(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 8(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + li s4, 15137 + lw s6, 104(t1) // wsptr[2] + li s5, 6270 + lw s7, 120(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 96(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], -FIX_0_765366865) + lh t5, 124(t1) // wsptr[7] + lh t6, 116(t1) // wsptr[5] + lh t7, 108(t1) // wsptr[3] + lh t8, 100(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2; + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2; + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 12(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_idct_4x4_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_6x6_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -144 + move v0, sp + addiu v1, v0, 24 + addiu t9, zero, 5793 + addiu s0, zero, 10033 + addiu s1, zero, 2998 + +1: + lh s2, 0(a0) // q0 = quantptr[ 0] + lh s3, 32(a0) // q1 = quantptr[16] + lh s4, 64(a0) // q2 = quantptr[32] + lh t2, 64(a1) // tmp2 = inptr[32] + lh t1, 32(a1) // tmp1 = inptr[16] + lh t0, 0(a1) // tmp0 = inptr[ 0] + mul t2, t2, s4 // tmp2 = tmp2 * q2 + mul t1, t1, s3 // tmp1 = tmp1 * q1 + mul t0, t0, s2 // tmp0 = tmp0 * q0 + lh t6, 16(a1) // z1 = inptr[ 8] + lh t8, 80(a1) // z3 = inptr[40] + lh t7, 48(a1) // z2 = inptr[24] + lh s2, 16(a0) // q0 = quantptr[ 8] + lh s4, 80(a0) // q2 = quantptr[40] + lh s3, 48(a0) // q1 = quantptr[24] + mul t2, t2, t9 // tmp2 = tmp2 * 5793 + mul t1, t1, s0 // tmp1 = tmp1 * 10033 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + mul t6, t6, s2 // z1 = z1 * q0 + mul t8, t8, s4 // z3 = z3 * q2 + mul t7, t7, s3 // z2 = z2 * q1 + addu t3, t0, t2 // tmp10 = tmp0 + tmp2 + sll t2, t2, 1 // tmp2 = tmp2 << 2 + subu t4, t0, t2 // tmp11 = tmp0 - tmp2; + subu t5, t3, t1 // tmp12 = tmp10 - tmp1 + addu t3, t3, t1 // tmp10 = tmp10 + tmp1 + addu t1, t6, t8 // tmp1 = z1 + z3 + mul t1, t1, s1 // tmp1 = tmp1 * 2998 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + subu t2, t6, t8 // tmp2 = z1 - z3 + subu t2, t2, t7 // tmp2 = tmp2 - z2 + sll t2, t2, 2 // tmp2 = tmp2 << 2 + addu t0, t6, t7 // tmp0 = z1 + z2 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + subu s2, t8, t7 // q0 = z3 - z2 + sll s2, s2, 13 // q0 = q0 << 13 + addu t0, t0, t1 // tmp0 = tmp0 + tmp1 + addu t1, s2, t1 // tmp1 = q0 + tmp1 + addu s2, t4, t2 // q0 = tmp11 + tmp2 + subu s3, t4, t2 // q1 = tmp11 - tmp2 + addu t6, t3, t0 // z1 = tmp10 + tmp0 + subu t7, t3, t0 // z2 = tmp10 - tmp0 + addu t4, t5, t1 // tmp11 = tmp12 + tmp1 + subu t5, t5, t1 // tmp12 = tmp12 - tmp1 + shra_r.w t6, t6, 11 // z1 = (z1 + 1024) >> 11 + shra_r.w t7, t7, 11 // z2 = (z2 + 1024) >> 11 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + shra_r.w t5, t5, 11 // tmp12 = (tmp12 + 1024) >> 11 + sw s2, 24(v0) + sw s3, 96(v0) + sw t6, 0(v0) + sw t7, 120(v0) + sw t4, 48(v0) + sw t5, 72(v0) + addiu v0, v0, 4 + addiu a1, a1, 2 + bne v0, v1, 1b + addiu a0, a0, 2 + + /* Pass 2: process 6 rows from work array, store into output array. */ + move v0, sp + addiu v1, v0, 144 + +2: + lw t0, 0(v0) + lw t2, 16(v0) + lw s5, 0(a2) + addiu t0, t0, 16 + sll t0, t0, 13 + mul t3, t2, t9 + lw t6, 4(v0) + lw t8, 20(v0) + lw t7, 12(v0) + addu s5, s5, a3 + addu s6, t6, t8 + mul s6, s6, s1 + addu t1, t0, t3 + subu t4, t0, t3 + subu t4, t4, t3 + lw t3, 8(v0) + mul t0, t3, s0 + addu s7, t6, t7 + sll s7, s7, 13 + addu s7, s6, s7 + subu t2, t8, t7 + sll t2, t2, 13 + addu t2, s6, t2 + subu s6, t6, t7 + subu s6, s6, t8 + sll s6, s6, 13 + addu t3, t1, t0 + subu t5, t1, t0 + addu t6, t3, s7 + subu t3, t3, s7 + addu t7, t4, s6 + subu t4, t4, s6 + addu t8, t5, t2 + subu t5, t5, t2 + shll_s.w t6, t6, 6 + shll_s.w t3, t3, 6 + shll_s.w t7, t7, 6 + shll_s.w t4, t4, 6 + shll_s.w t8, t8, 6 + shll_s.w t5, t5, 6 + sra t6, t6, 24 + addiu t6, t6, 128 + sra t3, t3, 24 + addiu t3, t3, 128 + sb t6, 0(s5) + sra t7, t7, 24 + addiu t7, t7, 128 + sb t3, 5(s5) + sra t4, t4, 24 + addiu t4, t4, 128 + sb t7, 1(s5) + sra t8, t8, 24 + addiu t8, t8, 128 + sb t4, 4(s5) + addiu v0, v0, 24 + sra t5, t5, 24 + addiu t5, t5, 128 + sb t8, 2(s5) + addiu a2, a2, 4 + bne v0, v1, 2b + sb t5, 3(s5) + + addiu sp, sp, 144 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_6x6_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass1_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - workspace + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 8 + +1: + // odd part + lh t0, 48(a1) + lh t1, 48(a0) + lh t2, 16(a1) + lh t3, 16(a0) + lh t4, 80(a1) + lh t5, 80(a0) + lh t6, 112(a1) + lh t7, 112(a0) + mul t0, t0, t1 // z2 + mul t1, t2, t3 // z1 + mul t2, t4, t5 // z3 + mul t3, t6, t7 // z4 + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + li t6, 7053 // FIX(0.860918669) + mul t4, t0,t4 // tmp11 + mul t5, t0,t5 // -tmp14 + addu t7, t1,t2 // tmp10 + addu t8, t7,t3 // tmp10 + z4 + mul t6, t6, t8 // tmp15 + li t8, 2139 // FIX(0.261052384) + mul t8, t7, t8 // MULTIPLY(tmp10, FIX(0.261052384)) + li t7, 2295 // FIX(0.280143716) + mul t7, t1, t7 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242) + li s1, 12998 // FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1-=z4 + subu t0, t0, t2 // z2-=z3 + addu t2, t0, t1 // z1+z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_0_765366865 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t8, t6, t8 // tmp12 + addu t3, t8, t4 // tmp12 + tmp11 + addu t3, t3, t7 // tmp10 + subu t8, t8, t9 // tmp12 + tmp13 + addu s0, t5, s0 + subu t8, t8, s0 // tmp12 + subu t9, t6, t9 + subu s1, s1, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + // even part start + lh t4, 64(a1) + lh t5, 64(a0) + lh t7, 32(a1) + lh s0, 32(a0) + lh s1, 0(a1) + lh s2, 0(a0) + lh s3, 96(a1) + lh v0, 96(a0) + mul t4, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*4],quantptr[DCTSIZE*4]) + mul t5, t7, s0 // DEQUANTIZE(inptr[DCTSIZE*2],quantptr[DCTSIZE*2]) + mul t7, s1, s2 // DEQUANTIZE(inptr[DCTSIZE*0],quantptr[DCTSIZE*0]) + mul s0, s3, v0 // DEQUANTIZE(inptr[DCTSIZE*6],quantptr[DCTSIZE*6]) + // odd part end + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // update counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 2 + addiu a1, a1, 2 + // even part rest + li s1, 10033 + li s2, 11190 + mul t4, t4, s1 // z4 + mul s1, t5, s2 // z4 + sll t5, t5, 13 // z1 + sll t7, t7, 13 + addiu t7, t7, 1024 // z3 + sll s0, s0, 13 // z2 + addu s2, t7, t4 // tmp10 + subu t4, t7, t4 // tmp11 + subu s3, t5, s0 // tmp12 + addu t2, t7, s3 // tmp21 + subu s3, t7, s3 // tmp24 + addu t7, s1, s0 // tmp12 + addu v0, s2, t7 // tmp20 + subu s2, s2, t7 // tmp25 + subu s1, s1, t5 // z4 - z1 + subu s1, s1, s0 // tmp12 + addu s0, t4, s1 // tmp22 + subu t4, t4, s1 // tmp23 + // final output stage + addu t5, v0, t3 + subu v0, v0, t3 + addu t3, t2, t1 + subu t2, t2, t1 + addu t1, s0, t8 + subu s0, s0, t8 + addu t8, t4, t9 + subu t4, t4, t9 + addu t9, s3, t0 + subu s3, s3, t0 + addu t0, s2, t6 + subu s2, s2, t6 + sra t5, t5, 11 + sra t3, t3, 11 + sra t1, t1, 11 + sra t8, t8, 11 + sra t9, t9, 11 + sra t0, t0, 11 + sra s2, s2, 11 + sra s3, s3, 11 + sra t4, t4, 11 + sra s0, s0, 11 + sra t2, t2, 11 + sra v0, v0, 11 + sw t5, 0(a2) + sw t3, 32(a2) + sw t1, 64(a2) + sw t8, 96(a2) + sw t9, 128(a2) + sw t0, 160(a2) + sw s2, 192(a2) + sw s3, 224(a2) + sw t4, 256(a2) + sw s0, 288(a2) + sw t2, 320(a2) + sw v0, 352(a2) + bgtz a3, 1b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop + +END(jsimd_idct_12x12_pass1_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2) +/* + * a0 - workspace + * a1 - output + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 12 + +1: + // Odd part + lw t0, 12(a0) + lw t1, 4(a0) + lw t2, 20(a0) + lw t3, 28(a0) + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + mul t4, t0, t4 // tmp11 + mul t5, t0, t5 // -tmp14 + addu t6, t1, t2 // tmp10 + li t7, 2139 // FIX(0.261052384) + mul t7, t6, t7 // MULTIPLY(tmp10, FIX(0.261052384)) + addu t6, t6, t3 // tmp10 + z4 + li t8, 7053 // FIX(0.860918669) + mul t6, t6, t8 // tmp15 + li t8, 2295 // FIX(0.280143716) + mul t8, t1, t8 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242)) + li s1, 12998 // FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1 -= z4 + subu t0, t0, t2 // z2 -= z3 + addu t2, t1, t0 // z1 + z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_1_847759065 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t3, t6, t7 // tmp12 + addu t7, t3, t4 + addu t7, t7, t8 // tmp10 + subu t3, t3, t9 + subu t3, t3, t5 + subu t3, t3, s0 // tmp12 + subu t9, t6, t9 + subu t9, t9, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // even part + lw t2, 16(a0) // z4 + lw t4, 8(a0) // z1 + lw t5, 0(a0) // z3 + lw t8, 24(a0) // z2 + li s0, 10033 // FIX(1.224744871) + li s1, 11190 // FIX(1.366025404) + mul t2, t2, s0 // z4 + mul s0, t4, s1 // z4 + addiu t5, t5, 0x10 + sll t5, t5, 13 // z3 + sll t4, t4, 13 // z1 + sll t8, t8, 13 // z2 + subu s1, t4, t8 // tmp12 + addu s2, t5, t2 // tmp10 + subu t2, t5, t2 // tmp11 + addu s3, t5, s1 // tmp21 + subu s1, t5, s1 // tmp24 + addu t5, s0, t8 // tmp12 + addu v0, s2, t5 // tmp20 + subu t5, s2, t5 // tmp25 + subu t4, s0, t4 + subu t4, t4, t8 // tmp12 + addu t8, t2, t4 // tmp22 + subu t2, t2, t4 // tmp23 + // increment counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 32 + // Final stage + addu t4, v0, t7 + subu v0, v0, t7 + addu t7, s3, t1 + subu s3, s3, t1 + addu t1, t8, t3 + subu t8, t8, t3 + addu t3, t2, t9 + subu t2, t2, t9 + addu t9, s1, t0 + subu s1, s1, t0 + addu t0, t5, t6 + subu t5, t5, t6 + sll t4, t4, 4 + sll t7, t7, 4 + sll t1, t1, 4 + sll t3, t3, 4 + sll t9, t9, 4 + sll t0, t0, 4 + sll t5, t5, 4 + sll s1, s1, 4 + sll t2, t2, 4 + sll t8, t8, 4 + sll s3, s3, 4 + sll v0, v0, 4 + shll_s.w t4, t4, 2 + shll_s.w t7, t7, 2 + shll_s.w t1, t1, 2 + shll_s.w t3, t3, 2 + shll_s.w t9, t9, 2 + shll_s.w t0, t0, 2 + shll_s.w t5, t5, 2 + shll_s.w s1, s1, 2 + shll_s.w t2, t2, 2 + shll_s.w t8, t8, 2 + shll_s.w s3, s3, 2 + shll_s.w v0, v0, 2 + srl t4, t4, 24 + srl t7, t7, 24 + srl t1, t1, 24 + srl t3, t3, 24 + srl t9, t9, 24 + srl t0, t0, 24 + srl t5, t5, 24 + srl s1, s1, 24 + srl t2, t2, 24 + srl t8, t8, 24 + srl s3, s3, 24 + srl v0, v0, 24 + lw t6, 0(a1) + addiu t4, t4, 0x80 + addiu t7, t7, 0x80 + addiu t1, t1, 0x80 + addiu t3, t3, 0x80 + addiu t9, t9, 0x80 + addiu t0, t0, 0x80 + addiu t5, t5, 0x80 + addiu s1, s1, 0x80 + addiu t2, t2, 0x80 + addiu t8, t8, 0x80 + addiu s3, s3, 0x80 + addiu v0, v0, 0x80 + sb t4, 0(t6) + sb t7, 1(t6) + sb t1, 2(t6) + sb t3, 3(t6) + sb t9, 4(t6) + sb t0, 5(t6) + sb t5, 6(t6) + sb s1, 7(t6) + sb t2, 8(t6) + sb t8, 9(t6) + sb s3, 10(t6) + sb v0, 11(t6) + bgtz a3, 1b + addiu a1, a1, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + jr ra + nop + +END(jsimd_idct_12x12_pass2_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2) +/* + * a0 - sample_data + * a1 - start_col + * a2 - workspace + */ + + lw t0, 0(a0) + li t7, 0xff80ff80 + addu t0, t0, a1 + ulw t1, 0(t0) + ulw t2, 4(t0) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + lw t0, 4(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 0(a2) + usw t4, 4(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 8(a2) + usw t6, 12(a2) + + lw t0, 8(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 16(a2) + usw t4, 20(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 24(a2) + usw t6, 28(a2) + + lw t0, 12(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 32(a2) + usw t4, 36(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 40(a2) + usw t6, 44(a2) + + lw t0, 16(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 48(a2) + usw t4, 52(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 56(a2) + usw t6, 60(a2) + + lw t0, 20(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 64(a2) + usw t4, 68(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 72(a2) + usw t6, 76(a2) + + lw t0, 24(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 80(a2) + usw t4, 84(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 88(a2) + usw t6, 92(a2) + + lw t0, 28(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 96(a2) + usw t4, 100(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 104(a2) + usw t6, 108(a2) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 112(a2) + usw t4, 116(a2) + usw t5, 120(a2) + usw t6, 124(a2) + + j ra + nop + +END(jsimd_convsamp_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2) +/* + * a0 - sample_data + * a1 - start_col + * a2 - workspace + */ + + .set at + + lw t0, 0(a0) + addu t0, t0, a1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 4(a0) + swc1 f2, 0(a2) + swc1 f4, 4(a2) + swc1 f6, 8(a2) + addu t0, t0, a1 + swc1 f8, 12(a2) + swc1 f10, 16(a2) + swc1 f12, 20(a2) + swc1 f14, 24(a2) + swc1 f16, 28(a2) + //elemr 1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 8(a0) + swc1 f2, 32(a2) + swc1 f4, 36(a2) + swc1 f6, 40(a2) + addu t0, t0, a1 + swc1 f8, 44(a2) + swc1 f10, 48(a2) + swc1 f12, 52(a2) + swc1 f14, 56(a2) + swc1 f16, 60(a2) + //elemr 2 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 12(a0) + swc1 f2, 64(a2) + swc1 f4, 68(a2) + swc1 f6, 72(a2) + addu t0, t0, a1 + swc1 f8, 76(a2) + swc1 f10, 80(a2) + swc1 f12, 84(a2) + swc1 f14, 88(a2) + swc1 f16, 92(a2) + //elemr 3 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 16(a0) + swc1 f2, 96(a2) + swc1 f4, 100(a2) + swc1 f6, 104(a2) + addu t0, t0, a1 + swc1 f8, 108(a2) + swc1 f10, 112(a2) + swc1 f12, 116(a2) + swc1 f14, 120(a2) + swc1 f16, 124(a2) + //elemr 4 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 20(a0) + swc1 f2, 128(a2) + swc1 f4, 132(a2) + swc1 f6, 136(a2) + addu t0, t0, a1 + swc1 f8, 140(a2) + swc1 f10, 144(a2) + swc1 f12, 148(a2) + swc1 f14, 152(a2) + swc1 f16, 156(a2) + //elemr 5 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 24(a0) + swc1 f2, 160(a2) + swc1 f4, 164(a2) + swc1 f6, 168(a2) + addu t0, t0, a1 + swc1 f8, 172(a2) + swc1 f10, 176(a2) + swc1 f12, 180(a2) + swc1 f14, 184(a2) + swc1 f16, 188(a2) + //elemr 6 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 28(a0) + swc1 f2, 192(a2) + swc1 f4, 196(a2) + swc1 f6, 200(a2) + addu t0, t0, a1 + swc1 f8, 204(a2) + swc1 f10, 208(a2) + swc1 f12, 212(a2) + swc1 f14, 216(a2) + swc1 f16, 220(a2) + //elemr 7 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + swc1 f2, 224(a2) + swc1 f4, 228(a2) + swc1 f6, 232(a2) + swc1 f8, 236(a2) + swc1 f10, 240(a2) + swc1 f12, 244(a2) + swc1 f14, 248(a2) + swc1 f16, 252(a2) + + j ra + nop + +END(jsimd_convsamp_float_mips_dspr2) + +/*****************************************************************************/ + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_powerpc.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_powerpc.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_powerpc.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_powerpc.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,741 @@ +/* + * jsimd_powerpc.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014-2015 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * PowerPC architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +static unsigned int simd_support = ~0; + +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_ALTIVEC; + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_extrgb_ycc_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_extrgbx_ycc_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_extbgr_ycc_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_extbgrx_ycc_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_extxbgr_ycc_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_extxrgb_ycc_convert_altivec; + break; + default: + altivecfct=jsimd_rgb_ycc_convert_altivec; + break; + } + + altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_extrgb_gray_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_extrgbx_gray_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_extbgr_gray_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_extbgrx_gray_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_extxbgr_gray_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_extxrgb_gray_convert_altivec; + break; + default: + altivecfct=jsimd_rgb_gray_convert_altivec; + break; + } + + altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_ycc_extrgb_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_ycc_extrgbx_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_ycc_extbgr_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_ycc_extbgrx_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_ycc_extxbgr_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_ycc_extxrgb_convert_altivec; + break; + default: + altivecfct=jsimd_ycc_rgb_convert_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_fancy_upsample_altivec(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_fancy_upsample_altivec(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_h2v2_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_h2v2_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_h2v2_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_h2v2_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_h2v2_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_h2v2_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct=jsimd_h2v2_merged_upsample_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_h2v1_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_h2v1_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_h2v1_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_h2v1_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_h2v1_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_h2v1_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct=jsimd_h2v1_merged_upsample_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_altivec(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_altivec(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_altivec(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_altivec(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_altivec(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_altivec(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_x86_64.c glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_x86_64.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/jsimd_x86_64.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/jsimd_x86_64.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,887 @@ +/* + * jsimd_x86_64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014, 2016 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 64-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. This macro allows us to verify it at runtime. + */ +#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_SSE2 | JSIMD_SSE; + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_ycc_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_ycc_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_ycc_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_ycc_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_ycc_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_ycc_convert_sse2; + break; + default: + sse2fct=jsimd_rgb_ycc_convert_sse2; + break; + } + + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_gray_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_gray_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_gray_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_gray_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_gray_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_gray_convert_sse2; + break; + default: + sse2fct=jsimd_rgb_gray_convert_sse2; + break; + } + + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_ycc_extrgb_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_ycc_extrgbx_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_ycc_extbgr_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_ycc_extbgrx_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_ycc_extxbgr_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_ycc_extxrgb_convert_sse2; + break; + default: + sse2fct=jsimd_ycc_rgb_convert_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; + break; + default: + sse2fct=jsimd_h2v2_merged_upsample_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; + break; + default: + sse2fct=jsimd_h2v1_merged_upsample_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_sse2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ + jsimd_fdct_float_sse(data); +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_sse2(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + jsimd_quantize_float_sse2(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + if (sizeof(FLOAT_MULT_TYPE) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/Makefile.am glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/Makefile.am --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/Makefile.am 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/Makefile.am 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,97 @@ +noinst_LTLIBRARIES = libsimd.la + +BUILT_SOURCES = jsimdcfg.inc + +EXTRA_DIST = nasm_lt.sh CMakeLists.txt \ + jccolext-mmx.asm jcgryext-mmx.asm jdcolext-mmx.asm jdmrgext-mmx.asm \ + jccolext-sse2.asm jcgryext-sse2.asm jdcolext-sse2.asm jdmrgext-sse2.asm \ + jccolext-sse2-64.asm jcgryext-sse2-64.asm jdcolext-sse2-64.asm \ + jdmrgext-sse2-64.asm jccolext-altivec.c jcgryext-altivec.c \ + jdcolext-altivec.c jdmrgext-altivec.c + +if SIMD_X86_64 + +libsimd_la_SOURCES = jsimd_x86_64.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ + jcolsamp.inc jdct.inc jpeg_nbits_table.inc jfdctflt-sse-64.asm \ + jccolor-sse2-64.asm jcgray-sse2-64.asm jchuff-sse2-64.asm \ + jcsample-sse2-64.asm jdcolor-sse2-64.asm jdmerge-sse2-64.asm \ + jdsample-sse2-64.asm jfdctfst-sse2-64.asm jfdctint-sse2-64.asm \ + jidctflt-sse2-64.asm jidctfst-sse2-64.asm jidctint-sse2-64.asm \ + jidctred-sse2-64.asm jquantf-sse2-64.asm jquanti-sse2-64.asm + +jccolor-sse2-64.lo: jccolext-sse2-64.asm +jcgray-sse2-64.lo: jcgryext-sse2-64.asm +jdcolor-sse2-64.lo: jdcolext-sse2-64.asm +jdmerge-sse2-64.lo: jdmrgext-sse2-64.asm + +endif + +if SIMD_I386 + +libsimd_la_SOURCES = jsimd_i386.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ + jcolsamp.inc jdct.inc jpeg_nbits_table.inc jsimdcpu.asm \ + jfdctflt-3dn.asm jidctflt-3dn.asm jquant-3dn.asm \ + jccolor-mmx.asm jcgray-mmx.asm jcsample-mmx.asm \ + jdcolor-mmx.asm jdmerge-mmx.asm jdsample-mmx.asm \ + jfdctfst-mmx.asm jfdctint-mmx.asm jidctfst-mmx.asm \ + jidctint-mmx.asm jidctred-mmx.asm jquant-mmx.asm \ + jfdctflt-sse.asm jidctflt-sse.asm jquant-sse.asm \ + jccolor-sse2.asm jcgray-sse2.asm jchuff-sse2.asm \ + jcsample-sse2.asm jdcolor-sse2.asm jdmerge-sse2.asm \ + jdsample-sse2.asm jfdctfst-sse2.asm jfdctint-sse2.asm \ + jidctflt-sse2.asm jidctfst-sse2.asm jidctint-sse2.asm \ + jidctred-sse2.asm jquantf-sse2.asm jquanti-sse2.asm + +jccolor-mmx.lo: jccolext-mmx.asm +jcgray.-mmx.lo: jcgryext-mmx.asm +jdcolor-mmx.lo: jdcolext-mmx.asm +jdmerge-mmx.lo: jdmrgext-mmx.asm +jccolor-sse2.lo: jccolext-sse2.asm +jcgray-sse2.lo: jcgryext-sse2.asm +jdcolor-sse2.lo: jdcolext-sse2.asm +jdmerge-sse2.lo: jdmrgext-sse2.asm + +endif + +if SIMD_ARM + +libsimd_la_SOURCES = jsimd_arm.c jsimd_arm_neon.S + +endif + +if SIMD_ARM_64 + +libsimd_la_SOURCES = jsimd_arm64.c jsimd_arm64_neon.S + +endif + +if SIMD_MIPS + +libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S + +endif + +if SIMD_POWERPC + +libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h \ + jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \ + jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \ + jfdctfst-altivec.c jfdctint-altivec.c \ + jidctfst-altivec.c jidctint-altivec.c \ + jquanti-altivec.c +libsimd_la_CFLAGS = -maltivec + +jccolor-altivec.lo: jccolext-altivec.c +jcgray-altivec.lo: jcgryext-altivec.c +jdcolor-altivec.lo: jdcolext-altivec.c +jdmerge-altivec.lo: jdmrgext-altivec.c + +endif + +AM_CPPFLAGS = -I$(top_srcdir) + +.asm.lo: + $(AM_V_GEN) $(LIBTOOL) $(AM_V_lt) --mode=compile --tag NASM $(srcdir)/nasm_lt.sh $(AM_V_lt) $(NASM) $(NAFLAGS) -I$(srcdir) -I. $< -o $@ + +jsimdcfg.inc: $(srcdir)/jsimdcfg.inc.h ../jpeglib.h ../jconfig.h ../jmorecfg.h + $(AM_V_GEN) $(CPP) -I$(top_builddir) -I$(top_builddir)/simd $(srcdir)/jsimdcfg.inc.h | $(EGREP) "^[\;%]|^\ %" | sed 's%_cpp_protection_%%' | sed 's@% define@%define@g' > $@ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/nasm_lt.sh glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/nasm_lt.sh --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/simd/nasm_lt.sh 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/simd/nasm_lt.sh 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,60 @@ +#! /bin/sh +command="" +infile="" +o_opt=no +pic=no +while [ $# -gt 0 ]; do + case "$1" in + --silent) + exec > /dev/null + ;; + -DPIC|-fPIC|-fpic|-Kpic|-KPIC) + if [ "$pic" != "yes" ] ; then + command="$command -DPIC" + pic=yes + fi + ;; + -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \ + -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64) + # it's a file format specifier for nasm. + command="$command $1" + ;; + -f*) + # maybe a code-generation flag for gcc. + ;; + -[Ii]*) + incdir=`echo "$1" | sed 's/^-[Ii]//'` + if [ "x$incdir" = x -a "x$2" != x ] ; then + case "$2" in + -*) ;; + *) incdir="$2"; shift;; + esac + fi + if [ "x$incdir" != x ] ; then + # In the case of NASM, the trailing slash is necessary. + incdir=`echo "$incdir" | sed 's%/*$%/%'` + command="$command -I$incdir" + fi + ;; + -o*) + o_opt=yes + command="$command $1" + ;; + *.asm) + infile=$1 + command="$command $1" + ;; + *) + command="$command $1" + ;; + esac + shift +done +if [ "$o_opt" != yes ] ; then + # By default, NASM creates an output file + # in the same directory as the input file. + outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o" + command="$command $outfile" +fi +echo $command +exec $command diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/structure.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/structure.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/structure.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/structure.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,906 @@ +IJG JPEG LIBRARY: SYSTEM ARCHITECTURE + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. +It was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo. +For conditions of distribution and use, see the accompanying README.ijg file. + + +This file provides an overview of the architecture of the IJG JPEG software; +that is, the functions of the various modules in the system and the interfaces +between modules. For more precise details about any data structure or calling +convention, see the include files and comments in the source code. + +We assume that the reader is already somewhat familiar with the JPEG standard. +The README.ijg file includes references for learning about JPEG. The file +libjpeg.txt describes the library from the viewpoint of an application +programmer using the library; it's best to read that file before this one. +Also, the file coderules.txt describes the coding style conventions we use. + +In this document, JPEG-specific terminology follows the JPEG standard: + A "component" means a color channel, e.g., Red or Luminance. + A "sample" is a single component value (i.e., one number in the image data). + A "coefficient" is a frequency coefficient (a DCT transform output number). + A "block" is an 8x8 group of samples or coefficients. + An "MCU" (minimum coded unit) is an interleaved set of blocks of size + determined by the sampling factors, or a single block in a + noninterleaved scan. +We do not use the terms "pixel" and "sample" interchangeably. When we say +pixel, we mean an element of the full-size image, while a sample is an element +of the downsampled image. Thus the number of samples may vary across +components while the number of pixels does not. (This terminology is not used +rigorously throughout the code, but it is used in places where confusion would +otherwise result.) + + +*** System features *** + +The IJG distribution contains two parts: + * A subroutine library for JPEG compression and decompression. + * cjpeg/djpeg, two sample applications that use the library to transform + JFIF JPEG files to and from several other image formats. +cjpeg/djpeg are of no great intellectual complexity: they merely add a simple +command-line user interface and I/O routines for several uncompressed image +formats. This document concentrates on the library itself. + +We desire the library to be capable of supporting all JPEG baseline, extended +sequential, and progressive DCT processes. Hierarchical processes are not +supported. + +The library does not support the lossless (spatial) JPEG process. Lossless +JPEG shares little or no code with lossy JPEG, and would normally be used +without the extensive pre- and post-processing provided by this library. +We feel that lossless JPEG is better handled by a separate library. + +Within these limits, any set of compression parameters allowed by the JPEG +spec should be readable for decompression. (We can be more restrictive about +what formats we can generate.) Although the system design allows for all +parameter values, some uncommon settings are not yet implemented and may +never be; nonintegral sampling ratios are the prime example. Furthermore, +we treat 8-bit vs. 12-bit data precision as a compile-time switch, not a +run-time option, because most machines can store 8-bit pixels much more +compactly than 12-bit. + +By itself, the library handles only interchange JPEG datastreams --- in +particular the widely used JFIF file format. The library can be used by +surrounding code to process interchange or abbreviated JPEG datastreams that +are embedded in more complex file formats. (For example, libtiff uses this +library to implement JPEG compression within the TIFF file format.) + +The library includes a substantial amount of code that is not covered by the +JPEG standard but is necessary for typical applications of JPEG. These +functions preprocess the image before JPEG compression or postprocess it after +decompression. They include colorspace conversion, downsampling/upsampling, +and color quantization. This code can be omitted if not needed. + +A wide range of quality vs. speed tradeoffs are possible in JPEG processing, +and even more so in decompression postprocessing. The decompression library +provides multiple implementations that cover most of the useful tradeoffs, +ranging from very-high-quality down to fast-preview operation. On the +compression side we have generally not provided low-quality choices, since +compression is normally less time-critical. It should be understood that the +low-quality modes may not meet the JPEG standard's accuracy requirements; +nonetheless, they are useful for viewers. + + +*** System overview *** + +The compressor and decompressor are each divided into two main sections: +the JPEG compressor or decompressor proper, and the preprocessing or +postprocessing functions. The interface between these two sections is the +image data that the official JPEG spec regards as its input or output: this +data is in the colorspace to be used for compression, and it is downsampled +to the sampling factors to be used. The preprocessing and postprocessing +steps are responsible for converting a normal image representation to or from +this form. (Those few applications that want to deal with YCbCr downsampled +data can skip the preprocessing or postprocessing step.) + +Looking more closely, the compressor library contains the following main +elements: + + Preprocessing: + * Color space conversion (e.g., RGB to YCbCr). + * Edge expansion and downsampling. Optionally, this step can do simple + smoothing --- this is often helpful for low-quality source data. + JPEG proper: + * MCU assembly, DCT, quantization. + * Entropy coding (sequential or progressive, Huffman or arithmetic). + +In addition to these modules we need overall control, marker generation, +and support code (memory management & error handling). There is also a +module responsible for physically writing the output data --- typically +this is just an interface to fwrite(), but some applications may need to +do something else with the data. + +The decompressor library contains the following main elements: + + JPEG proper: + * Entropy decoding (sequential or progressive, Huffman or arithmetic). + * Dequantization, inverse DCT, MCU disassembly. + Postprocessing: + * Upsampling. Optionally, this step may be able to do more general + rescaling of the image. + * Color space conversion (e.g., YCbCr to RGB). This step may also + provide gamma adjustment [ currently it does not ]. + * Optional color quantization (e.g., reduction to 256 colors). + * Optional color precision reduction (e.g., 24-bit to 15-bit color). + [This feature is not currently implemented.] + +We also need overall control, marker parsing, and a data source module. +The support code (memory management & error handling) can be shared with +the compression half of the library. + +There may be several implementations of each of these elements, particularly +in the decompressor, where a wide range of speed/quality tradeoffs is very +useful. It must be understood that some of the best speedups involve +merging adjacent steps in the pipeline. For example, upsampling, color space +conversion, and color quantization might all be done at once when using a +low-quality ordered-dither technique. The system architecture is designed to +allow such merging where appropriate. + + +Note: it is convenient to regard edge expansion (padding to block boundaries) +as a preprocessing/postprocessing function, even though the JPEG spec includes +it in compression/decompression. We do this because downsampling/upsampling +can be simplified a little if they work on padded data: it's not necessary to +have special cases at the right and bottom edges. Therefore the interface +buffer is always an integral number of blocks wide and high, and we expect +compression preprocessing to pad the source data properly. Padding will occur +only to the next block (8-sample) boundary. In an interleaved-scan situation, +additional dummy blocks may be used to fill out MCUs, but the MCU assembly and +disassembly logic will create or discard these blocks internally. (This is +advantageous for speed reasons, since we avoid DCTing the dummy blocks. +It also permits a small reduction in file size, because the compressor can +choose dummy block contents so as to minimize their size in compressed form. +Finally, it makes the interface buffer specification independent of whether +the file is actually interleaved or not.) Applications that wish to deal +directly with the downsampled data must provide similar buffering and padding +for odd-sized images. + + +*** Poor man's object-oriented programming *** + +It should be clear by now that we have a lot of quasi-independent processing +steps, many of which have several possible behaviors. To avoid cluttering the +code with lots of switch statements, we use a simple form of object-style +programming to separate out the different possibilities. + +For example, two different color quantization algorithms could be implemented +as two separate modules that present the same external interface; at runtime, +the calling code will access the proper module indirectly through an "object". + +We can get the limited features we need while staying within portable C. +The basic tool is a function pointer. An "object" is just a struct +containing one or more function pointer fields, each of which corresponds to +a method name in real object-oriented languages. During initialization we +fill in the function pointers with references to whichever module we have +determined we need to use in this run. Then invocation of the module is done +by indirecting through a function pointer; on most machines this is no more +expensive than a switch statement, which would be the only other way of +making the required run-time choice. The really significant benefit, of +course, is keeping the source code clean and well structured. + +We can also arrange to have private storage that varies between different +implementations of the same kind of object. We do this by making all the +module-specific object structs be separately allocated entities, which will +be accessed via pointers in the master compression or decompression struct. +The "public" fields or methods for a given kind of object are specified by +a commonly known struct. But a module's initialization code can allocate +a larger struct that contains the common struct as its first member, plus +additional private fields. With appropriate pointer casting, the module's +internal functions can access these private fields. (For a simple example, +see jdatadst.c, which implements the external interface specified by struct +jpeg_destination_mgr, but adds extra fields.) + +(Of course this would all be a lot easier if we were using C++, but we are +not yet prepared to assume that everyone has a C++ compiler.) + +An important benefit of this scheme is that it is easy to provide multiple +versions of any method, each tuned to a particular case. While a lot of +precalculation might be done to select an optimal implementation of a method, +the cost per invocation is constant. For example, the upsampling step might +have a "generic" method, plus one or more "hardwired" methods for the most +popular sampling factors; the hardwired methods would be faster because they'd +use straight-line code instead of for-loops. The cost to determine which +method to use is paid only once, at startup, and the selection criteria are +hidden from the callers of the method. + +This plan differs a little bit from usual object-oriented structures, in that +only one instance of each object class will exist during execution. The +reason for having the class structure is that on different runs we may create +different instances (choose to execute different modules). You can think of +the term "method" as denoting the common interface presented by a particular +set of interchangeable functions, and "object" as denoting a group of related +methods, or the total shared interface behavior of a group of modules. + + +*** Overall control structure *** + +We previously mentioned the need for overall control logic in the compression +and decompression libraries. In IJG implementations prior to v5, overall +control was mostly provided by "pipeline control" modules, which proved to be +large, unwieldy, and hard to understand. To improve the situation, the +control logic has been subdivided into multiple modules. The control modules +consist of: + +1. Master control for module selection and initialization. This has two +responsibilities: + + 1A. Startup initialization at the beginning of image processing. + The individual processing modules to be used in this run are selected + and given initialization calls. + + 1B. Per-pass control. This determines how many passes will be performed + and calls each active processing module to configure itself + appropriately at the beginning of each pass. End-of-pass processing, + where necessary, is also invoked from the master control module. + + Method selection is partially distributed, in that a particular processing + module may contain several possible implementations of a particular method, + which it will select among when given its initialization call. The master + control code need only be concerned with decisions that affect more than + one module. + +2. Data buffering control. A separate control module exists for each + inter-processing-step data buffer. This module is responsible for + invoking the processing steps that write or read that data buffer. + +Each buffer controller sees the world as follows: + +input data => processing step A => buffer => processing step B => output data + | | | + ------------------ controller ------------------ + +The controller knows the dataflow requirements of steps A and B: how much data +they want to accept in one chunk and how much they output in one chunk. Its +function is to manage its buffer and call A and B at the proper times. + +A data buffer control module may itself be viewed as a processing step by a +higher-level control module; thus the control modules form a binary tree with +elementary processing steps at the leaves of the tree. + +The control modules are objects. A considerable amount of flexibility can +be had by replacing implementations of a control module. For example: +* Merging of adjacent steps in the pipeline is done by replacing a control + module and its pair of processing-step modules with a single processing- + step module. (Hence the possible merges are determined by the tree of + control modules.) +* In some processing modes, a given interstep buffer need only be a "strip" + buffer large enough to accommodate the desired data chunk sizes. In other + modes, a full-image buffer is needed and several passes are required. + The control module determines which kind of buffer is used and manipulates + virtual array buffers as needed. One or both processing steps may be + unaware of the multi-pass behavior. + +In theory, we might be able to make all of the data buffer controllers +interchangeable and provide just one set of implementations for all. In +practice, each one contains considerable special-case processing for its +particular job. The buffer controller concept should be regarded as an +overall system structuring principle, not as a complete description of the +task performed by any one controller. + + +*** Compression object structure *** + +Here is a sketch of the logical structure of the JPEG compression library: + + |-- Colorspace conversion + |-- Preprocessing controller --| + | |-- Downsampling +Main controller --| + | |-- Forward DCT, quantize + |-- Coefficient controller --| + |-- Entropy encoding + +This sketch also describes the flow of control (subroutine calls) during +typical image data processing. Each of the components shown in the diagram is +an "object" which may have several different implementations available. One +or more source code files contain the actual implementation(s) of each object. + +The objects shown above are: + +* Main controller: buffer controller for the subsampled-data buffer, which + holds the preprocessed input data. This controller invokes preprocessing to + fill the subsampled-data buffer, and JPEG compression to empty it. There is + usually no need for a full-image buffer here; a strip buffer is adequate. + +* Preprocessing controller: buffer controller for the downsampling input data + buffer, which lies between colorspace conversion and downsampling. Note + that a unified conversion/downsampling module would probably replace this + controller entirely. + +* Colorspace conversion: converts application image data into the desired + JPEG color space; also changes the data from pixel-interleaved layout to + separate component planes. Processes one pixel row at a time. + +* Downsampling: performs reduction of chroma components as required. + Optionally may perform pixel-level smoothing as well. Processes a "row + group" at a time, where a row group is defined as Vmax pixel rows of each + component before downsampling, and Vk sample rows afterwards (remember Vk + differs across components). Some downsampling or smoothing algorithms may + require context rows above and below the current row group; the + preprocessing controller is responsible for supplying these rows via proper + buffering. The downsampler is responsible for edge expansion at the right + edge (i.e., extending each sample row to a multiple of 8 samples); but the + preprocessing controller is responsible for vertical edge expansion (i.e., + duplicating the bottom sample row as needed to make a multiple of 8 rows). + +* Coefficient controller: buffer controller for the DCT-coefficient data. + This controller handles MCU assembly, including insertion of dummy DCT + blocks when needed at the right or bottom edge. When performing + Huffman-code optimization or emitting a multiscan JPEG file, this + controller is responsible for buffering the full image. The equivalent of + one fully interleaved MCU row of subsampled data is processed per call, + even when the JPEG file is noninterleaved. + +* Forward DCT and quantization: Perform DCT, quantize, and emit coefficients. + Works on one or more DCT blocks at a time. (Note: the coefficients are now + emitted in normal array order, which the entropy encoder is expected to + convert to zigzag order as necessary. Prior versions of the IJG code did + the conversion to zigzag order within the quantization step.) + +* Entropy encoding: Perform Huffman or arithmetic entropy coding and emit the + coded data to the data destination module. Works on one MCU per call. + For progressive JPEG, the same DCT blocks are fed to the entropy coder + during each pass, and the coder must emit the appropriate subset of + coefficients. + +In addition to the above objects, the compression library includes these +objects: + +* Master control: determines the number of passes required, controls overall + and per-pass initialization of the other modules. + +* Marker writing: generates JPEG markers (except for RSTn, which is emitted + by the entropy encoder when needed). + +* Data destination manager: writes the output JPEG datastream to its final + destination (e.g., a file). The destination manager supplied with the + library knows how to write to a stdio stream or to a memory buffer; + for other behaviors, the surrounding application may provide its own + destination manager. + +* Memory manager: allocates and releases memory, controls virtual arrays + (with backing store management, where required). + +* Error handler: performs formatting and output of error and trace messages; + determines handling of nonfatal errors. The surrounding application may + override some or all of this object's methods to change error handling. + +* Progress monitor: supports output of "percent-done" progress reports. + This object represents an optional callback to the surrounding application: + if wanted, it must be supplied by the application. + +The error handler, destination manager, and progress monitor objects are +defined as separate objects in order to simplify application-specific +customization of the JPEG library. A surrounding application may override +individual methods or supply its own all-new implementation of one of these +objects. The object interfaces for these objects are therefore treated as +part of the application interface of the library, whereas the other objects +are internal to the library. + +The error handler and memory manager are shared by JPEG compression and +decompression; the progress monitor, if used, may be shared as well. + + +*** Decompression object structure *** + +Here is a sketch of the logical structure of the JPEG decompression library: + + |-- Entropy decoding + |-- Coefficient controller --| + | |-- Dequantize, Inverse DCT +Main controller --| + | |-- Upsampling + |-- Postprocessing controller --| |-- Colorspace conversion + |-- Color quantization + |-- Color precision reduction + +As before, this diagram also represents typical control flow. The objects +shown are: + +* Main controller: buffer controller for the subsampled-data buffer, which + holds the output of JPEG decompression proper. This controller's primary + task is to feed the postprocessing procedure. Some upsampling algorithms + may require context rows above and below the current row group; when this + is true, the main controller is responsible for managing its buffer so as + to make context rows available. In the current design, the main buffer is + always a strip buffer; a full-image buffer is never required. + +* Coefficient controller: buffer controller for the DCT-coefficient data. + This controller handles MCU disassembly, including deletion of any dummy + DCT blocks at the right or bottom edge. When reading a multiscan JPEG + file, this controller is responsible for buffering the full image. + (Buffering DCT coefficients, rather than samples, is necessary to support + progressive JPEG.) The equivalent of one fully interleaved MCU row of + subsampled data is processed per call, even when the source JPEG file is + noninterleaved. + +* Entropy decoding: Read coded data from the data source module and perform + Huffman or arithmetic entropy decoding. Works on one MCU per call. + For progressive JPEG decoding, the coefficient controller supplies the prior + coefficients of each MCU (initially all zeroes), which the entropy decoder + modifies in each scan. + +* Dequantization and inverse DCT: like it says. Note that the coefficients + buffered by the coefficient controller have NOT been dequantized; we + merge dequantization and inverse DCT into a single step for speed reasons. + When scaled-down output is asked for, simplified DCT algorithms may be used + that emit fewer samples per DCT block, not the full 8x8. Works on one DCT + block at a time. + +* Postprocessing controller: buffer controller for the color quantization + input buffer, when quantization is in use. (Without quantization, this + controller just calls the upsampler.) For two-pass quantization, this + controller is responsible for buffering the full-image data. + +* Upsampling: restores chroma components to full size. (May support more + general output rescaling, too. Note that if undersized DCT outputs have + been emitted by the DCT module, this module must adjust so that properly + sized outputs are created.) Works on one row group at a time. This module + also calls the color conversion module, so its top level is effectively a + buffer controller for the upsampling->color conversion buffer. However, in + all but the highest-quality operating modes, upsampling and color + conversion are likely to be merged into a single step. + +* Colorspace conversion: convert from JPEG color space to output color space, + and change data layout from separate component planes to pixel-interleaved. + Works on one pixel row at a time. + +* Color quantization: reduce the data to colormapped form, using either an + externally specified colormap or an internally generated one. This module + is not used for full-color output. Works on one pixel row at a time; may + require two passes to generate a color map. Note that the output will + always be a single component representing colormap indexes. In the current + design, the output values are JSAMPLEs, so an 8-bit compilation cannot + quantize to more than 256 colors. This is unlikely to be a problem in + practice. + +* Color reduction: this module handles color precision reduction, e.g., + generating 15-bit color (5 bits/primary) from JPEG's 24-bit output. + Not quite clear yet how this should be handled... should we merge it with + colorspace conversion??? + +Note that some high-speed operating modes might condense the entire +postprocessing sequence to a single module (upsample, color convert, and +quantize in one step). + +In addition to the above objects, the decompression library includes these +objects: + +* Master control: determines the number of passes required, controls overall + and per-pass initialization of the other modules. This is subdivided into + input and output control: jdinput.c controls only input-side processing, + while jdmaster.c handles overall initialization and output-side control. + +* Marker reading: decodes JPEG markers (except for RSTn). + +* Data source manager: supplies the input JPEG datastream. The source + manager supplied with the library knows how to read from a stdio stream + or from a memory buffer; for other behaviors, the surrounding application + may provide its own source manager. + +* Memory manager: same as for compression library. + +* Error handler: same as for compression library. + +* Progress monitor: same as for compression library. + +As with compression, the data source manager, error handler, and progress +monitor are candidates for replacement by a surrounding application. + + +*** Decompression input and output separation *** + +To support efficient incremental display of progressive JPEG files, the +decompressor is divided into two sections that can run independently: + +1. Data input includes marker parsing, entropy decoding, and input into the + coefficient controller's DCT coefficient buffer. Note that this + processing is relatively cheap and fast. + +2. Data output reads from the DCT coefficient buffer and performs the IDCT + and all postprocessing steps. + +For a progressive JPEG file, the data input processing is allowed to get +arbitrarily far ahead of the data output processing. (This occurs only +if the application calls jpeg_consume_input(); otherwise input and output +run in lockstep, since the input section is called only when the output +section needs more data.) In this way the application can avoid making +extra display passes when data is arriving faster than the display pass +can run. Furthermore, it is possible to abort an output pass without +losing anything, since the coefficient buffer is read-only as far as the +output section is concerned. See libjpeg.txt for more detail. + +A full-image coefficient array is only created if the JPEG file has multiple +scans (or if the application specifies buffered-image mode anyway). When +reading a single-scan file, the coefficient controller normally creates only +a one-MCU buffer, so input and output processing must run in lockstep in this +case. jpeg_consume_input() is effectively a no-op in this situation. + +The main impact of dividing the decompressor in this fashion is that we must +be very careful with shared variables in the cinfo data structure. Each +variable that can change during the course of decompression must be +classified as belonging to data input or data output, and each section must +look only at its own variables. For example, the data output section may not +depend on any of the variables that describe the current scan in the JPEG +file, because these may change as the data input section advances into a new +scan. + +The progress monitor is (somewhat arbitrarily) defined to treat input of the +file as one pass when buffered-image mode is not used, and to ignore data +input work completely when buffered-image mode is used. Note that the +library has no reliable way to predict the number of passes when dealing +with a progressive JPEG file, nor can it predict the number of output passes +in buffered-image mode. So the work estimate is inherently bogus anyway. + +No comparable division is currently made in the compression library, because +there isn't any real need for it. + + +*** Data formats *** + +Arrays of pixel sample values use the following data structure: + + typedef something JSAMPLE; a pixel component value, 0..MAXJSAMPLE + typedef JSAMPLE *JSAMPROW; ptr to a row of samples + typedef JSAMPROW *JSAMPARRAY; ptr to a list of rows + typedef JSAMPARRAY *JSAMPIMAGE; ptr to a list of color-component arrays + +The basic element type JSAMPLE will typically be one of unsigned char, +(signed) char, or short. Short will be used if samples wider than 8 bits are +to be supported (this is a compile-time option). Otherwise, unsigned char is +used if possible. If the compiler only supports signed chars, then it is +necessary to mask off the value when reading. Thus, all reads of JSAMPLE +values must be coded as "GETJSAMPLE(value)", where the macro will be defined +as "((value) & 0xFF)" on signed-char machines and "((int) (value))" elsewhere. + +With these conventions, JSAMPLE values can be assumed to be >= 0. This helps +simplify correct rounding during downsampling, etc. The JPEG standard's +specification that sample values run from -128..127 is accommodated by +subtracting 128 from the sample value in the DCT step. Similarly, during +decompression the output of the IDCT step will be immediately shifted back to +0..255. (NB: different values are required when 12-bit samples are in use. +The code is written in terms of MAXJSAMPLE and CENTERJSAMPLE, which will be +defined as 255 and 128 respectively in an 8-bit implementation, and as 4095 +and 2048 in a 12-bit implementation.) + +We use a pointer per row, rather than a two-dimensional JSAMPLE array. This +choice costs only a small amount of memory and has several benefits: +* Code using the data structure doesn't need to know the allocated width of + the rows. This simplifies edge expansion/compression, since we can work + in an array that's wider than the logical picture width. +* Indexing doesn't require multiplication; this is a performance win on many + machines. +* Arrays with more than 64K total elements can be supported even on machines + where malloc() cannot allocate chunks larger than 64K. +* The rows forming a component array may be allocated at different times + without extra copying. This trick allows some speedups in smoothing steps + that need access to the previous and next rows. + +Note that each color component is stored in a separate array; we don't use the +traditional layout in which the components of a pixel are stored together. +This simplifies coding of modules that work on each component independently, +because they don't need to know how many components there are. Furthermore, +we can read or write each component to a temporary file independently, which +is helpful when dealing with noninterleaved JPEG files. + +In general, a specific sample value is accessed by code such as + GETJSAMPLE(image[colorcomponent][row][col]) +where col is measured from the image left edge, but row is measured from the +first sample row currently in memory. Either of the first two indexings can +be precomputed by copying the relevant pointer. + + +Since most image-processing applications prefer to work on images in which +the components of a pixel are stored together, the data passed to or from the +surrounding application uses the traditional convention: a single pixel is +represented by N consecutive JSAMPLE values, and an image row is an array of +(# of color components)*(image width) JSAMPLEs. One or more rows of data can +be represented by a pointer of type JSAMPARRAY in this scheme. This scheme is +converted to component-wise storage inside the JPEG library. (Applications +that want to skip JPEG preprocessing or postprocessing will have to contend +with component-wise storage.) + + +Arrays of DCT-coefficient values use the following data structure: + + typedef short JCOEF; a 16-bit signed integer + typedef JCOEF JBLOCK[DCTSIZE2]; an 8x8 block of coefficients + typedef JBLOCK *JBLOCKROW; ptr to one horizontal row of 8x8 blocks + typedef JBLOCKROW *JBLOCKARRAY; ptr to a list of such rows + typedef JBLOCKARRAY *JBLOCKIMAGE; ptr to a list of color component arrays + +The underlying type is at least a 16-bit signed integer; while "short" is big +enough on all machines of interest, on some machines it is preferable to use +"int" for speed reasons, despite the storage cost. Coefficients are grouped +into 8x8 blocks (but we always use #defines DCTSIZE and DCTSIZE2 rather than +"8" and "64"). + +The contents of a coefficient block may be in either "natural" or zigzagged +order, and may be true values or divided by the quantization coefficients, +depending on where the block is in the processing pipeline. In the current +library, coefficient blocks are kept in natural order everywhere; the entropy +codecs zigzag or dezigzag the data as it is written or read. The blocks +contain quantized coefficients everywhere outside the DCT/IDCT subsystems. +(This latter decision may need to be revisited to support variable +quantization a la JPEG Part 3.) + +Notice that the allocation unit is now a row of 8x8 blocks, corresponding to +eight rows of samples. Otherwise the structure is much the same as for +samples, and for the same reasons. + + +*** Suspendable processing *** + +In some applications it is desirable to use the JPEG library as an +incremental, memory-to-memory filter. In this situation the data source or +destination may be a limited-size buffer, and we can't rely on being able to +empty or refill the buffer at arbitrary times. Instead the application would +like to have control return from the library at buffer overflow/underrun, and +then resume compression or decompression at a later time. + +This scenario is supported for simple cases. (For anything more complex, we +recommend that the application "bite the bullet" and develop real multitasking +capability.) The libjpeg.txt file goes into more detail about the usage and +limitations of this capability; here we address the implications for library +structure. + +The essence of the problem is that the entropy codec (coder or decoder) must +be prepared to stop at arbitrary times. In turn, the controllers that call +the entropy codec must be able to stop before having produced or consumed all +the data that they normally would handle in one call. That part is reasonably +straightforward: we make the controller call interfaces include "progress +counters" which indicate the number of data chunks successfully processed, and +we require callers to test the counter rather than just assume all of the data +was processed. + +Rather than trying to restart at an arbitrary point, the current Huffman +codecs are designed to restart at the beginning of the current MCU after a +suspension due to buffer overflow/underrun. At the start of each call, the +codec's internal state is loaded from permanent storage (in the JPEG object +structures) into local variables. On successful completion of the MCU, the +permanent state is updated. (This copying is not very expensive, and may even +lead to *improved* performance if the local variables can be registerized.) +If a suspension occurs, the codec simply returns without updating the state, +thus effectively reverting to the start of the MCU. Note that this implies +leaving some data unprocessed in the source/destination buffer (ie, the +compressed partial MCU). The data source/destination module interfaces are +specified so as to make this possible. This also implies that the data buffer +must be large enough to hold a worst-case compressed MCU; a couple thousand +bytes should be enough. + +In a successive-approximation AC refinement scan, the progressive Huffman +decoder has to be able to undo assignments of newly nonzero coefficients if it +suspends before the MCU is complete, since decoding requires distinguishing +previously-zero and previously-nonzero coefficients. This is a bit tedious +but probably won't have much effect on performance. Other variants of Huffman +decoding need not worry about this, since they will just store the same values +again if forced to repeat the MCU. + +This approach would probably not work for an arithmetic codec, since its +modifiable state is quite large and couldn't be copied cheaply. Instead it +would have to suspend and resume exactly at the point of the buffer end. + +The JPEG marker reader is designed to cope with suspension at an arbitrary +point. It does so by backing up to the start of the marker parameter segment, +so the data buffer must be big enough to hold the largest marker of interest. +Again, a couple KB should be adequate. (A special "skip" convention is used +to bypass COM and APPn markers, so these can be larger than the buffer size +without causing problems; otherwise a 64K buffer would be needed in the worst +case.) + +The JPEG marker writer currently does *not* cope with suspension. +We feel that this is not necessary; it is much easier simply to require +the application to ensure there is enough buffer space before starting. (An +empty 2K buffer is more than sufficient for the header markers; and ensuring +there are a dozen or two bytes available before calling jpeg_finish_compress() +will suffice for the trailer.) This would not work for writing multi-scan +JPEG files, but we simply do not intend to support that capability with +suspension. + + +*** Memory manager services *** + +The JPEG library's memory manager controls allocation and deallocation of +memory, and it manages large "virtual" data arrays on machines where the +operating system does not provide virtual memory. Note that the same +memory manager serves both compression and decompression operations. + +In all cases, allocated objects are tied to a particular compression or +decompression master record, and they will be released when that master +record is destroyed. + +The memory manager does not provide explicit deallocation of objects. +Instead, objects are created in "pools" of free storage, and a whole pool +can be freed at once. This approach helps prevent storage-leak bugs, and +it speeds up operations whenever malloc/free are slow (as they often are). +The pools can be regarded as lifetime identifiers for objects. Two +pools/lifetimes are defined: + * JPOOL_PERMANENT lasts until master record is destroyed + * JPOOL_IMAGE lasts until done with image (JPEG datastream) +Permanent lifetime is used for parameters and tables that should be carried +across from one datastream to another; this includes all application-visible +parameters. Image lifetime is used for everything else. (A third lifetime, +JPOOL_PASS = one processing pass, was originally planned. However it was +dropped as not being worthwhile. The actual usage patterns are such that the +peak memory usage would be about the same anyway; and having per-pass storage +substantially complicates the virtual memory allocation rules --- see below.) + +The memory manager deals with three kinds of object: +1. "Small" objects. Typically these require no more than 10K-20K total. +2. "Large" objects. These may require tens to hundreds of K depending on + image size. Semantically they behave the same as small objects, but we + distinguish them because pool allocation heuristics may differ for large and + small objects (historically, large objects were also referenced by far + pointers on MS-DOS machines.) Note that individual "large" objects cannot + exceed the size allowed by type size_t, which may be 64K or less on some + machines. +3. "Virtual" objects. These are large 2-D arrays of JSAMPLEs or JBLOCKs + (typically large enough for the entire image being processed). The + memory manager provides stripwise access to these arrays. On machines + without virtual memory, the rest of the array may be swapped out to a + temporary file. + +(Note: JSAMPARRAY and JBLOCKARRAY data structures are a combination of large +objects for the data proper and small objects for the row pointers. For +convenience and speed, the memory manager provides single routines to create +these structures. Similarly, virtual arrays include a small control block +and a JSAMPARRAY or JBLOCKARRAY working buffer, all created with one call.) + +In the present implementation, virtual arrays are only permitted to have image +lifespan. (Permanent lifespan would not be reasonable, and pass lifespan is +not very useful since a virtual array's raison d'etre is to store data for +multiple passes through the image.) We also expect that only "small" objects +will be given permanent lifespan, though this restriction is not required by +the memory manager. + +In a non-virtual-memory machine, some performance benefit can be gained by +making the in-memory buffers for virtual arrays be as large as possible. +(For small images, the buffers might fit entirely in memory, so blind +swapping would be very wasteful.) The memory manager will adjust the height +of the buffers to fit within a prespecified maximum memory usage. In order +to do this in a reasonably optimal fashion, the manager needs to allocate all +of the virtual arrays at once. Therefore, there isn't a one-step allocation +routine for virtual arrays; instead, there is a "request" routine that simply +allocates the control block, and a "realize" routine (called just once) that +determines space allocation and creates all of the actual buffers. The +realize routine must allow for space occupied by non-virtual large objects. +(We don't bother to factor in the space needed for small objects, on the +grounds that it isn't worth the trouble.) + +To support all this, we establish the following protocol for doing business +with the memory manager: + 1. Modules must request virtual arrays (which may have only image lifespan) + during the initial setup phase, i.e., in their jinit_xxx routines. + 2. All "large" objects (including JSAMPARRAYs and JBLOCKARRAYs) must also be + allocated during initial setup. + 3. realize_virt_arrays will be called at the completion of initial setup. + The above conventions ensure that sufficient information is available + for it to choose a good size for virtual array buffers. +Small objects of any lifespan may be allocated at any time. We expect that +the total space used for small objects will be small enough to be negligible +in the realize_virt_arrays computation. + +In a virtual-memory machine, we simply pretend that the available space is +infinite, thus causing realize_virt_arrays to decide that it can allocate all +the virtual arrays as full-size in-memory buffers. The overhead of the +virtual-array access protocol is very small when no swapping occurs. + +A virtual array can be specified to be "pre-zeroed"; when this flag is set, +never-yet-written sections of the array are set to zero before being made +available to the caller. If this flag is not set, never-written sections +of the array contain garbage. (This feature exists primarily because the +equivalent logic would otherwise be needed in jdcoefct.c for progressive +JPEG mode; we may as well make it available for possible other uses.) + +The first write pass on a virtual array is required to occur in top-to-bottom +order; read passes, as well as any write passes after the first one, may +access the array in any order. This restriction exists partly to simplify +the virtual array control logic, and partly because some file systems may not +support seeking beyond the current end-of-file in a temporary file. The main +implication of this restriction is that rearrangement of rows (such as +converting top-to-bottom data order to bottom-to-top) must be handled while +reading data out of the virtual array, not while putting it in. + + +*** Memory manager internal structure *** + +To isolate system dependencies as much as possible, we have broken the +memory manager into two parts. There is a reasonably system-independent +"front end" (jmemmgr.c) and a "back end" that contains only the code +likely to change across systems. All of the memory management methods +outlined above are implemented by the front end. The back end provides +the following routines for use by the front end (none of these routines +are known to the rest of the JPEG code): + +jpeg_mem_init, jpeg_mem_term system-dependent initialization/shutdown + +jpeg_get_small, jpeg_free_small interface to malloc and free library routines + (or their equivalents) + +jpeg_get_large, jpeg_free_large historically was used to interface with + FAR malloc/free on MS-DOS machines; now the + same as jpeg_get_small/jpeg_free_small + +jpeg_mem_available estimate available memory + +jpeg_open_backing_store create a backing-store object + +read_backing_store, manipulate a backing-store object +write_backing_store, +close_backing_store + +On some systems there will be more than one type of backing-store object +(specifically, in MS-DOS a backing store file might be an area of extended +memory as well as a disk file). jpeg_open_backing_store is responsible for +choosing how to implement a given object. The read/write/close routines +are method pointers in the structure that describes a given object; this +lets them be different for different object types. + +It may be necessary to ensure that backing store objects are explicitly +released upon abnormal program termination. For example, MS-DOS won't free +extended memory by itself. To support this, we will expect the main program +or surrounding application to arrange to call self_destruct (typically via +jpeg_destroy) upon abnormal termination. This may require a SIGINT signal +handler or equivalent. We don't want to have the back end module install its +own signal handler, because that would pre-empt the surrounding application's +ability to control signal handling. + +The IJG distribution includes several memory manager back end implementations. +Usually the same back end should be suitable for all applications on a given +system, but it is possible for an application to supply its own back end at +need. + + +*** Implications of DNL marker *** + +Some JPEG files may use a DNL marker to postpone definition of the image +height (this would be useful for a fax-like scanner's output, for instance). +In these files the SOF marker claims the image height is 0, and you only +find out the true image height at the end of the first scan. + +We could read these files as follows: +1. Upon seeing zero image height, replace it by 65535 (the maximum allowed). +2. When the DNL is found, update the image height in the global image + descriptor. +This implies that control modules must avoid making copies of the image +height, and must re-test for termination after each MCU row. This would +be easy enough to do. + +In cases where image-size data structures are allocated, this approach will +result in very inefficient use of virtual memory or much-larger-than-necessary +temporary files. This seems acceptable for something that probably won't be a +mainstream usage. People might have to forgo use of memory-hogging options +(such as two-pass color quantization or noninterleaved JPEG files) if they +want efficient conversion of such files. (One could improve efficiency by +demanding a user-supplied upper bound for the height, less than 65536; in most +cases it could be much less.) + +The standard also permits the SOF marker to overestimate the image height, +with a DNL to give the true, smaller height at the end of the first scan. +This would solve the space problems if the overestimate wasn't too great. +However, it implies that you don't even know whether DNL will be used. + +This leads to a couple of very serious objections: +1. Testing for a DNL marker must occur in the inner loop of the decompressor's + Huffman decoder; this implies a speed penalty whether the feature is used + or not. +2. There is no way to hide the last-minute change in image height from an + application using the decoder. Thus *every* application using the IJG + library would suffer a complexity penalty whether it cared about DNL or + not. +We currently do not support DNL because of these problems. + +A different approach is to insist that DNL-using files be preprocessed by a +separate program that reads ahead to the DNL, then goes back and fixes the SOF +marker. This is a much simpler solution and is probably far more efficient. +Even if one wants piped input, buffering the first scan of the JPEG file needs +a lot smaller temp file than is implied by the maximum-height method. For +this approach we'd simply treat DNL as a no-op in the decompressor (at most, +check that it matches the SOF image height). + +We will not worry about making the compressor capable of outputting DNL. +Something similar to the first scheme above could be applied if anyone ever +wants to make that work. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbench.c glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbench.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbench.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbench.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,990 @@ +/* + * Copyright (C)2009-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "./bmp.h" +#include "./tjutil.h" +#include "./turbojpeg.h" + + +#define _throw(op, err) { \ + printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err); \ + retval=-1; goto bailout;} +#define _throwunix(m) _throw(m, strerror(errno)) +#define _throwtj(m) _throw(m, tjGetErrorStr()) +#define _throwbmp(m) _throw(m, bmpgeterr()) + +int flags=TJFLAG_NOREALLOC, componly=0, decomponly=0, doyuv=0, quiet=0, + dotile=0, pf=TJPF_BGR, yuvpad=1, warmup=1, dowrite=1; +char *ext="ppm"; +const char *pixFormatStr[TJ_NUMPF]= +{ + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK" +}; +const char *subNameLong[TJ_NUMSAMP]= +{ + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +}; +const char *csName[TJ_NUMCS]= +{ + "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" +}; +const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; +tjscalingfactor *scalingfactors=NULL, sf={1, 1}; int nsf=0; +int xformop=TJXOP_NONE, xformopt=0; +int (*customFilter)(short *, tjregion, tjregion, int, int, tjtransform *); +double benchtime=5.0; + + +char *formatName(int subsamp, int cs, char *buf) +{ + if(cs==TJCS_YCbCr) return (char *)subNameLong[subsamp]; + else if(cs==TJCS_YCCK) + { + snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]); + return buf; + } + else return (char *)csName[cs]; +} + + +char *sigfig(double val, int figs, char *buf, int len) +{ + char format[80]; + int digitsafterdecimal=figs-(int)ceil(log10(fabs(val))); + if(digitsafterdecimal<1) snprintf(format, 80, "%%.0f"); + else snprintf(format, 80, "%%.%df", digitsafterdecimal); + snprintf(buf, len, format, val); + return buf; +} + + +/* Custom DCT filter which produces a negative of the image */ +int dummyDCTFilter(short *coeffs, tjregion arrayRegion, tjregion planeRegion, + int componentIndex, int transformIndex, tjtransform *transform) +{ + int i; + for(i=0; i0) + { + snprintf(qualstr, 6, "_Q%d", jpegqual); + qualstr[5]=0; + } + + if((handle=tjInitDecompress())==NULL) + _throwtj("executing tjInitDecompress()"); + + if(dstbuf==NULL) + { + if((dstbuf=(unsigned char *)malloc(pitch*scaledh))==NULL) + _throwunix("allocating destination buffer"); + dstbufalloc=1; + } + /* Set the destination buffer to gray so we know whether the decompressor + attempted to write to it */ + memset(dstbuf, 127, pitch*scaledh); + + if(doyuv) + { + int width=dotile? tilew:scaledw; + int height=dotile? tileh:scaledh; + int yuvsize=tjBufSizeYUV2(width, yuvpad, height, subsamp); + if((yuvbuf=(unsigned char *)malloc(yuvsize))==NULL) + _throwunix("allocating YUV buffer"); + memset(yuvbuf, 127, yuvsize); + } + + /* Benchmark */ + iter=-warmup; + elapsed=elapsedDecode=0.; + while(1) + { + int tile=0; + double start=gettime(); + for(row=0, dstptr=dstbuf; row=0) elapsedDecode+=gettime()-startDecode; + } + else + if(tjDecompress2(handle, jpegbuf[tile], jpegsize[tile], dstptr2, + width, pitch, height, pf, flags)==-1) + _throwtj("executing tjDecompress2()"); + } + } + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + if(doyuv) elapsed-=elapsedDecode; + + if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); + handle=NULL; + + if(quiet) + { + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), + quiet==2? "\n":" "); + if(doyuv) + printf("%s\n", + sigfig((double)(w*h)/1000000.*(double)iter/elapsedDecode, 4, tempstr, + 1024)); + else if(quiet!=2) printf("\n"); + } + else + { + printf("%s --> Frame rate: %f fps\n", + doyuv? "Decomp to YUV":"Decompress ", (double)iter/elapsed); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsed); + if(doyuv) + { + printf("YUV Decode --> Frame rate:  %f fps\n", + (double)iter/elapsedDecode); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsedDecode); + } + } + + if (!dowrite) goto bailout; + + if(sf.num!=1 || sf.denom!=1) + snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom); + else if(tilew!=w || tileh!=h) + snprintf(sizestr, 20, "%dx%d", tilew, tileh); + else snprintf(sizestr, 20, "full"); + if(decomponly) + snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext); + else + snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp], + qualstr, sizestr, ext); + + if(savebmp(tempstr, dstbuf, scaledw, scaledh, pf, + (flags&TJFLAG_BOTTOMUP)!=0)==-1) + _throwbmp("saving bitmap"); + ptr=strrchr(tempstr, '.'); + snprintf(ptr, 1024-(ptr-tempstr), "-err.%s", ext); + if(srcbuf && sf.num==1 && sf.denom==1) + { + if(!quiet) printf("Compression error written to %s.\n", tempstr); + if(subsamp==TJ_GRAYSCALE) + { + int index, index2; + for(row=0, index=0; row255) y=255; if(y<0) y=0; + dstbuf[rindex]=abs(dstbuf[rindex]-y); + dstbuf[gindex]=abs(dstbuf[gindex]-y); + dstbuf[bindex]=abs(dstbuf[bindex]-y); + } + } + } + else + { + for(row=0; row>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr, + (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down", subNameLong[subsamp], + jpegqual); + + for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2) + { + if(tilew>w) tilew=w; if(tileh>h) tileh=h; + ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; + + if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); + if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG size array"); + memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); + + if((flags&TJFLAG_NOREALLOC)!=0) + for(i=0; i=0) elapsedEncode+=gettime()-startEncode; + if(tjCompressFromYUV(handle, yuvbuf, width, yuvpad, height, + subsamp, &jpegbuf[tile], &jpegsize[tile], jpegqual, flags)==-1) + _throwtj("executing tjCompressFromYUV()"); + } + else + { + if(tjCompress2(handle, srcptr2, width, pitch, height, pf, + &jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1) + _throwtj("executing tjCompress2()"); + } + totaljpegsize+=jpegsize[tile]; + } + } + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + if(doyuv) elapsed-=elapsedEncode; + + if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); + handle=NULL; + + if(quiet==1) printf("%-5d %-5d ", tilew, tileh); + if(quiet) + { + if(doyuv) + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsedEncode, 4, tempstr, + 1024), quiet==2? "\n":" "); + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), + quiet==2? "\n":" "); + printf("%-6s%s", + sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), + quiet==2? "\n":" "); + } + else + { + printf("\n%s size: %d x %d\n", dotile? "Tile":"Image", tilew, + tileh); + if(doyuv) + { + printf("Encode YUV --> Frame rate: %f fps\n", + (double)iter/elapsedEncode); + printf(" Output image size: %d bytes\n", yuvsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)yuvsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsedEncode); + printf(" Output bit stream: %f Megabits/sec\n", + (double)yuvsize*8./1000000.*(double)iter/elapsedEncode); + } + printf("%s --> Frame rate: %f fps\n", + doyuv? "Comp from YUV":"Compress ", (double)iter/elapsed); + printf(" Output image size: %d bytes\n", + totaljpegsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)totaljpegsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totaljpegsize*8./1000000.*(double)iter/elapsed); + } + if(tilew==w && tileh==h && dowrite) + { + snprintf(tempstr, 1024, "%s_%s_Q%d.jpg", filename, subName[subsamp], + jpegqual); + if((file=fopen(tempstr, "wb"))==NULL) + _throwunix("opening reference image"); + if(fwrite(jpegbuf[0], jpegsize[0], 1, file)!=1) + _throwunix("writing reference image"); + fclose(file); file=NULL; + if(!quiet) printf("Reference image written to %s\n", tempstr); + } + + /* Decompression test */ + if(!componly) + { + if(decomp(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual, + filename, tilew, tileh)==-1) + goto bailout; + } + + for(i=0; i>>>> JPEG %s --> %s (%s) <<<<<\n", + formatName(subsamp, cs, tempstr), pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down"); + + for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2) + { + if(tilew>w) tilew=w; if(tileh>h) tileh=h; + ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; + + if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); + if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG size array"); + memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); + + if((flags&TJFLAG_NOREALLOC)!=0 || !dotile) + for(i=0; i %d x %d", TJSCALED(_w, sf), TJSCALED(_h, sf)); + printf("\n"); + } + else if(quiet==1) + { + printf("%-4s (%s) %-5s %-5s ", pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "BU":"TD", csName[cs], subNameLong[subsamp]); + printf("%-5d %-5d ", tilew, tileh); + } + + _subsamp=subsamp; + if(dotile || xformop!=TJXOP_NONE || xformopt!=0 || customFilter) + { + if((t=(tjtransform *)malloc(sizeof(tjtransform)*ntilesw*ntilesh)) + ==NULL) + _throwunix("allocating image transform array"); + + if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE + || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) + { + _w=h; _h=w; _tilew=tileh; _tileh=tilew; + } + + if(xformopt&TJXOPT_GRAY) _subsamp=TJ_GRAYSCALE; + if(xformop==TJXOP_HFLIP || xformop==TJXOP_ROT180) + _w=_w-(_w%tjMCUWidth[_subsamp]); + if(xformop==TJXOP_VFLIP || xformop==TJXOP_ROT180) + _h=_h-(_h%tjMCUHeight[_subsamp]); + if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT90) + _w=_w-(_w%tjMCUHeight[_subsamp]); + if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT270) + _h=_h-(_h%tjMCUWidth[_subsamp]); + _ntilesw=(_w+_tilew-1)/_tilew; + _ntilesh=(_h+_tileh-1)/_tileh; + + if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE + || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) + { + if(_subsamp==TJSAMP_422) _subsamp=TJSAMP_440; + else if(_subsamp==TJSAMP_440) _subsamp=TJSAMP_422; + } + + for(row=0, tile=0; row<_ntilesh; row++) + { + for(col=0; col<_ntilesw; col++, tile++) + { + t[tile].r.w=min(_tilew, _w-col*_tilew); + t[tile].r.h=min(_tileh, _h-row*_tileh); + t[tile].r.x=col*_tilew; + t[tile].r.y=row*_tileh; + t[tile].op=xformop; + t[tile].options=xformopt|TJXOPT_TRIM; + t[tile].customFilter=customFilter; + if(t[tile].options&TJXOPT_NOOUTPUT && jpegbuf[tile]) + { + tjFree(jpegbuf[tile]); jpegbuf[tile]=NULL; + } + } + } + + iter=-warmup; + elapsed=0.; + while(1) + { + start=gettime(); + if(tjTransform(handle, srcbuf, srcsize, _ntilesw*_ntilesh, jpegbuf, + jpegsize, t, flags)==-1) + _throwtj("executing tjTransform()"); + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + + free(t); t=NULL; + + for(tile=0, totaljpegsize=0; tile<_ntilesw*_ntilesh; tile++) + totaljpegsize+=jpegsize[tile]; + + if(quiet) + { + printf("%-6s%s%-6s%s", + sigfig((double)(w*h)/1000000./elapsed, 4, tempstr, 80), + quiet==2? "\n":" ", + sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), + quiet==2? "\n":" "); + } + else if(!quiet) + { + printf("Transform --> Frame rate: %f fps\n", 1.0/elapsed); + printf(" Output image size: %lu bytes\n", totaljpegsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)totaljpegsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000./elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totaljpegsize*8./1000000./elapsed); + } + } + else + { + if(quiet==1) printf("N/A N/A "); + jpegsize[0]=srcsize; + memcpy(jpegbuf[0], srcbuf, srcsize); + } + + if(w==tilew) _tilew=_w; + if(h==tileh) _tileh=_h; + if(!(xformopt&TJXOPT_NOOUTPUT)) + { + if(decomp(NULL, jpegbuf, jpegsize, NULL, _w, _h, _subsamp, 0, + filename, _tilew, _tileh)==-1) + goto bailout; + } + else if(quiet==1) printf("N/A\n"); + + for(i=0; i [options]\n\n"); + printf(" %s\n", progname); + printf(" [options]\n\n"); + printf("Options:\n\n"); + printf("-alloc = Dynamically allocate JPEG image buffers\n"); + printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n"); + printf("-bottomup = Test bottom-up compression/decompression\n"); + printf("-tile = Test performance of the codec when the image is encoded as separate\n"); + printf(" tiles of varying sizes.\n"); + printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); + printf(" Test the specified color conversion path in the codec (default = BGR)\n"); + printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n"); + printf(" and destination bitmaps are still RGB. The conversion is done\n"); + printf(" internally prior to compression or after decompression.)\n"); + printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); + printf(" the underlying codec\n"); + printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); + printf(" codec\n"); + printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); + printf(" underlying codec\n"); + printf("-subsamp = When testing JPEG compression, this option specifies the level\n"); + printf(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or\n"); + printf(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n"); + printf(" sequence.\n"); + printf("-quiet = Output results in tabular rather than verbose format\n"); + printf("-yuv = Test YUV encoding/decoding functions\n"); + printf("-yuvpad

= If testing YUV encoding/decoding, this specifies the number of\n"); + printf(" bytes to which each row of each plane in the intermediate YUV image is\n"); + printf(" padded (default = 1)\n"); + printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n"); + printf(" factor of M/N (M/N = "); + for(i=0; i2) + { + if(i!=nsf-1) printf(", "); + if(i==nsf-2) printf("or "); + } + if(i%8==0 && i!=0) printf("\n "); + } + printf(")\n"); + printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); + printf(" Perform the corresponding lossless transform prior to\n"); + printf(" decompression (these options are mutually exclusive)\n"); + printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n"); + printf(" test (can be combined with the other transforms above)\n"); + printf("-benchtime = Run each benchmark for at least seconds (default = 5.0)\n"); + printf("-warmup = Execute each benchmark times to prime the cache before\n"); + printf(" taking performance measurements (default = 1)\n"); + printf("-componly = Stop after running compression tests. Do not test decompression.\n"); + printf("-nowrite = Do not write reference or output images (improves consistency of\n"); + printf(" performance measurements.)\n\n"); + printf("NOTE: If the quality is specified as a range (e.g. 90-100), a separate\n"); + printf("test will be performed for all quality values in the range.\n\n"); + exit(1); +} + + +int main(int argc, char *argv[]) +{ + unsigned char *srcbuf=NULL; int w=0, h=0, i, j; + int minqual=-1, maxqual=-1; char *temp; + int minarg=2, retval=0, subsamp=-1; + + if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0) + _throwtj("executing tjGetScalingFactors()"); + + if(argc100) + { + puts("ERROR: Quality must be between 1 and 100."); + exit(1); + } + if((temp=strchr(argv[2], '-'))!=NULL && strlen(temp)>1 + && sscanf(&temp[1], "%d", &maxqual)==1 && maxqual>minqual && maxqual>=1 + && maxqual<=100) {} + else maxqual=minqual; + } + + if(argc>minarg) + { + for(i=minarg; i0.0) benchtime=temp; + else usage(argv[0]); + } + if(!strcasecmp(argv[i], "-warmup") && i=0) + { + warmup=temp; + printf("Warmup runs = %d\n\n", warmup); + } + else usage(argv[0]); + } + if(!strcmp(argv[i], "-?")) usage(argv[0]); + if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC); + if(!strcasecmp(argv[i], "-bmp")) ext="bmp"; + if(!strcasecmp(argv[i], "-yuv")) + { + printf("Testing YUV planar encoding/decoding\n\n"); + doyuv=1; + } + if(!strcasecmp(argv[i], "-yuvpad") && i=1) yuvpad=temp; + } + if(!strcasecmp(argv[i], "-subsamp") && i=0 && subsamp=minqual; i--) + fullTest(srcbuf, w, h, subsamp, i, argv[1]); + printf("\n"); + } + else + { + if(pf!=TJPF_CMYK) + { + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1]); + printf("\n"); + } + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_420, i, argv[1]); + printf("\n"); + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_422, i, argv[1]); + printf("\n"); + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_444, i, argv[1]); + printf("\n"); + } + + bailout: + if(srcbuf) free(srcbuf); + return retval; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbenchtest.in glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbenchtest.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbenchtest.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbenchtest.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,249 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +EXT=bmp +IMAGES="vgl_5674_0098.${EXT} vgl_6434_0018a.${EXT} vgl_6548_0026a.${EXT} nightshot_iso_100.${EXT}" +IMGDIR=@srcdir@/testimages +OUTDIR=`mktemp -d /tmp/__tjbenchtest_output.XXXXXX` +EXEDIR=. +BMPARG= +NSARG= +YUVARG= +ALLOC=0 +ALLOCARG= +if [ "$EXT" = "bmp" ]; then BMPARG=-bmp; fi + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjbenchtest.log + +if [ $# -gt 0 ]; then + if [ "$1" = "-yuv" ]; then + NSARG=-nosmooth + YUVARG=-yuv + +# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not +# always produce bitwise-identical results to tjCompress*() if subsampling is +# enabled. In both cases, if the image width or height are not evenly +# divisible by the MCU width/height, then the bottom and/or right edge are +# expanded. However, the libjpeg code performs this expansion prior to +# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is +# after downsampling. Thus, the two will agree only if the width/height along +# each downsampled dimension is an odd number or is evenly divisible by the MCU +# width/height. This disagreement basically amounts to a round-off error, but +# there is no easy way around it, so for now, we just test the only image that +# works. (NOTE: nightshot_iso_100 does not suffer from the above issue, but +# it suffers from an unrelated problem whereby the combination of +# tjDecompressToYUV*() and tjDecodeYUV*() do not produce bitwise-identical +# results to tjDecompress*() if decompression scaling is enabled. This latter +# phenomenon is not yet fully understood but is also believed to be some sort +# of round-off error.) + IMAGES="vgl_6548_0026a.${EXT}" + fi + if [ "$1" = "-alloc" ]; then + ALLOCARG=-alloc + ALLOC=1 + fi +fi + +# Standard tests +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .${EXT}` + runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + + # Compression + for dct in accurate fast; do + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG $ALLOCARG + for samp in GRAY 420 422 444; do + runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + for dct in fast accurate default; do + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + dctarg= + fi + + # Tiled compression & decompression + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + for samp in GRAY 444; do + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $i + done + fi + done + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + for samp in 420 422; do + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $i + done + fi + done + + # Tiled decompression + for samp in GRAY 444; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $i + done + fi + done + for samp in 420 422; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $i + done + fi + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} + done + done + + # Transforms + for samp in GRAY 420 422 444; do + runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444; do + runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbenchtest.java.in glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbenchtest.java.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjbenchtest.java.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjbenchtest.java.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,207 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" +IMGDIR=@srcdir@/testimages +OUTDIR=`mktemp -d /tmp/__tjbenchtest_java_output.XXXXXX` +EXEDIR=. +JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" +BMPARG= +NSARG= +YUVARG= + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjbenchtest-java.log + +if [ $# -gt 0 ]; then + if [ "$1" = "-yuv" ]; then + NSARG=-nosmooth + YUVARG=-yuv + +# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not +# always produce bitwise-identical results to tjCompress*() if subsampling is +# enabled. In both cases, if the image width or height are not evenly +# divisible by the MCU width/height, then the bottom and/or right edge are +# expanded. However, the libjpeg code performs this expansion prior to +# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is +# after downsampling. Thus, the two will agree only if the width/height along +# each downsampled dimension is an odd number or is evenly divisible by the MCU +# width/height. This disagreement basically amounts to a round-off error, but +# there is no easy way around it, so for now, we just test the only image that +# works. (NOTE: nightshot_iso_100 does not suffer from the above issue, but +# it suffers from an unrelated problem whereby the combination of +# tjDecompressToYUV*() and tjDecodeYUV*() do not produce bitwise-identical +# results to tjDecompress*() if decompression scaling is enabled. This latter +# phenomenon is not yet fully understood but is also believed to be some sort +# of round-off error.) + IMAGES="vgl_6548_0026a.bmp" + fi +fi + +# Standard tests +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .bmp` + runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + + # Compression + for dct in accurate fast; do + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG + for samp in GRAY 420 422 444; do + runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + for dct in fast accurate default; do + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + dctarg= + fi + + # Tiled compression & decompression + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + for samp in GRAY 444; do + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $i + done + done + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + for samp in 420 422; do + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $i + done + done + + # Tiled decompression + for samp in GRAY 444; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $i + done + done + for samp in 420 422; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp $i -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $i + done + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp + done + done + + # Transforms + for samp in GRAY 420 422 444; do + runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444; do + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $i + done + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $i + done + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp + rm $i + done + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjexampletest.in glmark2-2021.02/android/jni/src/libjpeg-turbo/tjexampletest.in --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjexampletest.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjexampletest.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,150 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" +IMGDIR=@srcdir@/testimages +OUTDIR=__tjexampletest_output +EXEDIR=. +JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjexampletest.log + +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .bmp` + $EXEDIR/cjpeg -quality 95 -dct fast -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_accurate_cjpeg.jpg + for samp in GRAY 420 422 444; do + $EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_djpeg.bmp + $EXEDIR/djpeg -dct fast -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_djpeg.bmp + $EXEDIR/djpeg -dct int -rgb -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_djpeg.bmp + done + for samp in 420 422; do + $EXEDIR/djpeg -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp + $EXEDIR/djpeg -dct fast -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp + $EXEDIR/djpeg -dct int -nosmooth -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp + done + + # Compression + for dct in fast accurate; do + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -samp ${samp} -${dct}dct + runme cmp $OUTDIR/${basename}_${samp}_${dct}.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + # Decompression + for dct in fast accurate default; do + srcdct=${dct} + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + srcdct=fast + dctarg= + fi + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}.bmp $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}.bmp + done + for samp in 420 422; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${scale}.bmp + done + done + + # Transforms + for samp in GRAY 420 422 444; do + $EXEDIR/jpegtran -crop 70x60+16+16 -flip horizontal -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -flip vertical -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -transpose -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -transverse -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 90 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 180 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 270 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 16,16,70x60 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + $EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + for samp in 420 422; do + $EXEDIR/djpeg -nosmooth -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 -fastupsample + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 16,16,70x60 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_GRAY_${xform}_jpegtran.jpg + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjunittest.c glmark2-2021.02/android/jni/src/libjpeg-turbo/tjunittest.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjunittest.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjunittest.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,735 @@ +/* + * Copyright (C)2009-2014 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This program tests the various code paths in the TurboJPEG C Wrapper + */ + +#include +#include +#include +#include +#include "./tjutil.h" +#include "./turbojpeg.h" +#ifdef _WIN32 + #include + #define random() rand() +#endif + + +void usage(char *progName) +{ + printf("\nUSAGE: %s [options]\n", progName); + printf("Options:\n"); + printf("-yuv = test YUV encoding/decoding support\n"); + printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n"); + printf(" 4-byte boundary\n"); + printf("-alloc = test automatic buffer allocation\n"); + exit(1); +} + + +#define _throwtj() {printf("TurboJPEG ERROR:\n%s\n", tjGetErrorStr()); \ + bailout();} +#define _tj(f) {if((f)==-1) _throwtj();} +#define _throw(m) {printf("ERROR: %s\n", m); bailout();} + +const char *subNameLong[TJ_NUMSAMP]= +{ + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +}; +const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; + +const char *pixFormatStr[TJ_NUMPF]= +{ + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale", + "RGBA", "BGRA", "ABGR", "ARGB", "CMYK" +}; + +const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1}; + +const int _3byteFormats[]={TJPF_RGB, TJPF_BGR}; +const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, + TJPF_CMYK}; +const int _onlyGray[]={TJPF_GRAY}; +const int _onlyRGB[]={TJPF_RGB}; + +int doyuv=0, alloc=0, pad=4; + +int exitStatus=0; +#define bailout() {exitStatus=-1; goto bailout;} + + +void initBuf(unsigned char *buf, int w, int h, int pf, int flags) +{ + int roffset=tjRedOffset[pf]; + int goffset=tjGreenOffset[pf]; + int boffset=tjBlueOffset[pf]; + int ps=tjPixelSize[pf]; + int index, row, col, halfway=16; + + if(pf==TJPF_GRAY) + { + memset(buf, 0, w*h*ps); + for(row=0; row=halfway) buf[index*ps+3]=0; + } + else + { + buf[index*ps+2]=0; + if(row=halfway) buf[index*ps+goffset]=255; + } + } + } + } +} + + +#define checkval(v, cv) { \ + if(vcv+1) { \ + printf("\nComp. %s at %d,%d should be %d, not %d\n", \ + #v, row, col, cv, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + +#define checkval0(v) { \ + if(v>1) { \ + printf("\nComp. %s at %d,%d should be 0, not %d\n", #v, row, col, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + +#define checkval255(v) { \ + if(v<254) { \ + printf("\nComp. %s at %d,%d should be 255, not %d\n", #v, row, col, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + + +int checkBuf(unsigned char *buf, int w, int h, int pf, int subsamp, + tjscalingfactor sf, int flags) +{ + int roffset=tjRedOffset[pf]; + int goffset=tjGreenOffset[pf]; + int boffset=tjBlueOffset[pf]; + int aoffset=alphaOffset[pf]; + int ps=tjPixelSize[pf]; + int index, row, col, retval=1; + int halfway=16*sf.num/sf.denom; + int blocksize=8*sf.num/sf.denom; + + if(pf==TJPF_CMYK) + { + for(row=0; row=0? buf[index*ps+aoffset]:0xFF; + if(((row/blocksize)+(col/blocksize))%2==0) + { + if(row0) memset(*dstBuf, 0, *dstSize); + + + if(!alloc) flags|=TJFLAG_NOREALLOC; + if(doyuv) + { + unsigned long yuvSize=tjBufSizeYUV2(w, pad, h, subsamp); + tjscalingfactor sf={1, 1}; + tjhandle handle2=tjInitCompress(); + if(!handle2) _throwtj(); + + if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]); + _tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp, + flags)); + tjDestroy(handle2); + if(checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong, + jpegQual); + _tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf, + dstSize, jpegQual, flags)); + } + else + { + printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp], + jpegQual); + _tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp, + jpegQual, flags)); + } + + snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr, + subName[subsamp], jpegQual); + writeJPEG(*dstBuf, *dstSize, tempStr); + printf("Done.\n Result in %s\n", tempStr); + + bailout: + if(yuvBuf) free(yuvBuf); + if(srcBuf) free(srcBuf); +} + + +void _decompTest(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, + int flags, tjscalingfactor sf) +{ + unsigned char *dstBuf=NULL, *yuvBuf=NULL; + int _hdrw=0, _hdrh=0, _hdrsubsamp=-1; + int scaledWidth=TJSCALED(w, sf); + int scaledHeight=TJSCALED(h, sf); + unsigned long dstSize=0; + + _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh, + &_hdrsubsamp)); + if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp) + _throw("Incorrect JPEG header"); + + dstSize=scaledWidth*scaledHeight*tjPixelSize[pf]; + if((dstBuf=(unsigned char *)malloc(dstSize))==NULL) + _throw("Memory allocation failure"); + memset(dstBuf, 0, dstSize); + + if(doyuv) + { + unsigned long yuvSize=tjBufSizeYUV2(scaledWidth, pad, scaledHeight, + subsamp); + tjhandle handle2=tjInitDecompress(); + if(!handle2) _throwtj(); + + if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("JPEG -> YUV %s ", subNameLong[subsamp]); + if(sf.num!=1 || sf.denom!=1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth, + pad, scaledHeight, flags)); + if(checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf)) + printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down "); + _tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + tjDestroy(handle2); + } + else + { + printf("JPEG -> %s %s ", pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down "); + if(sf.num!=1 || sf.denom!=1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + } + + if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags)) + printf("Passed."); + else printf("FAILED!"); + printf("\n"); + + bailout: + if(yuvBuf) free(yuvBuf); + if(dstBuf) free(dstBuf); +} + + +void decompTest(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, + int flags) +{ + int i, n=0; + tjscalingfactor *sf=tjGetScalingFactors(&n); + if(!sf || !n) _throwtj(); + + for(i=0; i=TJPF_RGBX && pf<=TJPF_XRGB) + { + printf("\n"); + decompTest(dhandle, dstBuf, size, w, h, pf+(TJPF_RGBA-TJPF_RGBX), + basename, subsamp, flags); + } + printf("\n"); + } + } + printf("--------------------\n\n"); + + bailout: + if(chandle) tjDestroy(chandle); + if(dhandle) tjDestroy(dhandle); + + if(dstBuf) tjFree(dstBuf); +} + + +void bufSizeTest(void) +{ + int w, h, i, subsamp; + unsigned char *srcBuf=NULL, *dstBuf=NULL; + tjhandle handle=NULL; + unsigned long dstSize=0; + + if((handle=tjInitCompress())==NULL) _throwtj(); + + printf("Buffer size regression test\n"); + for(subsamp=0; subsamp1) + { + for(i=1; i + +static double getfreq(void) +{ + LARGE_INTEGER freq; + if(!QueryPerformanceFrequency(&freq)) return 0.0; + return (double)freq.QuadPart; +} + +static double f=-1.0; + +double gettime(void) +{ + LARGE_INTEGER t; + if(f<0.0) f=getfreq(); + if(f==0.0) return (double)GetTickCount()/1000.; + else + { + QueryPerformanceCounter(&t); + return (double)t.QuadPart/f; + } +} + +#else + +#include +#include + +double gettime(void) +{ + struct timeval tv; + if(gettimeofday(&tv, NULL)<0) return 0.0; + else return (double)tv.tv_sec+((double)tv.tv_usec/1000000.); +} + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjutil.h glmark2-2021.02/android/jni/src/libjpeg-turbo/tjutil.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/tjutil.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/tjutil.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,47 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _WIN32 + #ifndef __MINGW32__ + #include + #define snprintf(str, n, format, ...) \ + _snprintf_s(str, n, _TRUNCATE, format, __VA_ARGS__) + #endif + #define strcasecmp stricmp + #define strncasecmp strnicmp +#endif + +#ifndef min + #define min(a,b) ((a)<(b)?(a):(b)) +#endif + +#ifndef max + #define max(a,b) ((a)>(b)?(a):(b)) +#endif + +extern double gettime(void); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/transupp.c glmark2-2021.02/android/jni/src/libjpeg-turbo/transupp.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/transupp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/transupp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1621 @@ +/* + * transupp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains image transformation routines and other utility code + * used by the jpegtran sample application. These are NOT part of the core + * JPEG library. But we keep these routines separate from jpegtran.c to + * ease the task of maintaining jpegtran-like programs that have other user + * interfaces. + */ + +/* Although this file really shouldn't have access to the library internals, + * it's helpful to let it call jround_up() and jcopy_block_row(). + */ +#define JPEG_INTERNALS + +#include "jinclude.h" +#include "jpeglib.h" +#include "transupp.h" /* My own external interface */ +#include "jpegcomp.h" +#include /* to declare isdigit() */ + + +#if JPEG_LIB_VERSION >= 70 +#define dstinfo_min_DCT_h_scaled_size dstinfo->min_DCT_h_scaled_size +#define dstinfo_min_DCT_v_scaled_size dstinfo->min_DCT_v_scaled_size +#else +#define dstinfo_min_DCT_h_scaled_size DCTSIZE +#define dstinfo_min_DCT_v_scaled_size DCTSIZE +#endif + + +#if TRANSFORMS_SUPPORTED + +/* + * Lossless image transformation routines. These routines work on DCT + * coefficient arrays and thus do not require any lossy decompression + * or recompression of the image. + * Thanks to Guido Vollbeding for the initial design and code of this feature, + * and to Ben Jackson for introducing the cropping feature. + * + * Horizontal flipping is done in-place, using a single top-to-bottom + * pass through the virtual source array. It will thus be much the + * fastest option for images larger than main memory. + * + * The other routines require a set of destination virtual arrays, so they + * need twice as much memory as jpegtran normally does. The destination + * arrays are always written in normal scan order (top to bottom) because + * the virtual array manager expects this. The source arrays will be scanned + * in the corresponding order, which means multiple passes through the source + * arrays for most of the transforms. That could result in much thrashing + * if the image is larger than main memory. + * + * If cropping or trimming is involved, the destination arrays may be smaller + * than the source arrays. Note it is not possible to do horizontal flip + * in-place when a nonzero Y crop offset is specified, since we'd have to move + * data from one block row to another but the virtual array manager doesn't + * guarantee we can touch more than one row at a time. So in that case, + * we have to use a separate destination array. + * + * Some notes about the operating environment of the individual transform + * routines: + * 1. Both the source and destination virtual arrays are allocated from the + * source JPEG object, and therefore should be manipulated by calling the + * source's memory manager. + * 2. The destination's component count should be used. It may be smaller + * than the source's when forcing to grayscale. + * 3. Likewise the destination's sampling factors should be used. When + * forcing to grayscale the destination's sampling factors will be all 1, + * and we may as well take that as the effective iMCU size. + * 4. When "trim" is in effect, the destination's dimensions will be the + * trimmed values but the source's will be untrimmed. + * 5. When "crop" is in effect, the destination's dimensions will be the + * cropped values but the source's will be uncropped. Each transform + * routine is responsible for picking up source data starting at the + * correct X and Y offset for the crop region. (The X and Y offsets + * passed to the transform routines are measured in iMCU blocks of the + * destination.) + * 6. All the routines assume that the source and destination buffers are + * padded out to a full iMCU boundary. This is true, although for the + * source buffer it is an undocumented property of jdcoefct.c. + */ + + +LOCAL(void) +do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Crop. This is only used when no rotate/flip is requested with the crop. */ +{ + JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks; + int ci, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + jpeg_component_info *compptr; + + /* We simply have to copy the right amount of data (the destination's + * image size) starting at the given X and Y offsets in the source. + */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + jcopy_block_row(src_buffer[offset_y] + x_crop_blocks, + dst_buffer[offset_y], + compptr->width_in_blocks); + } + } + } +} + + +LOCAL(void) +do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, + jvirt_barray_ptr *src_coef_arrays) +/* Horizontal flip; done in-place, so no separate dest array is required. + * NB: this only works when y_crop_offset is zero. + */ +{ + JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks; + int ci, k, offset_y; + JBLOCKARRAY buffer; + JCOEFPTR ptr1, ptr2; + JCOEF temp1, temp2; + jpeg_component_info *compptr; + + /* Horizontal mirroring of DCT blocks is accomplished by swapping + * pairs of blocks in-place. Within a DCT block, we perform horizontal + * mirroring by changing the signs of odd-numbered columns. + * Partial iMCUs at the right edge are left untouched. + */ + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + for (blk_y = 0; blk_y < compptr->height_in_blocks; + blk_y += compptr->v_samp_factor) { + buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + /* Do the mirroring */ + for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) { + ptr1 = buffer[offset_y][blk_x]; + ptr2 = buffer[offset_y][comp_width - blk_x - 1]; + /* this unrolled loop doesn't need to know which row it's on... */ + for (k = 0; k < DCTSIZE2; k += 2) { + temp1 = *ptr1; /* swap even column */ + temp2 = *ptr2; + *ptr1++ = temp2; + *ptr2++ = temp1; + temp1 = *ptr1; /* swap odd column with sign change */ + temp2 = *ptr2; + *ptr1++ = -temp2; + *ptr2++ = -temp1; + } + } + if (x_crop_blocks > 0) { + /* Now left-justify the portion of the data to be kept. + * We can't use a single jcopy_block_row() call because that routine + * depends on memcpy(), whose behavior is unspecified for overlapping + * source and destination areas. Sigh. + */ + for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) { + jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks, + buffer[offset_y] + blk_x, + (JDIMENSION) 1); + } + } + } + } + } +} + + +LOCAL(void) +do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Horizontal flip in general cropping case */ +{ + JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, k, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Here we must output into a separate array because we can't touch + * different rows of a single virtual array simultaneously. Otherwise, + * this is essentially the same as the routine above. + */ + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + dst_row_ptr = dst_buffer[offset_y]; + src_row_ptr = src_buffer[offset_y]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Do the mirrorable blocks */ + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + /* this unrolled loop doesn't need to know which row it's on... */ + for (k = 0; k < DCTSIZE2; k += 2) { + *dst_ptr++ = *src_ptr++; /* copy even column */ + *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */ + } + } else { + /* Copy last partial block(s) verbatim */ + jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, + dst_row_ptr + dst_blk_x, + (JDIMENSION) 1); + } + } + } + } + } +} + + +LOCAL(void) +do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Vertical flip */ +{ + JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* We output into a separate array because we can't touch different + * rows of the source virtual array simultaneously. Otherwise, this + * is a pretty straightforward analog of horizontal flip. + * Within a DCT block, vertical mirroring is done by changing the signs + * of odd-numbered rows. + * Partial iMCUs at the bottom edge are copied verbatim. + */ + MCU_rows = srcinfo->output_height / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_height - y_crop_blocks - dst_blk_y - + (JDIMENSION) compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } else { + /* Bottom-edge blocks will be copied verbatim. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + dst_row_ptr = dst_buffer[offset_y]; + src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1]; + src_row_ptr += x_crop_blocks; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x++) { + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[dst_blk_x]; + for (i = 0; i < DCTSIZE; i += 2) { + /* copy even row */ + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = *src_ptr++; + /* copy odd row with sign change */ + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = - *src_ptr++; + } + } + } else { + /* Just copy row verbatim. */ + jcopy_block_row(src_buffer[offset_y] + x_crop_blocks, + dst_buffer[offset_y], + compptr->width_in_blocks); + } + } + } + } +} + + +LOCAL(void) +do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Transpose source into destination */ +{ + JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Transposing pixels within a block just requires transposing the + * DCT coefficients. + * Partial iMCUs at the edges require no special treatment; we simply + * process all the available DCT blocks for every component. + */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } +} + + +LOCAL(void) +do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 90 degree rotation is equivalent to + * 1. Transposing the image; + * 2. Horizontal mirroring. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Because of the horizontal mirror step, we can't process partial iMCUs + * at the (output) right edge properly. They just get transposed and + * not mirrored. + */ + MCU_cols = srcinfo->output_height / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_width - x_crop_blocks - dst_blk_x - + (JDIMENSION) compptr->h_samp_factor, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } else { + /* Edge blocks are transposed but not mirrored. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + i++; + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } else { + /* Edge blocks are transposed but not mirrored. */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } +} + + +LOCAL(void) +do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 270 degree rotation is equivalent to + * 1. Horizontal mirroring; + * 2. Transposing the image. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Because of the horizontal mirror step, we can't process partial iMCUs + * at the (output) bottom edge properly. They just get transposed and + * not mirrored. + */ + MCU_rows = srcinfo->output_width / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[offset_x] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } + } else { + /* Edge blocks are transposed but not mirrored. */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } +} + + +LOCAL(void) +do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 180 degree rotation is equivalent to + * 1. Vertical mirroring; + * 2. Horizontal mirroring. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + MCU_rows = srcinfo->output_height / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the vertically mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_height - y_crop_blocks - dst_blk_y - + (JDIMENSION) compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } else { + /* Bottom-edge rows are only mirrored horizontally. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + dst_row_ptr = dst_buffer[offset_y]; + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + dst_ptr = dst_row_ptr[dst_blk_x]; + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Process the blocks that can be mirrored both ways. */ + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + for (i = 0; i < DCTSIZE; i += 2) { + /* For even row, negate every odd column. */ + for (j = 0; j < DCTSIZE; j += 2) { + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = - *src_ptr++; + } + /* For odd row, negate every even column. */ + for (j = 0; j < DCTSIZE; j += 2) { + *dst_ptr++ = - *src_ptr++; + *dst_ptr++ = *src_ptr++; + } + } + } else { + /* Any remaining right-edge blocks are only mirrored vertically. */ + src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x]; + for (i = 0; i < DCTSIZE; i += 2) { + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = *src_ptr++; + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = - *src_ptr++; + } + } + } + } else { + /* Remaining rows are just mirrored horizontally. */ + src_row_ptr = src_buffer[offset_y]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Process the blocks that can be mirrored. */ + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + for (i = 0; i < DCTSIZE2; i += 2) { + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = - *src_ptr++; + } + } else { + /* Any remaining right-edge blocks are only copied. */ + jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, + dst_row_ptr + dst_blk_x, + (JDIMENSION) 1); + } + } + } + } + } + } +} + + +LOCAL(void) +do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Transverse transpose is equivalent to + * 1. 180 degree rotation; + * 2. Transposition; + * or + * 1. Horizontal mirroring; + * 2. Transposition; + * 3. Horizontal mirroring. + * These steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + MCU_cols = srcinfo->output_height / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + MCU_rows = srcinfo->output_width / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_width - x_crop_blocks - dst_blk_x - + (JDIMENSION) compptr->h_samp_factor, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } else { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (y_crop_blocks + dst_blk_y < comp_height) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + i++; + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } else { + /* Right-edge blocks are mirrored in y only */ + src_ptr = src_buffer[offset_x] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } + } + } else { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Bottom-edge blocks are mirrored in x only */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + i++; + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } else { + /* At lower right corner, just transpose, no mirroring */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } + } +} + + +/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec. + * Returns TRUE if valid integer found, FALSE if not. + * *strptr is advanced over the digit string, and *result is set to its value. + */ + +LOCAL(boolean) +jt_read_integer (const char **strptr, JDIMENSION *result) +{ + const char *ptr = *strptr; + JDIMENSION val = 0; + + for (; isdigit(*ptr); ptr++) { + val = val * 10 + (JDIMENSION) (*ptr - '0'); + } + *result = val; + if (ptr == *strptr) + return FALSE; /* oops, no digits */ + *strptr = ptr; + return TRUE; +} + + +/* Parse a crop specification (written in X11 geometry style). + * The routine returns TRUE if the spec string is valid, FALSE if not. + * + * The crop spec string should have the format + * [f]x[f]{+-}{+-} + * where width, height, xoffset, and yoffset are unsigned integers. + * Each of the elements can be omitted to indicate a default value. + * (A weakness of this style is that it is not possible to omit xoffset + * while specifying yoffset, since they look alike.) + * + * This code is loosely based on XParseGeometry from the X11 distribution. + */ + +GLOBAL(boolean) +jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) +{ + info->crop = FALSE; + info->crop_width_set = JCROP_UNSET; + info->crop_height_set = JCROP_UNSET; + info->crop_xoffset_set = JCROP_UNSET; + info->crop_yoffset_set = JCROP_UNSET; + + if (isdigit(*spec)) { + /* fetch width */ + if (! jt_read_integer(&spec, &info->crop_width)) + return FALSE; + if (*spec == 'f' || *spec == 'F') { + spec++; + info->crop_width_set = JCROP_FORCE; + } else + info->crop_width_set = JCROP_POS; + } + if (*spec == 'x' || *spec == 'X') { + /* fetch height */ + spec++; + if (! jt_read_integer(&spec, &info->crop_height)) + return FALSE; + if (*spec == 'f' || *spec == 'F') { + spec++; + info->crop_height_set = JCROP_FORCE; + } else + info->crop_height_set = JCROP_POS; + } + if (*spec == '+' || *spec == '-') { + /* fetch xoffset */ + info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; + spec++; + if (! jt_read_integer(&spec, &info->crop_xoffset)) + return FALSE; + } + if (*spec == '+' || *spec == '-') { + /* fetch yoffset */ + info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; + spec++; + if (! jt_read_integer(&spec, &info->crop_yoffset)) + return FALSE; + } + /* We had better have gotten to the end of the string. */ + if (*spec != '\0') + return FALSE; + info->crop = TRUE; + return TRUE; +} + + +/* Trim off any partial iMCUs on the indicated destination edge */ + +LOCAL(void) +trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width) +{ + JDIMENSION MCU_cols; + + MCU_cols = info->output_width / info->iMCU_sample_width; + if (MCU_cols > 0 && info->x_crop_offset + MCU_cols == + full_width / info->iMCU_sample_width) + info->output_width = MCU_cols * info->iMCU_sample_width; +} + +LOCAL(void) +trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height) +{ + JDIMENSION MCU_rows; + + MCU_rows = info->output_height / info->iMCU_sample_height; + if (MCU_rows > 0 && info->y_crop_offset + MCU_rows == + full_height / info->iMCU_sample_height) + info->output_height = MCU_rows * info->iMCU_sample_height; +} + + +/* Request any required workspace. + * + * This routine figures out the size that the output image will be + * (which implies that all the transform parameters must be set before + * it is called). + * + * We allocate the workspace virtual arrays from the source decompression + * object, so that all the arrays (both the original data and the workspace) + * will be taken into account while making memory management decisions. + * Hence, this routine must be called after jpeg_read_header (which reads + * the image dimensions) and before jpeg_read_coefficients (which realizes + * the source's virtual arrays). + * + * This function returns FALSE right away if -perfect is given + * and transformation is not perfect. Otherwise returns TRUE. + */ + +GLOBAL(boolean) +jtransform_request_workspace (j_decompress_ptr srcinfo, + jpeg_transform_info *info) +{ + jvirt_barray_ptr *coef_arrays; + boolean need_workspace, transpose_it; + jpeg_component_info *compptr; + JDIMENSION xoffset, yoffset; + JDIMENSION width_in_iMCUs, height_in_iMCUs; + JDIMENSION width_in_blocks, height_in_blocks; + int ci, h_samp_factor, v_samp_factor; + + /* Determine number of components in output image */ + if (info->force_grayscale && + srcinfo->jpeg_color_space == JCS_YCbCr && + srcinfo->num_components == 3) + /* We'll only process the first component */ + info->num_components = 1; + else + /* Process all the components */ + info->num_components = srcinfo->num_components; + + /* Compute output image dimensions and related values. */ +#if JPEG_LIB_VERSION >= 80 + jpeg_core_output_dimensions(srcinfo); +#else + srcinfo->output_width = srcinfo->image_width; + srcinfo->output_height = srcinfo->image_height; +#endif + + /* Return right away if -perfect is given and transformation is not perfect. + */ + if (info->perfect) { + if (info->num_components == 1) { + if (!jtransform_perfect_transform(srcinfo->output_width, + srcinfo->output_height, + srcinfo->_min_DCT_h_scaled_size, + srcinfo->_min_DCT_v_scaled_size, + info->transform)) + return FALSE; + } else { + if (!jtransform_perfect_transform(srcinfo->output_width, + srcinfo->output_height, + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size, + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size, + info->transform)) + return FALSE; + } + } + + /* If there is only one output component, force the iMCU size to be 1; + * else use the source iMCU size. (This allows us to do the right thing + * when reducing color to grayscale, and also provides a handy way of + * cleaning up "funny" grayscale images whose sampling factors are not 1x1.) + */ + switch (info->transform) { + case JXFORM_TRANSPOSE: + case JXFORM_TRANSVERSE: + case JXFORM_ROT_90: + case JXFORM_ROT_270: + info->output_width = srcinfo->output_height; + info->output_height = srcinfo->output_width; + if (info->num_components == 1) { + info->iMCU_sample_width = srcinfo->_min_DCT_v_scaled_size; + info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size; + } else { + info->iMCU_sample_width = + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size; + info->iMCU_sample_height = + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size; + } + break; + default: + info->output_width = srcinfo->output_width; + info->output_height = srcinfo->output_height; + if (info->num_components == 1) { + info->iMCU_sample_width = srcinfo->_min_DCT_h_scaled_size; + info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size; + } else { + info->iMCU_sample_width = + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size; + info->iMCU_sample_height = + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size; + } + break; + } + + /* If cropping has been requested, compute the crop area's position and + * dimensions, ensuring that its upper left corner falls at an iMCU boundary. + */ + if (info->crop) { + /* Insert default values for unset crop parameters */ + if (info->crop_xoffset_set == JCROP_UNSET) + info->crop_xoffset = 0; /* default to +0 */ + if (info->crop_yoffset_set == JCROP_UNSET) + info->crop_yoffset = 0; /* default to +0 */ + if (info->crop_xoffset >= info->output_width || + info->crop_yoffset >= info->output_height) + ERREXIT(srcinfo, JERR_BAD_CROP_SPEC); + if (info->crop_width_set == JCROP_UNSET) + info->crop_width = info->output_width - info->crop_xoffset; + if (info->crop_height_set == JCROP_UNSET) + info->crop_height = info->output_height - info->crop_yoffset; + /* Ensure parameters are valid */ + if (info->crop_width <= 0 || info->crop_width > info->output_width || + info->crop_height <= 0 || info->crop_height > info->output_height || + info->crop_xoffset > info->output_width - info->crop_width || + info->crop_yoffset > info->output_height - info->crop_height) + ERREXIT(srcinfo, JERR_BAD_CROP_SPEC); + /* Convert negative crop offsets into regular offsets */ + if (info->crop_xoffset_set == JCROP_NEG) + xoffset = info->output_width - info->crop_width - info->crop_xoffset; + else + xoffset = info->crop_xoffset; + if (info->crop_yoffset_set == JCROP_NEG) + yoffset = info->output_height - info->crop_height - info->crop_yoffset; + else + yoffset = info->crop_yoffset; + /* Now adjust so that upper left corner falls at an iMCU boundary */ + if (info->crop_width_set == JCROP_FORCE) + info->output_width = info->crop_width; + else + info->output_width = + info->crop_width + (xoffset % info->iMCU_sample_width); + if (info->crop_height_set == JCROP_FORCE) + info->output_height = info->crop_height; + else + info->output_height = + info->crop_height + (yoffset % info->iMCU_sample_height); + /* Save x/y offsets measured in iMCUs */ + info->x_crop_offset = xoffset / info->iMCU_sample_width; + info->y_crop_offset = yoffset / info->iMCU_sample_height; + } else { + info->x_crop_offset = 0; + info->y_crop_offset = 0; + } + + /* Figure out whether we need workspace arrays, + * and if so whether they are transposed relative to the source. + */ + need_workspace = FALSE; + transpose_it = FALSE; + switch (info->transform) { + case JXFORM_NONE: + if (info->x_crop_offset != 0 || info->y_crop_offset != 0) + need_workspace = TRUE; + /* No workspace needed if neither cropping nor transforming */ + break; + case JXFORM_FLIP_H: + if (info->trim) + trim_right_edge(info, srcinfo->output_width); + if (info->y_crop_offset != 0 || info->slow_hflip) + need_workspace = TRUE; + /* do_flip_h_no_crop doesn't need a workspace array */ + break; + case JXFORM_FLIP_V: + if (info->trim) + trim_bottom_edge(info, srcinfo->output_height); + /* Need workspace arrays having same dimensions as source image. */ + need_workspace = TRUE; + break; + case JXFORM_TRANSPOSE: + /* transpose does NOT have to trim anything */ + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_TRANSVERSE: + if (info->trim) { + trim_right_edge(info, srcinfo->output_height); + trim_bottom_edge(info, srcinfo->output_width); + } + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_ROT_90: + if (info->trim) + trim_right_edge(info, srcinfo->output_height); + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_ROT_180: + if (info->trim) { + trim_right_edge(info, srcinfo->output_width); + trim_bottom_edge(info, srcinfo->output_height); + } + /* Need workspace arrays having same dimensions as source image. */ + need_workspace = TRUE; + break; + case JXFORM_ROT_270: + if (info->trim) + trim_bottom_edge(info, srcinfo->output_width); + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + } + + /* Allocate workspace if needed. + * Note that we allocate arrays padded out to the next iMCU boundary, + * so that transform routines need not worry about missing edge blocks. + */ + if (need_workspace) { + coef_arrays = (jvirt_barray_ptr *) + (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE, + sizeof(jvirt_barray_ptr) * info->num_components); + width_in_iMCUs = (JDIMENSION) + jdiv_round_up((long) info->output_width, + (long) info->iMCU_sample_width); + height_in_iMCUs = (JDIMENSION) + jdiv_round_up((long) info->output_height, + (long) info->iMCU_sample_height); + for (ci = 0; ci < info->num_components; ci++) { + compptr = srcinfo->comp_info + ci; + if (info->num_components == 1) { + /* we're going to force samp factors to 1x1 in this case */ + h_samp_factor = v_samp_factor = 1; + } else if (transpose_it) { + h_samp_factor = compptr->v_samp_factor; + v_samp_factor = compptr->h_samp_factor; + } else { + h_samp_factor = compptr->h_samp_factor; + v_samp_factor = compptr->v_samp_factor; + } + width_in_blocks = width_in_iMCUs * h_samp_factor; + height_in_blocks = height_in_iMCUs * v_samp_factor; + coef_arrays[ci] = (*srcinfo->mem->request_virt_barray) + ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE, + width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor); + } + info->workspace_coef_arrays = coef_arrays; + } else + info->workspace_coef_arrays = NULL; + + return TRUE; +} + + +/* Transpose destination image parameters */ + +LOCAL(void) +transpose_critical_parameters (j_compress_ptr dstinfo) +{ + int tblno, i, j, ci, itemp; + jpeg_component_info *compptr; + JQUANT_TBL *qtblptr; + JDIMENSION jtemp; + UINT16 qtemp; + + /* Transpose image dimensions */ + jtemp = dstinfo->image_width; + dstinfo->image_width = dstinfo->image_height; + dstinfo->image_height = jtemp; +#if JPEG_LIB_VERSION >= 70 + itemp = dstinfo->min_DCT_h_scaled_size; + dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size; + dstinfo->min_DCT_v_scaled_size = itemp; +#endif + + /* Transpose sampling factors */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + itemp = compptr->h_samp_factor; + compptr->h_samp_factor = compptr->v_samp_factor; + compptr->v_samp_factor = itemp; + } + + /* Transpose quantization tables */ + for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { + qtblptr = dstinfo->quant_tbl_ptrs[tblno]; + if (qtblptr != NULL) { + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < i; j++) { + qtemp = qtblptr->quantval[i*DCTSIZE+j]; + qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i]; + qtblptr->quantval[j*DCTSIZE+i] = qtemp; + } + } + } + } +} + + +/* Adjust Exif image parameters. + * + * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible. + */ + +#if JPEG_LIB_VERSION >= 70 +LOCAL(void) +adjust_exif_parameters (JOCTET *data, unsigned int length, + JDIMENSION new_width, JDIMENSION new_height) +{ + boolean is_motorola; /* Flag for byte order */ + unsigned int number_of_tags, tagnum; + unsigned int firstoffset, offset; + JDIMENSION new_value; + + if (length < 12) return; /* Length of an IFD entry */ + + /* Discover byte order */ + if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49) + is_motorola = FALSE; + else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D) + is_motorola = TRUE; + else + return; + + /* Check Tag Mark */ + if (is_motorola) { + if (GETJOCTET(data[2]) != 0) return; + if (GETJOCTET(data[3]) != 0x2A) return; + } else { + if (GETJOCTET(data[3]) != 0) return; + if (GETJOCTET(data[2]) != 0x2A) return; + } + + /* Get first IFD offset (offset to IFD0) */ + if (is_motorola) { + if (GETJOCTET(data[4]) != 0) return; + if (GETJOCTET(data[5]) != 0) return; + firstoffset = GETJOCTET(data[6]); + firstoffset <<= 8; + firstoffset += GETJOCTET(data[7]); + } else { + if (GETJOCTET(data[7]) != 0) return; + if (GETJOCTET(data[6]) != 0) return; + firstoffset = GETJOCTET(data[5]); + firstoffset <<= 8; + firstoffset += GETJOCTET(data[4]); + } + if (firstoffset > length - 2) return; /* check end of data segment */ + + /* Get the number of directory entries contained in this IFD */ + if (is_motorola) { + number_of_tags = GETJOCTET(data[firstoffset]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[firstoffset+1]); + } else { + number_of_tags = GETJOCTET(data[firstoffset+1]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[firstoffset]); + } + if (number_of_tags == 0) return; + firstoffset += 2; + + /* Search for ExifSubIFD offset Tag in IFD0 */ + for (;;) { + if (firstoffset > length - 12) return; /* check end of data segment */ + /* Get Tag number */ + if (is_motorola) { + tagnum = GETJOCTET(data[firstoffset]); + tagnum <<= 8; + tagnum += GETJOCTET(data[firstoffset+1]); + } else { + tagnum = GETJOCTET(data[firstoffset+1]); + tagnum <<= 8; + tagnum += GETJOCTET(data[firstoffset]); + } + if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */ + if (--number_of_tags == 0) return; + firstoffset += 12; + } + + /* Get the ExifSubIFD offset */ + if (is_motorola) { + if (GETJOCTET(data[firstoffset+8]) != 0) return; + if (GETJOCTET(data[firstoffset+9]) != 0) return; + offset = GETJOCTET(data[firstoffset+10]); + offset <<= 8; + offset += GETJOCTET(data[firstoffset+11]); + } else { + if (GETJOCTET(data[firstoffset+11]) != 0) return; + if (GETJOCTET(data[firstoffset+10]) != 0) return; + offset = GETJOCTET(data[firstoffset+9]); + offset <<= 8; + offset += GETJOCTET(data[firstoffset+8]); + } + if (offset > length - 2) return; /* check end of data segment */ + + /* Get the number of directory entries contained in this SubIFD */ + if (is_motorola) { + number_of_tags = GETJOCTET(data[offset]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[offset+1]); + } else { + number_of_tags = GETJOCTET(data[offset+1]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[offset]); + } + if (number_of_tags < 2) return; + offset += 2; + + /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */ + do { + if (offset > length - 12) return; /* check end of data segment */ + /* Get Tag number */ + if (is_motorola) { + tagnum = GETJOCTET(data[offset]); + tagnum <<= 8; + tagnum += GETJOCTET(data[offset+1]); + } else { + tagnum = GETJOCTET(data[offset+1]); + tagnum <<= 8; + tagnum += GETJOCTET(data[offset]); + } + if (tagnum == 0xA002 || tagnum == 0xA003) { + if (tagnum == 0xA002) + new_value = new_width; /* ExifImageWidth Tag */ + else + new_value = new_height; /* ExifImageHeight Tag */ + if (is_motorola) { + data[offset+2] = 0; /* Format = unsigned long (4 octets) */ + data[offset+3] = 4; + data[offset+4] = 0; /* Number Of Components = 1 */ + data[offset+5] = 0; + data[offset+6] = 0; + data[offset+7] = 1; + data[offset+8] = 0; + data[offset+9] = 0; + data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset+11] = (JOCTET)(new_value & 0xFF); + } else { + data[offset+2] = 4; /* Format = unsigned long (4 octets) */ + data[offset+3] = 0; + data[offset+4] = 1; /* Number Of Components = 1 */ + data[offset+5] = 0; + data[offset+6] = 0; + data[offset+7] = 0; + data[offset+8] = (JOCTET)(new_value & 0xFF); + data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset+10] = 0; + data[offset+11] = 0; + } + } + offset += 12; + } while (--number_of_tags); +} +#endif + + +/* Adjust output image parameters as needed. + * + * This must be called after jpeg_copy_critical_parameters() + * and before jpeg_write_coefficients(). + * + * The return value is the set of virtual coefficient arrays to be written + * (either the ones allocated by jtransform_request_workspace, or the + * original source data arrays). The caller will need to pass this value + * to jpeg_write_coefficients(). + */ + +GLOBAL(jvirt_barray_ptr *) +jtransform_adjust_parameters (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +{ + /* If force-to-grayscale is requested, adjust destination parameters */ + if (info->force_grayscale) { + /* First, ensure we have YCbCr or grayscale data, and that the source's + * Y channel is full resolution. (No reasonable person would make Y + * be less than full resolution, so actually coping with that case + * isn't worth extra code space. But we check it to avoid crashing.) + */ + if (((dstinfo->jpeg_color_space == JCS_YCbCr && + dstinfo->num_components == 3) || + (dstinfo->jpeg_color_space == JCS_GRAYSCALE && + dstinfo->num_components == 1)) && + srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor && + srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) { + /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed + * properly. Among other things, it sets the target h_samp_factor & + * v_samp_factor to 1, which typically won't match the source. + * We have to preserve the source's quantization table number, however. + */ + int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no; + jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE); + dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no; + } else { + /* Sorry, can't do it */ + ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL); + } + } else if (info->num_components == 1) { + /* For a single-component source, we force the destination sampling factors + * to 1x1, with or without force_grayscale. This is useful because some + * decoders choke on grayscale images with other sampling factors. + */ + dstinfo->comp_info[0].h_samp_factor = 1; + dstinfo->comp_info[0].v_samp_factor = 1; + } + + /* Correct the destination's image dimensions as necessary + * for rotate/flip, resize, and crop operations. + */ +#if JPEG_LIB_VERSION >= 70 + dstinfo->jpeg_width = info->output_width; + dstinfo->jpeg_height = info->output_height; +#endif + + /* Transpose destination image parameters */ + switch (info->transform) { + case JXFORM_TRANSPOSE: + case JXFORM_TRANSVERSE: + case JXFORM_ROT_90: + case JXFORM_ROT_270: +#if JPEG_LIB_VERSION < 70 + dstinfo->image_width = info->output_height; + dstinfo->image_height = info->output_width; +#endif + transpose_critical_parameters(dstinfo); + break; + default: +#if JPEG_LIB_VERSION < 70 + dstinfo->image_width = info->output_width; + dstinfo->image_height = info->output_height; +#endif + break; + } + + /* Adjust Exif properties */ + if (srcinfo->marker_list != NULL && + srcinfo->marker_list->marker == JPEG_APP0+1 && + srcinfo->marker_list->data_length >= 6 && + GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 && + GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 && + GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 && + GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 && + GETJOCTET(srcinfo->marker_list->data[4]) == 0 && + GETJOCTET(srcinfo->marker_list->data[5]) == 0) { + /* Suppress output of JFIF marker */ + dstinfo->write_JFIF_header = FALSE; +#if JPEG_LIB_VERSION >= 70 + /* Adjust Exif image parameters */ + if (dstinfo->jpeg_width != srcinfo->image_width || + dstinfo->jpeg_height != srcinfo->image_height) + /* Align data segment to start of TIFF structure for parsing */ + adjust_exif_parameters(srcinfo->marker_list->data + 6, + srcinfo->marker_list->data_length - 6, + dstinfo->jpeg_width, dstinfo->jpeg_height); +#endif + } + + /* Return the appropriate output data set */ + if (info->workspace_coef_arrays != NULL) + return info->workspace_coef_arrays; + return src_coef_arrays; +} + + +/* Execute the actual transformation, if any. + * + * This must be called *after* jpeg_write_coefficients, because it depends + * on jpeg_write_coefficients to have computed subsidiary values such as + * the per-component width and height fields in the destination object. + * + * Note that some transformations will modify the source data arrays! + */ + +GLOBAL(void) +jtransform_execute_transform (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +{ + jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays; + + /* Note: conditions tested here should match those in switch statement + * in jtransform_request_workspace() + */ + switch (info->transform) { + case JXFORM_NONE: + if (info->x_crop_offset != 0 || info->y_crop_offset != 0) + do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_FLIP_H: + if (info->y_crop_offset != 0 || info->slow_hflip) + do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + else + do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset, + src_coef_arrays); + break; + case JXFORM_FLIP_V: + do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_TRANSPOSE: + do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_TRANSVERSE: + do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_90: + do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_180: + do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_270: + do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + } +} + +/* jtransform_perfect_transform + * + * Determine whether lossless transformation is perfectly + * possible for a specified image and transformation. + * + * Inputs: + * image_width, image_height: source image dimensions. + * MCU_width, MCU_height: pixel dimensions of MCU. + * transform: transformation identifier. + * Parameter sources from initialized jpeg_struct + * (after reading source header): + * image_width = cinfo.image_width + * image_height = cinfo.image_height + * MCU_width = cinfo.max_h_samp_factor * cinfo.block_size + * MCU_height = cinfo.max_v_samp_factor * cinfo.block_size + * Result: + * TRUE = perfect transformation possible + * FALSE = perfect transformation not possible + * (may use custom action then) + */ + +GLOBAL(boolean) +jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, + int MCU_width, int MCU_height, + JXFORM_CODE transform) +{ + boolean result = TRUE; /* initialize TRUE */ + + switch (transform) { + case JXFORM_FLIP_H: + case JXFORM_ROT_270: + if (image_width % (JDIMENSION) MCU_width) + result = FALSE; + break; + case JXFORM_FLIP_V: + case JXFORM_ROT_90: + if (image_height % (JDIMENSION) MCU_height) + result = FALSE; + break; + case JXFORM_TRANSVERSE: + case JXFORM_ROT_180: + if (image_width % (JDIMENSION) MCU_width) + result = FALSE; + if (image_height % (JDIMENSION) MCU_height) + result = FALSE; + break; + default: + break; + } + + return result; +} + +#endif /* TRANSFORMS_SUPPORTED */ + + +/* Setup decompression object to save desired markers in memory. + * This must be called before jpeg_read_header() to have the desired effect. + */ + +GLOBAL(void) +jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option) +{ +#ifdef SAVE_MARKERS_SUPPORTED + int m; + + /* Save comments except under NONE option */ + if (option != JCOPYOPT_NONE) { + jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF); + } + /* Save all types of APPn markers iff ALL option */ + if (option == JCOPYOPT_ALL) { + for (m = 0; m < 16; m++) + jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF); + } +#endif /* SAVE_MARKERS_SUPPORTED */ +} + +/* Copy markers saved in the given source object to the destination object. + * This should be called just after jpeg_start_compress() or + * jpeg_write_coefficients(). + * Note that those routines will have written the SOI, and also the + * JFIF APP0 or Adobe APP14 markers if selected. + */ + +GLOBAL(void) +jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option) +{ + jpeg_saved_marker_ptr marker; + + /* In the current implementation, we don't actually need to examine the + * option flag here; we just copy everything that got saved. + * But to avoid confusion, we do not output JFIF and Adobe APP14 markers + * if the encoder library already wrote one. + */ + for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) { + if (dstinfo->write_JFIF_header && + marker->marker == JPEG_APP0 && + marker->data_length >= 5 && + GETJOCTET(marker->data[0]) == 0x4A && + GETJOCTET(marker->data[1]) == 0x46 && + GETJOCTET(marker->data[2]) == 0x49 && + GETJOCTET(marker->data[3]) == 0x46 && + GETJOCTET(marker->data[4]) == 0) + continue; /* reject duplicate JFIF */ + if (dstinfo->write_Adobe_marker && + marker->marker == JPEG_APP0+14 && + marker->data_length >= 5 && + GETJOCTET(marker->data[0]) == 0x41 && + GETJOCTET(marker->data[1]) == 0x64 && + GETJOCTET(marker->data[2]) == 0x6F && + GETJOCTET(marker->data[3]) == 0x62 && + GETJOCTET(marker->data[4]) == 0x65) + continue; /* reject duplicate Adobe */ + jpeg_write_marker(dstinfo, marker->marker, + marker->data, marker->data_length); + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/transupp.h glmark2-2021.02/android/jni/src/libjpeg-turbo/transupp.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/transupp.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/transupp.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,207 @@ +/* + * transupp.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains declarations for image transformation routines and + * other utility code used by the jpegtran sample application. These are + * NOT part of the core JPEG library. But we keep these routines separate + * from jpegtran.c to ease the task of maintaining jpegtran-like programs + * that have other user interfaces. + * + * NOTE: all the routines declared here have very specific requirements + * about when they are to be executed during the reading and writing of the + * source and destination files. See the comments in transupp.c, or see + * jpegtran.c for an example of correct usage. + */ + +/* If you happen not to want the image transform support, disable it here */ +#ifndef TRANSFORMS_SUPPORTED +#define TRANSFORMS_SUPPORTED 1 /* 0 disables transform code */ +#endif + +/* + * Although rotating and flipping data expressed as DCT coefficients is not + * hard, there is an asymmetry in the JPEG format specification for images + * whose dimensions aren't multiples of the iMCU size. The right and bottom + * image edges are padded out to the next iMCU boundary with junk data; but + * no padding is possible at the top and left edges. If we were to flip + * the whole image including the pad data, then pad garbage would become + * visible at the top and/or left, and real pixels would disappear into the + * pad margins --- perhaps permanently, since encoders & decoders may not + * bother to preserve DCT blocks that appear to be completely outside the + * nominal image area. So, we have to exclude any partial iMCUs from the + * basic transformation. + * + * Transpose is the only transformation that can handle partial iMCUs at the + * right and bottom edges completely cleanly. flip_h can flip partial iMCUs + * at the bottom, but leaves any partial iMCUs at the right edge untouched. + * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched. + * The other transforms are defined as combinations of these basic transforms + * and process edge blocks in a way that preserves the equivalence. + * + * The "trim" option causes untransformable partial iMCUs to be dropped; + * this is not strictly lossless, but it usually gives the best-looking + * result for odd-size images. Note that when this option is active, + * the expected mathematical equivalences between the transforms may not hold. + * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim + * followed by -rot 180 -trim trims both edges.) + * + * We also offer a lossless-crop option, which discards data outside a given + * image region but losslessly preserves what is inside. Like the rotate and + * flip transforms, lossless crop is restricted by the JPEG format: the upper + * left corner of the selected region must fall on an iMCU boundary. If this + * does not hold for the given crop parameters, we silently move the upper left + * corner up and/or left to make it so, simultaneously increasing the region + * dimensions to keep the lower right crop corner unchanged. (Thus, the + * output image covers at least the requested region, but may cover more.) + * The adjustment of the region dimensions may be optionally disabled. + * + * We also provide a lossless-resize option, which is kind of a lossless-crop + * operation in the DCT coefficient block domain - it discards higher-order + * coefficients and losslessly preserves lower-order coefficients of a + * sub-block. + * + * Rotate/flip transform, resize, and crop can be requested together in a + * single invocation. The crop is applied last --- that is, the crop region + * is specified in terms of the destination image after transform/resize. + * + * We also offer a "force to grayscale" option, which simply discards the + * chrominance channels of a YCbCr image. This is lossless in the sense that + * the luminance channel is preserved exactly. It's not the same kind of + * thing as the rotate/flip transformations, but it's convenient to handle it + * as part of this package, mainly because the transformation routines have to + * be aware of the option to know how many components to work on. + */ + + +/* + * Codes for supported types of image transformations. + */ + +typedef enum { + JXFORM_NONE, /* no transformation */ + JXFORM_FLIP_H, /* horizontal flip */ + JXFORM_FLIP_V, /* vertical flip */ + JXFORM_TRANSPOSE, /* transpose across UL-to-LR axis */ + JXFORM_TRANSVERSE, /* transpose across UR-to-LL axis */ + JXFORM_ROT_90, /* 90-degree clockwise rotation */ + JXFORM_ROT_180, /* 180-degree rotation */ + JXFORM_ROT_270 /* 270-degree clockwise (or 90 ccw) */ +} JXFORM_CODE; + +/* + * Codes for crop parameters, which can individually be unspecified, + * positive or negative for xoffset or yoffset, + * positive or forced for width or height. + */ + +typedef enum { + JCROP_UNSET, + JCROP_POS, + JCROP_NEG, + JCROP_FORCE +} JCROP_CODE; + +/* + * Transform parameters struct. + * NB: application must not change any elements of this struct after + * calling jtransform_request_workspace. + */ + +typedef struct { + /* Options: set by caller */ + JXFORM_CODE transform; /* image transform operator */ + boolean perfect; /* if TRUE, fail if partial MCUs are requested */ + boolean trim; /* if TRUE, trim partial MCUs as needed */ + boolean force_grayscale; /* if TRUE, convert color image to grayscale */ + boolean crop; /* if TRUE, crop source image */ + boolean slow_hflip; /* For best performance, the JXFORM_FLIP_H transform + normally modifies the source coefficients in place. + Setting this to TRUE will instead use a slower, + double-buffered algorithm, which leaves the source + coefficients in tact (necessary if other transformed + images must be generated from the same set of + coefficients. */ + + /* Crop parameters: application need not set these unless crop is TRUE. + * These can be filled in by jtransform_parse_crop_spec(). + */ + JDIMENSION crop_width; /* Width of selected region */ + JCROP_CODE crop_width_set; /* (forced disables adjustment) */ + JDIMENSION crop_height; /* Height of selected region */ + JCROP_CODE crop_height_set; /* (forced disables adjustment) */ + JDIMENSION crop_xoffset; /* X offset of selected region */ + JCROP_CODE crop_xoffset_set; /* (negative measures from right edge) */ + JDIMENSION crop_yoffset; /* Y offset of selected region */ + JCROP_CODE crop_yoffset_set; /* (negative measures from bottom edge) */ + + /* Internal workspace: caller should not touch these */ + int num_components; /* # of components in workspace */ + jvirt_barray_ptr *workspace_coef_arrays; /* workspace for transformations */ + JDIMENSION output_width; /* cropped destination dimensions */ + JDIMENSION output_height; + JDIMENSION x_crop_offset; /* destination crop offsets measured in iMCUs */ + JDIMENSION y_crop_offset; + int iMCU_sample_width; /* destination iMCU size */ + int iMCU_sample_height; +} jpeg_transform_info; + + +#if TRANSFORMS_SUPPORTED + +/* Parse a crop specification (written in X11 geometry style) */ +EXTERN(boolean) jtransform_parse_crop_spec + (jpeg_transform_info *info, const char *spec); +/* Request any required workspace */ +EXTERN(boolean) jtransform_request_workspace + (j_decompress_ptr srcinfo, jpeg_transform_info *info); +/* Adjust output image parameters */ +EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); +/* Execute the actual transformation, if any */ +EXTERN(void) jtransform_execute_transform + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); +/* Determine whether lossless transformation is perfectly + * possible for a specified image and transformation. + */ +EXTERN(boolean) jtransform_perfect_transform + (JDIMENSION image_width, JDIMENSION image_height, int MCU_width, + int MCU_height, JXFORM_CODE transform); + +/* jtransform_execute_transform used to be called + * jtransform_execute_transformation, but some compilers complain about + * routine names that long. This macro is here to avoid breaking any + * old source code that uses the original name... + */ +#define jtransform_execute_transformation jtransform_execute_transform + +#endif /* TRANSFORMS_SUPPORTED */ + + +/* + * Support for copying optional markers from source to destination file. + */ + +typedef enum { + JCOPYOPT_NONE, /* copy no optional markers */ + JCOPYOPT_COMMENTS, /* copy only comment (COM) markers */ + JCOPYOPT_ALL /* copy all optional markers */ +} JCOPY_OPTION; + +#define JCOPYOPT_DEFAULT JCOPYOPT_COMMENTS /* recommended default */ + +/* Setup decompression object to save desired markers in memory */ +EXTERN(void) jcopy_markers_setup + (j_decompress_ptr srcinfo, JCOPY_OPTION option); +/* Copy markers saved in the given source object to the destination object */ +EXTERN(void) jcopy_markers_execute + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg.c glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,2121 @@ +/* + * Copyright (C)2009-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* TurboJPEG/LJT: this implements the TurboJPEG API using libjpeg or + libjpeg-turbo */ + +#include +#include +#include +#include +#define JPEG_INTERNALS +#include +#include +#include +#include "./turbojpeg.h" +#include "./tjutil.h" +#include "transupp.h" +#include "./jpegcomp.h" + +extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, + unsigned long *, boolean); +extern void jpeg_mem_src_tj(j_decompress_ptr, const unsigned char *, + unsigned long); + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) +#define isPow2(x) (((x)&(x-1))==0) + + +/* Error handling (based on example in example.c) */ + +static char errStr[JMSG_LENGTH_MAX]="No error"; + +struct my_error_mgr +{ + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; + void (*emit_message)(j_common_ptr, int); + boolean warning; +}; +typedef struct my_error_mgr *my_error_ptr; + +static void my_error_exit(j_common_ptr cinfo) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->setjmp_buffer, 1); +} + +/* Based on output_message() in jerror.c */ + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, errStr); +} + +static void my_emit_message(j_common_ptr cinfo, int msg_level) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + myerr->emit_message(cinfo, msg_level); + if(msg_level<0) myerr->warning=TRUE; +} + + +/* Global structures, macros, etc. */ + +enum {COMPRESS=1, DECOMPRESS=2}; + +typedef struct _tjinstance +{ + struct jpeg_compress_struct cinfo; + struct jpeg_decompress_struct dinfo; + struct my_error_mgr jerr; + int init, headerRead; +} tjinstance; + +static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3, 3}; + +static const JXFORM_CODE xformtypes[TJ_NUMXOP]= +{ + JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE, + JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270 +}; + +#define NUMSF 16 +static const tjscalingfactor sf[NUMSF]={ + {2, 1}, + {15, 8}, + {7, 4}, + {13, 8}, + {3, 2}, + {11, 8}, + {5, 4}, + {9, 8}, + {1, 1}, + {7, 8}, + {3, 4}, + {5, 8}, + {1, 2}, + {3, 8}, + {1, 4}, + {1, 8} +}; + +#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ + retval=-1; goto bailout;} +#define getinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_compress_ptr cinfo=NULL; j_decompress_ptr dinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + cinfo=&this->cinfo; dinfo=&this->dinfo; \ + this->jerr.warning=FALSE; +#define getcinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_compress_ptr cinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + cinfo=&this->cinfo; \ + this->jerr.warning=FALSE; +#define getdinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_decompress_ptr dinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + dinfo=&this->dinfo; \ + this->jerr.warning=FALSE; + +static int getPixelFormat(int pixelSize, int flags) +{ + if(pixelSize==1) return TJPF_GRAY; + if(pixelSize==3) + { + if(flags&TJ_BGR) return TJPF_BGR; + else return TJPF_RGB; + } + if(pixelSize==4) + { + if(flags&TJ_ALPHAFIRST) + { + if(flags&TJ_BGR) return TJPF_XBGR; + else return TJPF_XRGB; + } + else + { + if(flags&TJ_BGR) return TJPF_BGRX; + else return TJPF_RGBX; + } + } + return -1; +} + +static int setCompDefaults(struct jpeg_compress_struct *cinfo, + int pixelFormat, int subsamp, int jpegQual, int flags) +{ + int retval=0; + char *env=NULL; + + switch(pixelFormat) + { + case TJPF_GRAY: + cinfo->in_color_space=JCS_GRAYSCALE; break; + #if JCS_EXTENSIONS==1 + case TJPF_RGB: + cinfo->in_color_space=JCS_EXT_RGB; break; + case TJPF_BGR: + cinfo->in_color_space=JCS_EXT_BGR; break; + case TJPF_RGBX: + case TJPF_RGBA: + cinfo->in_color_space=JCS_EXT_RGBX; break; + case TJPF_BGRX: + case TJPF_BGRA: + cinfo->in_color_space=JCS_EXT_BGRX; break; + case TJPF_XRGB: + case TJPF_ARGB: + cinfo->in_color_space=JCS_EXT_XRGB; break; + case TJPF_XBGR: + case TJPF_ABGR: + cinfo->in_color_space=JCS_EXT_XBGR; break; + #else + case TJPF_RGB: + case TJPF_BGR: + case TJPF_RGBX: + case TJPF_BGRX: + case TJPF_XRGB: + case TJPF_XBGR: + case TJPF_RGBA: + case TJPF_BGRA: + case TJPF_ARGB: + case TJPF_ABGR: + cinfo->in_color_space=JCS_RGB; pixelFormat=TJPF_RGB; + break; + #endif + case TJPF_CMYK: + cinfo->in_color_space=JCS_CMYK; break; + } + + cinfo->input_components=tjPixelSize[pixelFormat]; + jpeg_set_defaults(cinfo); + +#ifndef NO_GETENV + if((env=getenv("TJ_OPTIMIZE"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) + cinfo->optimize_coding=TRUE; + if((env=getenv("TJ_ARITHMETIC"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) + cinfo->arith_code=TRUE; + if((env=getenv("TJ_RESTART"))!=NULL && strlen(env)>0) + { + int temp=-1; char tempc=0; + if(sscanf(env, "%d%c", &temp, &tempc)>=1 && temp>=0 && temp<=65535) + { + if(toupper(tempc)=='B') + { + cinfo->restart_interval=temp; + cinfo->restart_in_rows=0; + } + else + cinfo->restart_in_rows=temp; + } + } +#endif + + if(jpegQual>=0) + { + jpeg_set_quality(cinfo, jpegQual, TRUE); + if(jpegQual>=96 || flags&TJFLAG_ACCURATEDCT) cinfo->dct_method=JDCT_ISLOW; + else cinfo->dct_method=JDCT_FASTEST; + } + if(subsamp==TJSAMP_GRAY) + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + else if(pixelFormat==TJPF_CMYK) + jpeg_set_colorspace(cinfo, JCS_YCCK); + else jpeg_set_colorspace(cinfo, JCS_YCbCr); + +#ifndef NO_GETENV + if((env=getenv("TJ_PROGRESSIVE"))!=NULL && strlen(env)>0 + && !strcmp(env, "1")) + jpeg_simple_progression(cinfo); +#endif + + cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8; + cinfo->comp_info[1].h_samp_factor=1; + cinfo->comp_info[2].h_samp_factor=1; + if(cinfo->num_components>3) + cinfo->comp_info[3].h_samp_factor=tjMCUWidth[subsamp]/8; + cinfo->comp_info[0].v_samp_factor=tjMCUHeight[subsamp]/8; + cinfo->comp_info[1].v_samp_factor=1; + cinfo->comp_info[2].v_samp_factor=1; + if(cinfo->num_components>3) + cinfo->comp_info[3].v_samp_factor=tjMCUHeight[subsamp]/8; + + return retval; +} + +static int setDecompDefaults(struct jpeg_decompress_struct *dinfo, + int pixelFormat, int flags) +{ + int retval=0; + + switch(pixelFormat) + { + case TJPF_GRAY: + dinfo->out_color_space=JCS_GRAYSCALE; break; + #if JCS_EXTENSIONS==1 + case TJPF_RGB: + dinfo->out_color_space=JCS_EXT_RGB; break; + case TJPF_BGR: + dinfo->out_color_space=JCS_EXT_BGR; break; + case TJPF_RGBX: + dinfo->out_color_space=JCS_EXT_RGBX; break; + case TJPF_BGRX: + dinfo->out_color_space=JCS_EXT_BGRX; break; + case TJPF_XRGB: + dinfo->out_color_space=JCS_EXT_XRGB; break; + case TJPF_XBGR: + dinfo->out_color_space=JCS_EXT_XBGR; break; + #if JCS_ALPHA_EXTENSIONS==1 + case TJPF_RGBA: + dinfo->out_color_space=JCS_EXT_RGBA; break; + case TJPF_BGRA: + dinfo->out_color_space=JCS_EXT_BGRA; break; + case TJPF_ARGB: + dinfo->out_color_space=JCS_EXT_ARGB; break; + case TJPF_ABGR: + dinfo->out_color_space=JCS_EXT_ABGR; break; + #endif + #else + case TJPF_RGB: + case TJPF_BGR: + case TJPF_RGBX: + case TJPF_BGRX: + case TJPF_XRGB: + case TJPF_XBGR: + case TJPF_RGBA: + case TJPF_BGRA: + case TJPF_ARGB: + case TJPF_ABGR: + dinfo->out_color_space=JCS_RGB; break; + #endif + case TJPF_CMYK: + dinfo->out_color_space=JCS_CMYK; break; + default: + _throw("Unsupported pixel format"); + } + + if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; + + bailout: + return retval; +} + + +static int getSubsamp(j_decompress_ptr dinfo) +{ + int retval=-1, i, k; + + /* The sampling factors actually have no meaning with grayscale JPEG files, + and in fact it's possible to generate grayscale JPEGs with sampling + factors > 1 (even though those sampling factors are ignored by the + decompressor.) Thus, we need to treat grayscale as a special case. */ + if(dinfo->num_components==1 && dinfo->jpeg_color_space==JCS_GRAYSCALE) + return TJSAMP_GRAY; + + for(i=0; inum_components==pixelsize[i] + || ((dinfo->jpeg_color_space==JCS_YCCK + || dinfo->jpeg_color_space==JCS_CMYK) + && pixelsize[i]==3 && dinfo->num_components==4)) + { + if(dinfo->comp_info[0].h_samp_factor==tjMCUWidth[i]/8 + && dinfo->comp_info[0].v_samp_factor==tjMCUHeight[i]/8) + { + int match=0; + for(k=1; knum_components; k++) + { + int href=1, vref=1; + if(dinfo->jpeg_color_space==JCS_YCCK && k==3) + { + href=tjMCUWidth[i]/8; vref=tjMCUHeight[i]/8; + } + if(dinfo->comp_info[k].h_samp_factor==href + && dinfo->comp_info[k].v_samp_factor==vref) + match++; + } + if(match==dinfo->num_components-1) + { + retval=i; break; + } + } + } + } + return retval; +} + + +#ifndef JCS_EXTENSIONS + +/* Conversion functions to emulate the colorspace extensions. This allows the + TurboJPEG wrapper to be used with libjpeg */ + +#define TORGB(PS, ROFFSET, GOFFSET, BOFFSET) { \ + int rowPad=pitch-width*PS; \ + while(height--) \ + { \ + unsigned char *endOfRow=src+width*PS; \ + while(srcjerr.setjmp_buffer)) return -1; + if(this->init&COMPRESS) jpeg_destroy_compress(cinfo); + if(this->init&DECOMPRESS) jpeg_destroy_decompress(dinfo); + free(this); + return 0; +} + + +/* These are exposed mainly because Windows can't malloc() and free() across + DLL boundaries except when the CRT DLL is used, and we don't use the CRT DLL + with turbojpeg.dll for compatibility reasons. However, these functions + can potentially be used for other purposes by different implementations. */ + +DLLEXPORT void DLLCALL tjFree(unsigned char *buf) +{ + if(buf) free(buf); +} + + +DLLEXPORT unsigned char *DLLCALL tjAlloc(int bytes) +{ + return (unsigned char *)malloc(bytes); +} + + +/* Compressor */ + +static tjhandle _tjInitCompress(tjinstance *this) +{ + static unsigned char buffer[1]; + unsigned char *buf=buffer; unsigned long size=1; + + /* This is also straight out of example.c */ + this->cinfo.err=jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit=my_error_exit; + this->jerr.pub.output_message=my_output_message; + this->jerr.emit_message=this->jerr.pub.emit_message; + this->jerr.pub.emit_message=my_emit_message; + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + if(this) free(this); return NULL; + } + + jpeg_create_compress(&this->cinfo); + /* Make an initial call so it will create the destination manager */ + jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0); + + this->init|=COMPRESS; + return (tjhandle)this; +} + +DLLEXPORT tjhandle DLLCALL tjInitCompress(void) +{ + tjinstance *this=NULL; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitCompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + return _tjInitCompress(this); +} + + +DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, + int jpegSubsamp) +{ + unsigned long retval=0; int mcuw, mcuh, chromasf; + if(width<1 || height<1 || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT) + _throw("tjBufSize(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) */ + mcuw=tjMCUWidth[jpegSubsamp]; + mcuh=tjMCUHeight[jpegSubsamp]; + chromasf=jpegSubsamp==TJSAMP_GRAY? 0: 4*64/(mcuw*mcuh); + retval=PAD(width, mcuw) * PAD(height, mcuh) * (2 + chromasf) + 2048; + + bailout: + return retval; +} + +DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height) +{ + unsigned long retval=0; + if(width<1 || height<1) + _throw("TJBUFSIZE(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) */ + retval=PAD(width, 16) * PAD(height, 16) * 6 + 2048; + + bailout: + return retval; +} + + +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, + int subsamp) +{ + int retval=0, nc, i; + + if(subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjBufSizeYUV2(): Invalid argument"); + + nc=(subsamp==TJSAMP_GRAY? 1:3); + for(i=0; i=TJ_NUMSAMP) + _throw("tjPlaneWidth(): Invalid argument"); + nc=(subsamp==TJSAMP_GRAY? 1:3); + if(componentID<0 || componentID>=nc) + _throw("tjPlaneWidth(): Invalid argument"); + + pw=PAD(width, tjMCUWidth[subsamp]/8); + if(componentID==0) + retval=pw; + else + retval=pw*8/tjMCUWidth[subsamp]; + + bailout: + return retval; +} + + +DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp) +{ + int ph, nc, retval=0; + + if(height<1 || subsamp<0 || subsamp>=TJ_NUMSAMP) + _throw("tjPlaneHeight(): Invalid argument"); + nc=(subsamp==TJSAMP_GRAY? 1:3); + if(componentID<0 || componentID>=nc) + _throw("tjPlaneHeight(): Invalid argument"); + + ph=PAD(height, tjMCUHeight[subsamp]/8); + if(componentID==0) + retval=ph; + else + retval=ph*8/tjMCUHeight[subsamp]; + + bailout: + return retval; +} + + +DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, + int stride, int height, int subsamp) +{ + unsigned long retval=0; + int pw, ph; + + if(width<1 || height<1 || subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjPlaneSizeYUV(): Invalid argument"); + + pw=tjPlaneWidth(componentID, width, subsamp); + ph=tjPlaneHeight(componentID, height, subsamp); + if(pw<0 || ph<0) return -1; + + if(stride==0) stride=pw; + else stride=abs(stride); + + retval=stride*(ph-1)+pw; + + bailout: + return retval; +} + + +DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +{ + int i, retval=0, alloc=1; JSAMPROW *row_pointer=NULL; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + #endif + + getcinstance(handle) + if((this->init&COMPRESS)==0) + _throw("tjCompress2(): Instance has not been initialized for compression"); + + if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF || jpegBuf==NULL || jpegSize==NULL + || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT || jpegQual<0 || jpegQual>100) + _throw("tjCompress2(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) + { + rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); + if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure"); + srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); + pitch=width*RGB_PIXELSIZE; + } + #endif + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; *jpegSize=tjBufSize(width, height, jpegSubsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if(setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags)==-1) + return -1; + + jpeg_start_compress(cinfo, TRUE); + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL) + _throw("tjCompress2(): Memory allocation failure"); + for(i=0; inext_scanlineimage_height) + { + jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline], + cinfo->image_height-cinfo->next_scanline); + } + jpeg_finish_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +{ + int retval=0; unsigned long size; + if(flags&TJ_YUV) + { + size=tjBufSizeYUV(width, height, jpegSubsamp); + retval=tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), jpegBuf, jpegSubsamp, flags); + } + else + { + retval=tjCompress2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), &jpegBuf, &size, jpegSubsamp, jpegQual, + flags|TJFLAG_NOREALLOC); + } + *jpegSize=size; + return retval; +} + + +DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, + int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *outbuf[MAX_COMPONENTS]; + int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + #endif + + getcinstance(handle); + + for(i=0; iinit&COMPRESS)==0) + _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression"); + + if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF || !dstPlanes || !dstPlanes[0] || subsamp<0 + || subsamp>=NUMSUBOPT) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pixelFormat==TJPF_CMYK) + _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels"); + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) + { + rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); + if(!rgbBuf) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); + pitch=width*RGB_PIXELSIZE; + } + #endif + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1; + + /* Execute only the parts of jpeg_start_compress() that we need. If we + were to call the whole jpeg_start_compress() function, then it would try + to write the file headers, which could overflow the output buffer if the + YUV image were very small. */ + if(cinfo->global_state!=CSTATE_START) + _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state"); + (*cinfo->err->reset_error_mgr)((j_common_ptr)cinfo); + jinit_c_master_control(cinfo, FALSE); + jinit_color_converter(cinfo); + jinit_downsampler(cinfo); + (*cinfo->cconvert->start_pass)(cinfo); + + pw0=PAD(width, cinfo->max_h_samp_factor); + ph0=PAD(height, cinfo->max_v_samp_factor); + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(i=0; inum_components; i++) + { + compptr=&cinfo->comp_info[i]; + _tmpbuf[i]=(JSAMPLE *)malloc( + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16); + if(!_tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor); + if(!tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowmax_v_samp_factor; row++) + { + unsigned char *_tmpbuf_aligned= + (unsigned char *)PAD((size_t)_tmpbuf[i], 16); + tmpbuf[i][row]=&_tmpbuf_aligned[ + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * row]; + } + _tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) + * compptr->v_samp_factor + 16); + if(!_tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); + if(!tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowv_samp_factor; row++) + { + unsigned char *_tmpbuf2_aligned= + (unsigned char *)PAD((size_t)_tmpbuf2[i], 16); + tmpbuf2[i][row]=&_tmpbuf2_aligned[ + PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; + } + pw[i]=pw0*compptr->h_samp_factor/cinfo->max_h_samp_factor; + ph[i]=ph0*compptr->v_samp_factor/cinfo->max_v_samp_factor; + outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); + if(!outbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + ptr=dstPlanes[i]; + for(row=0; rowmax_v_samp_factor) + { + (*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, 0, + cinfo->max_v_samp_factor); + (cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0); + for(i=0, compptr=cinfo->comp_info; inum_components; i++, compptr++) + jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i], + row*compptr->v_samp_factor/cinfo->max_v_samp_factor, + compptr->v_samp_factor, pw[i]); + } + cinfo->next_scanline+=height; + jpeg_abort_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags) +{ + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(width<=0 || height<=0 || dstBuf==NULL || pad<0 || !isPow2(pad) + || subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjEncodeYUV3(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + dstPlanes[0]=dstBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + dstPlanes[1]=dstPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; + dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; + } + + return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat, + dstPlanes, strides, subsamp, flags); + + bailout: + return retval; +} + +DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, + int subsamp, int flags) +{ + return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat, + dstBuf, 4, subsamp, flags); +} + +DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, + int subsamp, int flags) +{ + return tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), dstBuf, subsamp, flags); +} + + +DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, int width, const int *strides, int height, + int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, + int flags) +{ + int i, row, retval=0, alloc=1; JSAMPROW *inbuf[MAX_COMPONENTS]; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; + + getcinstance(handle) + + for(i=0; iinit&COMPRESS)==0) + _throw("tjCompressFromYUVPlanes(): Instance has not been initialized for compression"); + + if(!srcPlanes || !srcPlanes[0] || width<=0 || height<=0 || subsamp<0 + || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL || jpegQual<0 + || jpegQual>100) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; *jpegSize=tjBufSize(width, height, subsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1) + return -1; + cinfo->raw_data_in=TRUE; + + jpeg_start_compress(cinfo, TRUE); + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&cinfo->comp_info[i]; + int ih; + iw[i]=compptr->width_in_blocks*DCTSIZE; + ih=compptr->height_in_blocks*DCTSIZE; + pw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor) + *compptr->h_samp_factor/cinfo->max_h_samp_factor; + ph[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor) + *compptr->v_samp_factor/cinfo->max_v_samp_factor; + if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; + th[i]=compptr->v_samp_factor*DCTSIZE; + tmpbufsize+=iw[i]*th[i]; + if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + ptr=(JSAMPLE *)srcPlanes[i]; + for(row=0; rownum_components; i++) + { + if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + for(row=0; rowimage_height; + row+=cinfo->max_v_samp_factor*DCTSIZE) + { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&cinfo->comp_info[i]; + crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor; + if(usetmpbuf) + { + int j, k; + for(j=0; jmax_v_samp_factor*DCTSIZE); + } + jpeg_finish_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, + const unsigned char *srcBuf, int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) +{ + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(srcBuf==NULL || width<=0 || pad<1 || height<=0 || subsamp<0 + || subsamp>=NUMSUBOPT) + _throw("tjCompressFromYUV(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + srcPlanes[0]=srcBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + srcPlanes[1]=srcPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; + srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; + } + + return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height, + subsamp, jpegBuf, jpegSize, jpegQual, flags); + + bailout: + return retval; +} + + +/* Decompressor */ + +static tjhandle _tjInitDecompress(tjinstance *this) +{ + static unsigned char buffer[1]; + + /* This is also straight out of example.c */ + this->dinfo.err=jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit=my_error_exit; + this->jerr.pub.output_message=my_output_message; + this->jerr.emit_message=this->jerr.pub.emit_message; + this->jerr.pub.emit_message=my_emit_message; + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + if(this) free(this); return NULL; + } + + jpeg_create_decompress(&this->dinfo); + /* Make an initial call so it will create the source manager */ + jpeg_mem_src_tj(&this->dinfo, buffer, 1); + + this->init|=DECOMPRESS; + return (tjhandle)this; +} + +DLLEXPORT tjhandle DLLCALL tjInitDecompress(void) +{ + tjinstance *this; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitDecompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + return _tjInitDecompress(this); +} + + +DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, int *jpegColorspace) +{ + int retval=0; + + getdinstance(handle); + if((this->init&DECOMPRESS)==0) + _throw("tjDecompressHeader3(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || width==NULL || height==NULL + || jpegSubsamp==NULL || jpegColorspace==NULL) + _throw("tjDecompressHeader3(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + return -1; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + + *width=dinfo->image_width; + *height=dinfo->image_height; + *jpegSubsamp=getSubsamp(dinfo); + switch(dinfo->jpeg_color_space) + { + case JCS_GRAYSCALE: *jpegColorspace=TJCS_GRAY; break; + case JCS_RGB: *jpegColorspace=TJCS_RGB; break; + case JCS_YCbCr: *jpegColorspace=TJCS_YCbCr; break; + case JCS_CMYK: *jpegColorspace=TJCS_CMYK; break; + case JCS_YCCK: *jpegColorspace=TJCS_YCCK; break; + default: *jpegColorspace=-1; break; + } + + jpeg_abort_decompress(dinfo); + + if(*jpegSubsamp<0) + _throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image"); + if(*jpegColorspace<0) + _throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image"); + if(*width<1 || *height<1) + _throw("tjDecompressHeader3(): Invalid data returned in header"); + + bailout: + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, + int *jpegSubsamp) +{ + int jpegColorspace; + return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height, + jpegSubsamp, &jpegColorspace); +} + +DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height) +{ + int jpegSubsamp; + return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height, + &jpegSubsamp); +} + + +DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors) +{ + if(numscalingfactors==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjGetScalingFactors(): Invalid argument"); + return NULL; + } + + *numscalingfactors=NUMSF; + return (tjscalingfactor *)sf; +} + + +DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + int jpegwidth, jpegheight, scaledw, scaledh; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + unsigned char *_dstBuf=NULL; int _pitch=0; + #endif + + getdinstance(handle); + if((this->init&DECOMPRESS)==0) + _throw("tjDecompress2(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pitch<0 + || height<0 || pixelFormat<0 || pixelFormat>=TJ_NUMPF) + _throw("tjDecompress2(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) + { + retval=-1; goto bailout; + } + + if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE; + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + for(i=0; i=NUMSF) + _throw("tjDecompress2(): Could not scale down to desired image dimensions"); + width=scaledw; height=scaledh; + dinfo->scale_num=sf[i].num; + dinfo->scale_denom=sf[i].denom; + + jpeg_start_decompress(dinfo); + if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && + (RGB_RED!=tjRedOffset[pixelFormat] || + RGB_GREEN!=tjGreenOffset[pixelFormat] || + RGB_BLUE!=tjBlueOffset[pixelFormat] || + RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) + { + rgbBuf=(unsigned char *)malloc(width*height*3); + if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure"); + _pitch=pitch; pitch=width*3; + _dstBuf=dstBuf; dstBuf=rgbBuf; + } + #endif + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW) + *dinfo->output_height))==NULL) + _throw("tjDecompress2(): Memory allocation failure"); + for(i=0; i<(int)dinfo->output_height; i++) + { + if(flags&TJFLAG_BOTTOMUP) + row_pointer[i]=&dstBuf[(dinfo->output_height-i-1)*pitch]; + else row_pointer[i]=&dstBuf[i*pitch]; + } + while(dinfo->output_scanlineoutput_height) + { + jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline], + dinfo->output_height-dinfo->output_scanline); + } + jpeg_finish_decompress(dinfo); + + #ifndef JCS_EXTENSIONS + fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); + #endif + + bailout: + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, + int height, int pixelSize, int flags) +{ + if(flags&TJ_YUV) + return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags); + else + return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch, + height, getPixelFormat(pixelSize, flags), flags); +} + + +static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo, + int pixelFormat, int subsamp, int flags) +{ + int i; + + dinfo->scale_num=dinfo->scale_denom=1; + + if(subsamp==TJSAMP_GRAY) + { + dinfo->num_components=dinfo->comps_in_scan=1; + dinfo->jpeg_color_space=JCS_GRAYSCALE; + } + else + { + dinfo->num_components=dinfo->comps_in_scan=3; + dinfo->jpeg_color_space=JCS_YCbCr; + } + + dinfo->comp_info=(jpeg_component_info *) + (*dinfo->mem->alloc_small)((j_common_ptr)dinfo, JPOOL_IMAGE, + dinfo->num_components*sizeof(jpeg_component_info)); + + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + compptr->h_samp_factor=(i==0)? tjMCUWidth[subsamp]/8:1; + compptr->v_samp_factor=(i==0)? tjMCUHeight[subsamp]/8:1; + compptr->component_index=i; + compptr->component_id=i+1; + compptr->quant_tbl_no=compptr->dc_tbl_no=compptr->ac_tbl_no= + (i==0)? 0:1; + dinfo->cur_comp_info[i]=compptr; + } + dinfo->data_precision=8; + for(i=0; i<2; i++) + { + if(dinfo->quant_tbl_ptrs[i]==NULL) + dinfo->quant_tbl_ptrs[i]=jpeg_alloc_quant_table((j_common_ptr)dinfo); + } + + return 0; +} + + +int my_read_markers(j_decompress_ptr dinfo) +{ + return JPEG_REACHED_SOS; +} + +void my_reset_marker_reader(j_decompress_ptr dinfo) +{ +} + +DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, + int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS]; + int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + unsigned char *_dstBuf=NULL; int _pitch=0; + #endif + int (*old_read_markers)(j_decompress_ptr); + void (*old_reset_marker_reader)(j_decompress_ptr); + + getdinstance(handle); + + for(i=0; iinit&DECOMPRESS)==0) + _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression"); + + if(!srcPlanes || !srcPlanes[0] || subsamp<0 || subsamp>=NUMSUBOPT + || dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pixelFormat==TJPF_CMYK) + _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels."); + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + dinfo->image_width=width; + dinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1) + { + retval=-1; goto bailout; + } + old_read_markers=dinfo->marker->read_markers; + dinfo->marker->read_markers=my_read_markers; + old_reset_marker_reader=dinfo->marker->reset_marker_reader; + dinfo->marker->reset_marker_reader=my_reset_marker_reader; + jpeg_read_header(dinfo, TRUE); + dinfo->marker->read_markers=old_read_markers; + dinfo->marker->reset_marker_reader=old_reset_marker_reader; + + if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) + { + retval=-1; goto bailout; + } + dinfo->do_fancy_upsampling=FALSE; + dinfo->Se=DCTSIZE2-1; + jinit_master_decompress(dinfo); + (*dinfo->upsample->start_pass)(dinfo); + + pw0=PAD(width, dinfo->max_h_samp_factor); + ph0=PAD(height, dinfo->max_v_samp_factor); + + if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && + (RGB_RED!=tjRedOffset[pixelFormat] || + RGB_GREEN!=tjGreenOffset[pixelFormat] || + RGB_BLUE!=tjBlueOffset[pixelFormat] || + RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) + { + rgbBuf=(unsigned char *)malloc(width*height*3); + if(!rgbBuf) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + _pitch=pitch; pitch=width*3; + _dstBuf=dstBuf; dstBuf=rgbBuf; + } + #endif + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for(i=0; inum_components; i++) + { + compptr=&dinfo->comp_info[i]; + _tmpbuf[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) + * compptr->v_samp_factor + 16); + if(!_tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); + if(!tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowv_samp_factor; row++) + { + unsigned char *_tmpbuf_aligned= + (unsigned char *)PAD((size_t)_tmpbuf[i], 16); + tmpbuf[i][row]=&_tmpbuf_aligned[ + PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; + } + pw[i]=pw0*compptr->h_samp_factor/dinfo->max_h_samp_factor; + ph[i]=ph0*compptr->v_samp_factor/dinfo->max_v_samp_factor; + inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); + if(!inbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + ptr=(JSAMPLE *)srcPlanes[i]; + for(row=0; rowmax_v_samp_factor) + { + JDIMENSION inrow=0, outrow=0; + for(i=0, compptr=dinfo->comp_info; inum_components; i++, compptr++) + jcopy_sample_rows(inbuf[i], + row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0, + compptr->v_samp_factor, pw[i]); + (dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow, + dinfo->max_v_samp_factor, &row_pointer[row], &outrow, + dinfo->max_v_samp_factor); + } + jpeg_abort_decompress(dinfo); + + #ifndef JCS_EXTENSIONS + fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); + #endif + + bailout: + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags) +{ + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(srcBuf==NULL || pad<0 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT + || width<=0 || height<=0) + _throw("tjDecodeYUV(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + srcPlanes[0]=srcBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + srcPlanes[1]=srcPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; + srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; + } + + return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width, + pitch, height, pixelFormat, flags); + + bailout: + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, + unsigned char **dstPlanes, int width, int *strides, int height, int flags) +{ + int i, sfi, row, retval=0; JSAMPROW *outbuf[MAX_COMPONENTS]; + int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; + int dctsize; + + getdinstance(handle); + + for(i=0; iinit&DECOMPRESS)==0) + _throw("tjDecompressToYUVPlanes(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || !dstPlanes || !dstPlanes[0] || width<0 + || height<0) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(!this->headerRead) + { + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + } + this->headerRead=0; + jpegSubsamp=getSubsamp(dinfo); + if(jpegSubsamp<0) + _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image"); + + if(jpegSubsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + for(i=0; i=NUMSF) + _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions"); + if(dinfo->num_components>3) + _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components"); + + width=scaledw; height=scaledh; + dinfo->scale_num=sf[i].num; + dinfo->scale_denom=sf[i].denom; + sfi=i; + jpeg_calc_output_dimensions(dinfo); + + dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom; + + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + int ih; + iw[i]=compptr->width_in_blocks*dctsize; + ih=compptr->height_in_blocks*dctsize; + pw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor) + *compptr->h_samp_factor/dinfo->max_h_samp_factor; + ph[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor) + *compptr->v_samp_factor/dinfo->max_v_samp_factor; + if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; + th[i]=compptr->v_samp_factor*dctsize; + tmpbufsize+=iw[i]*th[i]; + if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + ptr=dstPlanes[i]; + for(row=0; rownum_components; i++) + { + if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + for(row=0; rowdo_fancy_upsampling=FALSE; + if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; + dinfo->raw_data_out=TRUE; + + jpeg_start_decompress(dinfo); + for(row=0; row<(int)dinfo->output_height; + row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size) + { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + if(jpegSubsamp==TJ_420) + { + /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try + to be clever and use the IDCT to perform upsampling on the U and V + planes. For instance, if the output image is to be scaled by 1/2 + relative to the JPEG image, then the scaling factor and upsampling + effectively cancel each other, so a normal 8x8 IDCT can be used. + However, this is not desirable when using the decompress-to-YUV + functionality in TurboJPEG, since we want to output the U and V + planes in their subsampled form. Thus, we have to override some + internal libjpeg parameters to force it to use the "scaled" IDCT + functions on the U and V planes. */ + compptr->_DCT_scaled_size=dctsize; + compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]* + sf[sfi].num/sf[sfi].denom* + compptr->v_samp_factor/dinfo->max_v_samp_factor; + dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0]; + } + crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor; + if(usetmpbuf) yuvptr[i]=tmpbuf[i]; + else yuvptr[i]=&outbuf[i][crow[i]]; + } + jpeg_read_raw_data(dinfo, yuvptr, + dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size); + if(usetmpbuf) + { + int j; + for(i=0; inum_components; i++) + { + for(j=0; jglobal_state>DSTATE_START) jpeg_abort_decompress(dinfo); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags) +{ + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval=-1, jpegSubsamp=-1; + int i, jpegwidth, jpegheight, scaledw, scaledh; + + getdinstance(handle); + + if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1 + || !isPow2(pad) || height<0) + _throw("tjDecompressToYUV2(): Invalid argument"); + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + jpegSubsamp=getSubsamp(dinfo); + if(jpegSubsamp<0) + _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image"); + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + + for(i=0; i=NUMSF) + _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions"); + + pw0=tjPlaneWidth(0, width, jpegSubsamp); + ph0=tjPlaneHeight(0, height, jpegSubsamp); + dstPlanes[0]=dstBuf; + strides[0]=PAD(pw0, pad); + if(jpegSubsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + dstPlanes[1]=dstPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, jpegSubsamp); + int ph1=tjPlaneHeight(1, height, jpegSubsamp); + strides[1]=strides[2]=PAD(pw1, pad); + dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; + dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; + } + + this->headerRead=1; + return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width, + strides, height, flags); + + bailout: + return retval; + +} + +DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int flags) +{ + return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags); +} + + +/* Transformer */ + +DLLEXPORT tjhandle DLLCALL tjInitTransform(void) +{ + tjinstance *this=NULL; tjhandle handle=NULL; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitTransform(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + handle=_tjInitCompress(this); + if(!handle) return NULL; + handle=_tjInitDecompress(this); + return handle; +} + + +DLLEXPORT int DLLCALL tjTransform(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *t, int flags) +{ + jpeg_transform_info *xinfo=NULL; + jvirt_barray_ptr *srccoefs, *dstcoefs; + int retval=0, i, jpegSubsamp; + + getinstance(handle); + if((this->init&COMPRESS)==0 || (this->init&DECOMPRESS)==0) + _throw("tjTransform(): Instance has not been initialized for transformation"); + + if(jpegBuf==NULL || jpegSize<=0 || n<1 || dstBufs==NULL || dstSizes==NULL + || t==NULL || flags<0) + _throw("tjTransform(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + + if((xinfo=(jpeg_transform_info *)malloc(sizeof(jpeg_transform_info)*n)) + ==NULL) + _throw("tjTransform(): Memory allocation failure"); + MEMZERO(xinfo, sizeof(jpeg_transform_info)*n); + + for(i=0; iimage_width; h=dinfo->image_height; + } + else + { + w=xinfo[i].crop_width; h=xinfo[i].crop_height; + } + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; dstSizes[i]=tjBufSize(w, h, jpegSubsamp); + } + if(!(t[i].options&TJXOPT_NOOUTPUT)) + jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc); + jpeg_copy_critical_parameters(dinfo, cinfo); + dstcoefs=jtransform_adjust_parameters(dinfo, cinfo, srccoefs, + &xinfo[i]); + if(!(t[i].options&TJXOPT_NOOUTPUT)) + { + jpeg_write_coefficients(cinfo, dstcoefs); + jcopy_markers_execute(dinfo, cinfo, JCOPYOPT_ALL); + } + else jinit_c_master_control(cinfo, TRUE); + jtransform_execute_transformation(dinfo, cinfo, srccoefs, + &xinfo[i]); + if(t[i].customFilter) + { + int ci, y; JDIMENSION by; + for(ci=0; cinum_components; ci++) + { + jpeg_component_info *compptr=&cinfo->comp_info[ci]; + tjregion arrayRegion={0, 0, compptr->width_in_blocks*DCTSIZE, + DCTSIZE}; + tjregion planeRegion={0, 0, compptr->width_in_blocks*DCTSIZE, + compptr->height_in_blocks*DCTSIZE}; + for(by=0; byheight_in_blocks; by+=compptr->v_samp_factor) + { + JBLOCKARRAY barray=(dinfo->mem->access_virt_barray) + ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor, + TRUE); + for(y=0; yv_samp_factor; y++) + { + if(t[i].customFilter(barray[y][0], arrayRegion, planeRegion, + ci, i, &t[i])==-1) + _throw("tjTransform(): Error in custom filter"); + arrayRegion.y+=DCTSIZE; + } + } + } + } + if(!(t[i].options&TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo); + } + + jpeg_finish_decompress(dinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + if(xinfo) free(xinfo); + if(this->jerr.warning) retval=-1; + return retval; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg.h glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1542 @@ +/* + * Copyright (C)2009-2015 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TURBOJPEG_H__ +#define __TURBOJPEG_H__ + +#if defined(_WIN32) && defined(DLLDEFINE) +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif +#define DLLCALL + + +/** + * @addtogroup TurboJPEG + * TurboJPEG API. This API provides an interface for generating, decoding, and + * transforming planar YUV and JPEG images in memory. + * + * @anchor YUVnotes + * YUV Image Format Notes + * ---------------------- + * Technically, the JPEG format uses the YCbCr colorspace (which is technically + * not a colorspace but a color transform), but per the convention of the + * digital video community, the TurboJPEG API uses "YUV" to refer to an image + * format consisting of Y, Cb, and Cr image planes. + * + * Each plane is simply a 2D array of bytes, each byte representing the value + * of one of the components (Y, Cb, or Cr) at a particular location in the + * image. The width and height of each plane are determined by the image + * width, height, and level of chrominance subsampling. The luminance plane + * width is the image width padded to the nearest multiple of the horizontal + * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of + * 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane + * height is the image height padded to the nearest multiple of the vertical + * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 + * or grayscale.) This is irrespective of any additional padding that may be + * specified as an argument to the various YUV functions. The chrominance + * plane width is equal to the luminance plane width divided by the horizontal + * subsampling factor, and the chrominance plane height is equal to the + * luminance plane height divided by the vertical subsampling factor. + * + * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is + * used, then the luminance plane would be 36 x 35 bytes, and each of the + * chrominance planes would be 18 x 35 bytes. If you specify a line padding of + * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and + * each of the chrominance planes would be 20 x 35 bytes. + * + * @{ + */ + + +/** + * The number of chrominance subsampling options + */ +#define TJ_NUMSAMP 6 + +/** + * Chrominance subsampling options. + * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK + * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of + * the Cb and Cr (chrominance) components can be discarded or averaged together + * to produce a smaller image with little perceptible loss of image clarity + * (the human eye is more sensitive to small changes in brightness than to + * small changes in color.) This is called "chrominance subsampling". + */ +enum TJSAMP +{ + /** + * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or + * YUV image will contain one chrominance component for every pixel in the + * source image. + */ + TJSAMP_444=0, + /** + * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 2x1 block of pixels in the source image. + */ + TJSAMP_422, + /** + * 4:2:0 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 2x2 block of pixels in the source image. + */ + TJSAMP_420, + /** + * Grayscale. The JPEG or YUV image will contain no chrominance components. + */ + TJSAMP_GRAY, + /** + * 4:4:0 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 1x2 block of pixels in the source image. + * + * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. + */ + TJSAMP_440, + /** + * 4:1:1 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 4x1 block of pixels in the source image. + * JPEG images compressed with 4:1:1 subsampling will be almost exactly the + * same size as those compressed with 4:2:0 subsampling, and in the + * aggregate, both subsampling methods produce approximately the same + * perceptual quality. However, 4:1:1 is better able to reproduce sharp + * horizontal features. + * + * @note 4:1:1 subsampling is not fully accelerated in libjpeg-turbo. + */ + TJSAMP_411 +}; + +/** + * MCU block width (in pixels) for a given level of chrominance subsampling. + * MCU block sizes: + * - 8x8 for no subsampling or grayscale + * - 16x8 for 4:2:2 + * - 8x16 for 4:4:0 + * - 16x16 for 4:2:0 + * - 32x8 for 4:1:1 + */ +static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8, 32}; + +/** + * MCU block height (in pixels) for a given level of chrominance subsampling. + * MCU block sizes: + * - 8x8 for no subsampling or grayscale + * - 16x8 for 4:2:2 + * - 8x16 for 4:4:0 + * - 16x16 for 4:2:0 + * - 32x8 for 4:1:1 + */ +static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16, 8}; + + +/** + * The number of pixel formats + */ +#define TJ_NUMPF 12 + +/** + * Pixel formats + */ +enum TJPF +{ + /** + * RGB pixel format. The red, green, and blue components in the image are + * stored in 3-byte pixels in the order R, G, B from lowest to highest byte + * address within each pixel. + */ + TJPF_RGB=0, + /** + * BGR pixel format. The red, green, and blue components in the image are + * stored in 3-byte pixels in the order B, G, R from lowest to highest byte + * address within each pixel. + */ + TJPF_BGR, + /** + * RGBX pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order R, G, B from lowest to highest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_RGBX, + /** + * BGRX pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order B, G, R from lowest to highest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_BGRX, + /** + * XBGR pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order R, G, B from highest to lowest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_XBGR, + /** + * XRGB pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order B, G, R from highest to lowest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_XRGB, + /** + * Grayscale pixel format. Each 1-byte pixel represents a luminance + * (brightness) level from 0 to 255. + */ + TJPF_GRAY, + /** + * RGBA pixel format. This is the same as @ref TJPF_RGBX, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_RGBA, + /** + * BGRA pixel format. This is the same as @ref TJPF_BGRX, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_BGRA, + /** + * ABGR pixel format. This is the same as @ref TJPF_XBGR, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_ABGR, + /** + * ARGB pixel format. This is the same as @ref TJPF_XRGB, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_ARGB, + /** + * CMYK pixel format. Unlike RGB, which is an additive color model used + * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive + * color model used primarily for printing. In the CMYK color model, the + * value of each color component typically corresponds to an amount of cyan, + * magenta, yellow, or black ink that is applied to a white background. In + * order to convert between CMYK and RGB, it is necessary to use a color + * management system (CMS.) A CMS will attempt to map colors within the + * printer's gamut to perceptually similar colors in the display's gamut and + * vice versa, but the mapping is typically not 1:1 or reversible, nor can it + * be defined with a simple formula. Thus, such a conversion is out of scope + * for a codec library. However, the TurboJPEG API allows for compressing + * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK + * JPEG images into CMYK pixels. + */ + TJPF_CMYK +}; + + +/** + * Red offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the red component is offset from the start of the pixel. For + * instance, if a pixel of format TJ_BGRX is stored in char pixel[], + * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. + */ +static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1}; +/** + * Green offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the green component is offset from the start of the pixel. + * For instance, if a pixel of format TJ_BGRX is stored in + * char pixel[], then the green component will be + * pixel[tjGreenOffset[TJ_BGRX]]. + */ +static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1}; +/** + * Blue offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the Blue component is offset from the start of the pixel. For + * instance, if a pixel of format TJ_BGRX is stored in char pixel[], + * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. + */ +static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1}; + +/** + * Pixel size (in bytes) for a given pixel format. + */ +static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4}; + + +/** + * The number of JPEG colorspaces + */ +#define TJ_NUMCS 5 + +/** + * JPEG colorspaces + */ +enum TJCS +{ + /** + * RGB colorspace. When compressing the JPEG image, the R, G, and B + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. RGB JPEG images can be + * decompressed to any of the extended RGB pixel formats or grayscale, but + * they cannot be decompressed to YUV images. + */ + TJCS_RGB=0, + /** + * YCbCr colorspace. YCbCr is not an absolute colorspace but rather a + * mathematical transformation of RGB designed solely for storage and + * transmission. YCbCr images must be converted to RGB before they can + * actually be displayed. In the YCbCr colorspace, the Y (luminance) + * component represents the black & white portion of the original image, and + * the Cb and Cr (chrominance) components represent the color portion of the + * original image. Originally, the analog equivalent of this transformation + * allowed the same signal to drive both black & white and color televisions, + * but JPEG images use YCbCr primarily because it allows the color data to be + * optionally subsampled for the purposes of reducing bandwidth or disk + * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images + * can be compressed from and decompressed to any of the extended RGB pixel + * formats or grayscale, or they can be decompressed to YUV planar images. + */ + TJCS_YCbCr, + /** + * Grayscale colorspace. The JPEG image retains only the luminance data (Y + * component), and any color data from the source image is discarded. + * Grayscale JPEG images can be compressed from and decompressed to any of + * the extended RGB pixel formats or grayscale, or they can be decompressed + * to YUV planar images. + */ + TJCS_GRAY, + /** + * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. CMYK JPEG images can + * only be decompressed to CMYK pixels. + */ + TJCS_CMYK, + /** + * YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but + * rather a mathematical transformation of CMYK designed solely for storage + * and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be + * reversibly transformed into YCCK, and as with YCbCr, the chrominance + * components in the YCCK pixels can be subsampled without incurring major + * perceptual loss. YCCK JPEG images can only be compressed from and + * decompressed to CMYK pixels. + */ + TJCS_YCCK +}; + + +/** + * The uncompressed source/destination image is stored in bottom-up (Windows, + * OpenGL) order, not top-down (X11) order. + */ +#define TJFLAG_BOTTOMUP 2 +/** + * When decompressing an image that was compressed using chrominance + * subsampling, use the fastest chrominance upsampling algorithm available in + * the underlying codec. The default is to use smooth upsampling, which + * creates a smooth transition between neighboring chrominance components in + * order to reduce upsampling artifacts in the decompressed image. + */ +#define TJFLAG_FASTUPSAMPLE 256 +/** + * Disable buffer (re)allocation. If passed to #tjCompress2() or + * #tjTransform(), this flag will cause those functions to generate an error if + * the JPEG image buffer is invalid or too small rather than attempting to + * allocate or reallocate that buffer. This reproduces the behavior of earlier + * versions of TurboJPEG. + */ +#define TJFLAG_NOREALLOC 1024 +/** + * Use the fastest DCT/IDCT algorithm available in the underlying codec. The + * default if this flag is not specified is implementation-specific. For + * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast + * algorithm by default when compressing, because this has been shown to have + * only a very slight effect on accuracy, but it uses the accurate algorithm + * when decompressing, because this has been shown to have a larger effect. + */ +#define TJFLAG_FASTDCT 2048 +/** + * Use the most accurate DCT/IDCT algorithm available in the underlying codec. + * The default if this flag is not specified is implementation-specific. For + * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast + * algorithm by default when compressing, because this has been shown to have + * only a very slight effect on accuracy, but it uses the accurate algorithm + * when decompressing, because this has been shown to have a larger effect. + */ +#define TJFLAG_ACCURATEDCT 4096 + + +/** + * The number of transform operations + */ +#define TJ_NUMXOP 8 + +/** + * Transform operations for #tjTransform() + */ +enum TJXOP +{ + /** + * Do not transform the position of the image pixels + */ + TJXOP_NONE=0, + /** + * Flip (mirror) image horizontally. This transform is imperfect if there + * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.) + */ + TJXOP_HFLIP, + /** + * Flip (mirror) image vertically. This transform is imperfect if there are + * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.) + */ + TJXOP_VFLIP, + /** + * Transpose image (flip/mirror along upper left to lower right axis.) This + * transform is always perfect. + */ + TJXOP_TRANSPOSE, + /** + * Transverse transpose image (flip/mirror along upper right to lower left + * axis.) This transform is imperfect if there are any partial MCU blocks in + * the image (see #TJXOPT_PERFECT.) + */ + TJXOP_TRANSVERSE, + /** + * Rotate image clockwise by 90 degrees. This transform is imperfect if + * there are any partial MCU blocks on the bottom edge (see + * #TJXOPT_PERFECT.) + */ + TJXOP_ROT90, + /** + * Rotate image 180 degrees. This transform is imperfect if there are any + * partial MCU blocks in the image (see #TJXOPT_PERFECT.) + */ + TJXOP_ROT180, + /** + * Rotate image counter-clockwise by 90 degrees. This transform is imperfect + * if there are any partial MCU blocks on the right edge (see + * #TJXOPT_PERFECT.) + */ + TJXOP_ROT270 +}; + + +/** + * This option will cause #tjTransform() to return an error if the transform is + * not perfect. Lossless transforms operate on MCU blocks, whose size depends + * on the level of chrominance subsampling used (see #tjMCUWidth + * and #tjMCUHeight.) If the image's width or height is not evenly divisible + * by the MCU block size, then there will be partial MCU blocks on the right + * and/or bottom edges. It is not possible to move these partial MCU blocks to + * the top or left of the image, so any transform that would require that is + * "imperfect." If this option is not specified, then any partial MCU blocks + * that cannot be transformed will be left in place, which will create + * odd-looking strips on the right or bottom edge of the image. + */ +#define TJXOPT_PERFECT 1 +/** + * This option will cause #tjTransform() to discard any partial MCU blocks that + * cannot be transformed. + */ +#define TJXOPT_TRIM 2 +/** + * This option will enable lossless cropping. See #tjTransform() for more + * information. + */ +#define TJXOPT_CROP 4 +/** + * This option will discard the color data in the input image and produce + * a grayscale output image. + */ +#define TJXOPT_GRAY 8 +/** + * This option will prevent #tjTransform() from outputting a JPEG image for + * this particular transform (this can be used in conjunction with a custom + * filter to capture the transformed DCT coefficients without transcoding + * them.) + */ +#define TJXOPT_NOOUTPUT 16 + + +/** + * Scaling factor + */ +typedef struct +{ + /** + * Numerator + */ + int num; + /** + * Denominator + */ + int denom; +} tjscalingfactor; + +/** + * Cropping region + */ +typedef struct +{ + /** + * The left boundary of the cropping region. This must be evenly divisible + * by the MCU block width (see #tjMCUWidth.) + */ + int x; + /** + * The upper boundary of the cropping region. This must be evenly divisible + * by the MCU block height (see #tjMCUHeight.) + */ + int y; + /** + * The width of the cropping region. Setting this to 0 is the equivalent of + * setting it to the width of the source JPEG image - x. + */ + int w; + /** + * The height of the cropping region. Setting this to 0 is the equivalent of + * setting it to the height of the source JPEG image - y. + */ + int h; +} tjregion; + +/** + * Lossless transform + */ +typedef struct tjtransform +{ + /** + * Cropping region + */ + tjregion r; + /** + * One of the @ref TJXOP "transform operations" + */ + int op; + /** + * The bitwise OR of one of more of the @ref TJXOPT_CROP "transform options" + */ + int options; + /** + * Arbitrary data that can be accessed within the body of the callback + * function + */ + void *data; + /** + * A callback function that can be used to modify the DCT coefficients + * after they are losslessly transformed but before they are transcoded to a + * new JPEG image. This allows for custom filters or other transformations + * to be applied in the frequency domain. + * + * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE: + * this pointer is not guaranteed to be valid once the callback returns, so + * applications wishing to hand off the DCT coefficients to another function + * or library should make a copy of them within the body of the callback.) + * + * @param arrayRegion #tjregion structure containing the width and height of + * the array pointed to by coeffs as well as its offset relative to + * the component plane. TurboJPEG implementations may choose to split each + * component plane into multiple DCT coefficient arrays and call the callback + * function once for each array. + * + * @param planeRegion #tjregion structure containing the width and height of + * the component plane to which coeffs belongs + * + * @param componentID ID number of the component plane to which + * coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, + * and 2 in typical JPEG images.) + * + * @param transformID ID number of the transformed image to which + * coeffs belongs. This is the same as the index of the transform + * in the transforms array that was passed to #tjTransform(). + * + * @param transform a pointer to a #tjtransform structure that specifies the + * parameters and/or cropping region for this transform + * + * @return 0 if the callback was successful, or -1 if an error occurred. + */ + int (*customFilter)(short *coeffs, tjregion arrayRegion, + tjregion planeRegion, int componentIndex, int transformIndex, + struct tjtransform *transform); +} tjtransform; + +/** + * TurboJPEG instance handle + */ +typedef void* tjhandle; + + +/** + * Pad the given width to the nearest 32-bit boundary + */ +#define TJPAD(width) (((width)+3)&(~3)) + +/** + * Compute the scaled value of dimension using the given scaling + * factor. This macro performs the integer equivalent of ceil(dimension * + * scalingFactor). + */ +#define TJSCALED(dimension, scalingFactor) ((dimension * scalingFactor.num \ + + scalingFactor.denom - 1) / scalingFactor.denom) + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Create a TurboJPEG compressor instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) + */ +DLLEXPORT tjhandle DLLCALL tjInitCompress(void); + + +/** + * Compress an RGB, grayscale, or CMYK image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB, grayscale, or + * CMYK pixels to be compressed + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer + * to accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegSubsamp the level of chrominance subsampling to be used when + * generating the JPEG image (see @ref TJSAMP + * "Chrominance subsampling options".) + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags); + + +/** + * Compress a YUV planar image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing a YUV planar image to be + * compressed. The size of this buffer should match the value returned by + * #tjBufSizeYUV2() for the given image width, height, padding, and level of + * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be + * stored sequentially in the source buffer (refer to @ref YUVnotes + * "YUV Image Format Notes".) + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate + * buffer copy will be performed within TurboJPEG. + * + * @param pad the line padding used in the source image. For instance, if each + * line in each plane of the YUV image is padded to the nearest multiple of 4 + * bytes, then pad should be set to 4. + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an + * intermediate buffer copy will be performed within TurboJPEG. + * + * @param subsamp the level of chrominance subsampling used in the source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, + const unsigned char *srcBuf, int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags); + + +/** + * Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if compressing a grayscale image) that contain a YUV + * image to be compressed. These planes can be contiguous or non-contiguous in + * memory. The size of each plane should match the value returned by + * #tjPlaneSizeYUV() for the given image width, height, strides, and level of + * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" + * for more details. + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate + * buffer copy will be performed within TurboJPEG. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to specify an arbitrary amount of line + * padding in each plane or to create a JPEG image from a subregion of a larger + * YUV planar image. + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an + * intermediate buffer copy will be performed within TurboJPEG. + * + * @param subsamp the level of chrominance subsampling used in the source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, int width, const int *strides, int height, + int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, + int flags); + + +/** + * The maximum size of the buffer (in bytes) required to hold a JPEG image with + * the given parameters. The number of bytes returned by this function is + * larger than the size of the uncompressed source image. The reason for this + * is that the JPEG format uses 16-bit coefficients, and it is thus possible + * for a very high-quality JPEG image with very high-frequency content to + * expand rather than compress when converted to the JPEG format. Such images + * represent a very rare corner case, but since there is no way to predict the + * size of a JPEG image prior to compression, the corner case has to be + * handled. + * + * @param width width (in pixels) of the image + * + * @param height height (in pixels) of the image + * + * @param jpegSubsamp the level of chrominance subsampling to be used when + * generating the JPEG image (see @ref TJSAMP + * "Chrominance subsampling options".) + * + * @return the maximum size of the buffer (in bytes) required to hold the + * image, or -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, + int jpegSubsamp); + + +/** + * The size of the buffer (in bytes) required to hold a YUV planar image with + * the given parameters. + * + * @param width width (in pixels) of the image + * + * @param pad the width of each line in each plane of the image is padded to + * the nearest multiple of this number of bytes (must be a power of 2.) + * + * @param height height (in pixels) of the image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the size of the buffer (in bytes) required to hold the image, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, + int subsamp); + + +/** + * The size of the buffer (in bytes) required to hold a YUV image plane with + * the given parameters. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param width width (in pixels) of the YUV image. NOTE: this is the width of + * the whole image, not the plane width. + * + * @param stride bytes per line in the image plane. Setting this to 0 is the + * equivalent of setting it to the plane width. + * + * @param height height (in pixels) of the YUV image. NOTE: this is the height + * of the whole image, not the plane height. + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the size of the buffer (in bytes) required to hold the YUV image + * plane, or -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, + int stride, int height, int subsamp); + + +/** + * The plane width of a YUV image plane with the given parameters. Refer to + * @ref YUVnotes "YUV Image Format Notes" for a description of plane width. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param width width (in pixels) of the YUV image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the plane width of a YUV image plane with the given parameters, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp); + + +/** + * The plane height of a YUV image plane with the given parameters. Refer to + * @ref YUVnotes "YUV Image Format Notes" for a description of plane height. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param height height (in pixels) of the YUV image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the plane height of a YUV image plane with the given parameters, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); + + +/** + * Encode an RGB or grayscale image into a YUV planar image. This function + * uses the accelerated color conversion routines in the underlying + * codec but does not execute any of the other steps in the JPEG compression + * process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels + * to be encoded + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param dstBuf pointer to an image buffer that will receive the YUV image. + * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based + * on the image width, height, padding, and level of chrominance subsampling. + * The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the + * buffer (refer to @ref YUVnotes "YUV Image Format Notes".) + * + * @param pad the width of each line in each plane of the YUV image will be + * padded to the nearest multiple of this number of bytes (must be a power of + * 2.) To generate images suitable for X Video, pad should be set to + * 4. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X + * Video, subsamp should be set to @ref TJSAMP_420. This produces an + * image compatible with the I420 (AKA "YUV420P") format. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags); + + +/** + * Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image + * planes. This function uses the accelerated color conversion routines in the + * underlying codec but does not execute any of the other steps in the JPEG + * compression process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels + * to be encoded + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if generating a grayscale image) that will receive the + * encoded image. These planes can be contiguous or non-contiguous in memory. + * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based + * on the image width, height, strides, and level of chrominance subsampling. + * Refer to @ref YUVnotes "YUV Image Format Notes" for more details. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the output image. Setting the stride for + * any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to add an arbitrary amount of line + * padding to each plane or to encode an RGB or grayscale image into a + * subregion of a larger YUV planar image. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X + * Video, subsamp should be set to @ref TJSAMP_420. This produces an + * image compatible with the I420 (AKA "YUV420P") format. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, + int flags); + + +/** + * Create a TurboJPEG decompressor instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); + + +/** + * Retrieve information about a JPEG image without decompressing it. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing a JPEG image + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param width pointer to an integer variable that will receive the width (in + * pixels) of the JPEG image + * + * @param height pointer to an integer variable that will receive the height + * (in pixels) of the JPEG image + * + * @param jpegSubsamp pointer to an integer variable that will receive the + * level of chrominance subsampling used when the JPEG image was compressed + * (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegColorspace pointer to an integer variable that will receive one + * of the JPEG colorspace constants, indicating the colorspace of the JPEG + * image (see @ref TJCS "JPEG colorspaces".) + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, int *jpegColorspace); + + +/** + * Returns a list of fractional scaling factors that the JPEG decompressor in + * this implementation of TurboJPEG supports. + * + * @param numscalingfactors pointer to an integer variable that will receive + * the number of elements in the list + * + * @return a pointer to a list of fractional scaling factors, or NULL if an + * error is encountered (see #tjGetErrorStr().) +*/ +DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors); + + +/** + * Decompress a JPEG image to an RGB, grayscale, or CMYK image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstBuf pointer to an image buffer that will receive the decompressed + * image. This buffer should normally be pitch * scaledHeight bytes + * in size, where scaledHeight can be determined by calling + * #TJSCALED() with the JPEG image height and one of the scaling factors + * returned by #tjGetScalingFactors(). The dstBuf pointer may also be + * used to decompress into a specific region of a larger buffer. + * + * @param width desired width (in pixels) of the destination image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. + * + * @param pitch bytes per line in the destination image. Normally, this is + * scaledWidth * #tjPixelSize[pixelFormat] if the decompressed image + * is unpadded, else #TJPAD(scaledWidth * #tjPixelSize[pixelFormat]) + * if each line of the decompressed image is padded to the nearest 32-bit + * boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth + * can be determined by calling #TJSCALED() with the JPEG image width and one + * of the scaling factors returned by #tjGetScalingFactors().) You can also be + * clever and use the pitch parameter to skip lines, etc. Setting this + * parameter to 0 is the equivalent of setting it to + * scaledWidth * #tjPixelSize[pixelFormat]. + * + * @param height desired height (in pixels) of the destination image. If this + * is different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. + * + * @param pixelFormat pixel format of the destination image (see @ref + * TJPF "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, int flags); + + +/** + * Decompress a JPEG image to a YUV planar image. This function performs JPEG + * decompression but leaves out the color conversion step, so a planar YUV + * image is generated instead of an RGB image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstBuf pointer to an image buffer that will receive the YUV image. + * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based + * on the image width, height, padding, and level of subsampling. The Y, + * U (Cb), and V (Cr) image planes will be stored sequentially in the buffer + * (refer to @ref YUVnotes "YUV Image Format Notes".) + * + * @param width desired width (in pixels) of the YUV image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. If the scaled width is not an even multiple of the MCU + * block width (see #tjMCUWidth), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param pad the width of each line in each plane of the YUV image will be + * padded to the nearest multiple of this number of bytes (must be a power of + * 2.) To generate images suitable for X Video, pad should be set to + * 4. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. If the scaled height is not an even multiple of the MCU + * block height (see #tjMCUHeight), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags); + + +/** + * Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image + * planes. This function performs JPEG decompression but leaves out the color + * conversion step, so a planar YUV image is generated instead of an RGB image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if decompressing a grayscale image) that will receive + * the YUV image. These planes can be contiguous or non-contiguous in memory. + * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based + * on the scaled image width, scaled image height, strides, and level of + * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" + * for more details. + * + * @param width desired width (in pixels) of the YUV image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. If the scaled width is not an even multiple of the MCU + * block width (see #tjMCUWidth), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the output image. Setting the stride for + * any plane to 0 is the same as setting it to the scaled plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective scaled plane + * widths. You can adjust the strides in order to add an arbitrary amount of + * line padding to each plane or to decompress the JPEG image into a subregion + * of a larger YUV planar image. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. If the scaled height is not an even multiple of the MCU + * block height (see #tjMCUHeight), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, + unsigned char **dstPlanes, int width, int *strides, int height, int flags); + + +/** + * Decode a YUV planar image into an RGB or grayscale image. This function + * uses the accelerated color conversion routines in the underlying + * codec but does not execute any of the other steps in the JPEG decompression + * process. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing a YUV planar image to be + * decoded. The size of this buffer should match the value returned by + * #tjBufSizeYUV2() for the given image width, height, padding, and level of + * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be + * stored sequentially in the source buffer (refer to @ref YUVnotes + * "YUV Image Format Notes".) + * + * @param pad Use this parameter to specify that the width of each line in each + * plane of the YUV source image is padded to the nearest multiple of this + * number of bytes (must be a power of 2.) + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param dstBuf pointer to an image buffer that will receive the decoded + * image. This buffer should normally be pitch * height bytes in + * size, but the dstBuf pointer can also be used to decode into a + * specific region of a larger buffer. + * + * @param width width (in pixels) of the source and destination images + * + * @param pitch bytes per line in the destination image. Normally, this should + * be width * #tjPixelSize[pixelFormat] if the destination image is + * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line + * of the destination image should be padded to the nearest 32-bit boundary, as + * is the case for Windows bitmaps. You can also be clever and use the pitch + * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images + * + * @param pixelFormat pixel format of the destination image (see @ref TJPF + * "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags); + + +/** + * Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale + * image. This function uses the accelerated color conversion routines in the + * underlying codec but does not execute any of the other steps in the JPEG + * decompression process. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if decoding a grayscale image) that contain a YUV image + * to be decoded. These planes can be contiguous or non-contiguous in memory. + * The size of each plane should match the value returned by #tjPlaneSizeYUV() + * for the given image width, height, strides, and level of chrominance + * subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" for more + * details. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to specify an arbitrary amount of line + * padding in each plane or to decode a subregion of a larger YUV planar image. + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param dstBuf pointer to an image buffer that will receive the decoded + * image. This buffer should normally be pitch * height bytes in + * size, but the dstBuf pointer can also be used to decode into a + * specific region of a larger buffer. + * + * @param width width (in pixels) of the source and destination images + * + * @param pitch bytes per line in the destination image. Normally, this should + * be width * #tjPixelSize[pixelFormat] if the destination image is + * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line + * of the destination image should be padded to the nearest 32-bit boundary, as + * is the case for Windows bitmaps. You can also be clever and use the pitch + * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images + * + * @param pixelFormat pixel format of the destination image (see @ref TJPF + * "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, + int flags); + + +/** + * Create a new TurboJPEG transformer instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) + */ +DLLEXPORT tjhandle DLLCALL tjInitTransform(void); + + +/** + * Losslessly transform a JPEG image into another JPEG image. Lossless + * transforms work by moving the raw DCT coefficients from one JPEG image + * structure to another without altering the values of the coefficients. While + * this is typically faster than decompressing the image, transforming it, and + * re-compressing it, lossless transforms are not free. Each lossless + * transform requires reading and performing Huffman decoding on all of the + * coefficients in the source image, regardless of the size of the destination + * image. Thus, this function provides a means of generating multiple + * transformed images from the same source or applying multiple + * transformations simultaneously, in order to eliminate the need to read the + * source coefficients multiple times. + * + * @param handle a handle to a TurboJPEG transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG source image to + * transform + * + * @param jpegSize size of the JPEG source image (in bytes) + * + * @param n the number of transformed JPEG images to generate + * + * @param dstBufs pointer to an array of n image buffers. dstBufs[i] + * will receive a JPEG image that has been transformed using the parameters in + * transforms[i]. TurboJPEG has the ability to reallocate the JPEG + * buffer to accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize() with the transformed or cropped width and height. This should + * ensure that the buffer never has to be re-allocated (setting + * #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, dstSizes[i] should be set to the size of + * your pre-allocated buffer. In any case, unless you have set + * #TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return + * from this function, as it may have changed. + * + * @param dstSizes pointer to an array of n unsigned long variables that will + * receive the actual sizes (in bytes) of each transformed JPEG image. If + * dstBufs[i] points to a pre-allocated buffer, then + * dstSizes[i] should be set to the size of the buffer. Upon return, + * dstSizes[i] will contain the size of the JPEG image (in bytes.) + * + * @param transforms pointer to an array of n #tjtransform structures, each of + * which specifies the transform parameters and/or cropping region for the + * corresponding transformed output image. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjTransform(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, + int flags); + + +/** + * Destroy a TurboJPEG compressor, decompressor, or transformer instance. + * + * @param handle a handle to a TurboJPEG compressor, decompressor or + * transformer instance + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDestroy(tjhandle handle); + + +/** + * Allocate an image buffer for use with TurboJPEG. You should always use + * this function to allocate the JPEG destination buffer(s) for #tjCompress2() + * and #tjTransform() unless you are disabling automatic buffer + * (re)allocation (by setting #TJFLAG_NOREALLOC.) + * + * @param bytes the number of bytes to allocate + * + * @return a pointer to a newly-allocated buffer with the specified number of + * bytes. + * + * @sa tjFree() + */ +DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes); + + +/** + * Free an image buffer previously allocated by TurboJPEG. You should always + * use this function to free JPEG destination buffer(s) that were automatically + * (re)allocated by #tjCompress2() or #tjTransform() or that were manually + * allocated using #tjAlloc(). + * + * @param buffer address of the buffer to free + * + * @sa tjAlloc() + */ +DLLEXPORT void DLLCALL tjFree(unsigned char *buffer); + + +/** + * Returns a descriptive error message explaining why the last command failed. + * + * @return a descriptive error message explaining why the last command failed. + */ +DLLEXPORT char* DLLCALL tjGetErrorStr(void); + + +/* Deprecated functions and macros */ +#define TJFLAG_FORCEMMX 8 +#define TJFLAG_FORCESSE 16 +#define TJFLAG_FORCESSE2 32 +#define TJFLAG_FORCESSE3 128 + + +/* Backward compatibility functions and macros (nothing to see here) */ +#define NUMSUBOPT TJ_NUMSAMP +#define TJ_444 TJSAMP_444 +#define TJ_422 TJSAMP_422 +#define TJ_420 TJSAMP_420 +#define TJ_411 TJSAMP_420 +#define TJ_GRAYSCALE TJSAMP_GRAY + +#define TJ_BGR 1 +#define TJ_BOTTOMUP TJFLAG_BOTTOMUP +#define TJ_FORCEMMX TJFLAG_FORCEMMX +#define TJ_FORCESSE TJFLAG_FORCESSE +#define TJ_FORCESSE2 TJFLAG_FORCESSE2 +#define TJ_ALPHAFIRST 64 +#define TJ_FORCESSE3 TJFLAG_FORCESSE3 +#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE +#define TJ_YUV 512 + +DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height); + +DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height, + int jpegSubsamp); + +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height, + int subsamp); + +DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, + unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags); + +DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, + unsigned char *srcBuf, int width, int pitch, int height, int pixelSize, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, + unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height); + +DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, + int *jpegSubsamp); + +DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelSize, int flags); + +DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int flags); + + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-jni.c glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-jni.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-jni.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-jni.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1166 @@ +/* + * Copyright (C)2011-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "turbojpeg.h" +#ifdef WIN32 +#include "tjutil.h" +#endif +#include +#include "java/org_libjpegturbo_turbojpeg_TJCompressor.h" +#include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h" +#include "java/org_libjpegturbo_turbojpeg_TJ.h" + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) + +#define _throw(msg, exceptionClass) { \ + jclass _exccls=(*env)->FindClass(env, exceptionClass); \ + if(!_exccls || (*env)->ExceptionCheck(env)) goto bailout; \ + (*env)->ThrowNew(env, _exccls, msg); \ + goto bailout; \ +} + +#define _throwtj() _throw(tjGetErrorStr(), "org/libjpegturbo/turbojpeg/TJException") + +#define _throwarg(msg) _throw(msg, "java/lang/IllegalArgumentException") + +#define _throwmem() _throw("Memory allocation failure", "java/lang/OutOfMemoryError"); + +#define bailif0(f) {if(!(f) || (*env)->ExceptionCheck(env)) { \ + goto bailout; \ +}} + +#define gethandle() \ + jclass _cls=(*env)->GetObjectClass(env, obj); \ + jfieldID _fid; \ + if(!_cls || (*env)->ExceptionCheck(env)) goto bailout; \ + bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J")); \ + handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid); \ + +#ifdef _WIN32 +#define setenv(envvar, value, dummy) _putenv_s(envvar, value) +#endif + +#define prop2env(property, envvar) \ +{ \ + if((jName=(*env)->NewStringUTF(env, property))!=NULL \ + && (jValue=(*env)->CallStaticObjectMethod(env, cls, mid, jName))!=NULL) \ + { \ + if((value=(*env)->GetStringUTFChars(env, jValue, 0))!=NULL) \ + { \ + setenv(envvar, value, 1); \ + (*env)->ReleaseStringUTFChars(env, jValue, value); \ + } \ + } \ +} + +int ProcessSystemProperties(JNIEnv *env) +{ + jclass cls; jmethodID mid; + jstring jName, jValue; + const char *value; + + bailif0(cls=(*env)->FindClass(env, "java/lang/System")); + bailif0(mid=(*env)->GetStaticMethodID(env, cls, "getProperty", + "(Ljava/lang/String;)Ljava/lang/String;")); + + prop2env("turbojpeg.optimize", "TJ_OPTIMIZE"); + prop2env("turbojpeg.arithmetic", "TJ_ARITHMETIC"); + prop2env("turbojpeg.restart", "TJ_RESTART"); + prop2env("turbojpeg.progressive", "TJ_PROGRESSIVE"); + return 0; + + bailout: + return -1; +} + +/* TurboJPEG 1.2.x: TJ::bufSize() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize + (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp) +{ + jint retval=(jint)tjBufSize(width, height, jpegSubsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::bufSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII + (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp) +{ + jint retval=(jint)tjBufSizeYUV2(width, pad, height, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.2.x: TJ::bufSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III + (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp) +{ + return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width, + 4, height, subsamp); +} + +/* TurboJPEG 1.4.x: TJ::planeSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII + (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride, + jint height, jint subsamp) +{ + jint retval=(jint)tjPlaneSizeYUV(componentID, width, stride, height, + subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::planeWidth() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III + (JNIEnv *env, jclass cls, jint componentID, jint width, jint subsamp) +{ + jint retval=(jint)tjPlaneWidth(componentID, width, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::planeHeight() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III + (JNIEnv *env, jclass cls, jint componentID, jint height, jint subsamp) +{ + jint retval=(jint)tjPlaneHeight(componentID, height, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.2.x: TJCompressor::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitCompress())==NULL) + _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +static jint TJCompressor_compress + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jbyteArray dst, + jint jpegSubsamp, jint jpegQual, jint flags) +{ + tjhandle handle=0; + unsigned long jpegSize=0; + jsize arraySize=0, actualPitch; + unsigned char *srcBuf=NULL, *jpegBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0) + _throwarg("Invalid argument in compress()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch; + arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf]; + if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst)<(jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(ProcessSystemProperties(env)<0) goto bailout; + + if(tjCompress2(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width, + pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, jpegQual, + flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + return (jint)jpegSize; +} + +/* TurboJPEG 1.3.x: TJCompressor::compress() byte source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) +{ + return TJCompressor_compress(env, obj, src, 1, x, y, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); +} + +/* TurboJPEG 1.2.x: TJCompressor::compress() byte source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) +{ + return TJCompressor_compress(env, obj, src, 1, 0, 0, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); +} + +/* TurboJPEG 1.3.x: TJCompressor::compress() int source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + + return TJCompressor_compress(env, obj, src, sizeof(jint), x, y, width, + stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + + bailout: + return 0; +} + +/* TurboJPEG 1.2.x: TJCompressor::compress() int source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + + return TJCompressor_compress(env, obj, src, sizeof(jint), 0, 0, width, + stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + + bailout: + return 0; +} + +/* TurboJPEG 1.4.x: TJCompressor::compressFromYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jint width, jintArray jSrcStrides, jint height, jint subsamp, + jbyteArray dst, jint jpegQual, jint flags) +{ + tjhandle handle=0; + unsigned long jpegSize=0; + jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; + const unsigned char *srcPlanes[3]; + unsigned char *jpegBuf=NULL; + int *srcOffsets=NULL, *srcStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in compressFromYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)<(jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcOffsets=(*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for(i=0; iGetObjectArrayElement(env, srcobjs, i)); + if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], + 0)); + srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; + } + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(ProcessSystemProperties(env)<0) goto bailout; + + if(tjCompressFromYUVPlanes(handle, srcPlanes, width, srcStrides, height, + subsamp, &jpegBuf, &jpegSize, jpegQual, flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if(srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if(srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); + return (jint)jpegSize; +} + +static void TJCompressor_encodeYUV + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; + unsigned char *srcBuf=NULL, *dstPlanes[3]; + int *dstOffsets=NULL, *dstStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0 || subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in encodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF + || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, dstobjs)GetArrayLength(env, jDstOffsets)GetArrayLength(env, jDstStrides)GetArrayLength(env, src)*srcElementSizeGetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], + 0)); + dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; + } + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if(tjEncodeYUVPlanes(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], + width, pitch, height, pf, dstPlanes, dstStrides, subsamp, flags)==-1) + _throwtj(); + + bailout: + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if(dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if(dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); + return; +} + +/* TurboJPEG 1.4.x: TJCompressor::encodeYUV() byte source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + TJCompressor_encodeYUV(env, obj, src, 1, x, y, width, pitch, height, pf, + dstobjs, jDstOffsets, jDstStrides, subsamp, flags); +} + +/* TurboJPEG 1.4.x: TJCompressor::encodeYUV() int source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + + TJCompressor_encodeYUV(env, obj, src, sizeof(jint), x, y, width, + stride*sizeof(jint), height, pf, dstobjs, jDstOffsets, jDstStrides, + subsamp, flags); + + bailout: + return; +} + +JNIEXPORT void JNICALL TJCompressor_encodeYUV_12 + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0; + unsigned char *srcBuf=NULL, *dstBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0) + _throwarg("Invalid argument in encodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height; + if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst) + <(jsize)tjBufSizeYUV(width, height, subsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + return; +} + +/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() byte source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + TJCompressor_encodeYUV_12(env, obj, src, 1, width, pitch, height, pf, dst, + subsamp, flags); +} + +/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() int source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + + TJCompressor_encodeYUV_12(env, obj, src, sizeof(jint), width, + stride*sizeof(jint), height, pf, dst, subsamp, flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJCompressor::destroy() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy + (JNIEnv *env, jobject obj) +{ + tjhandle handle=0; + + gethandle(); + + if(tjDestroy(handle)==-1) _throwtj(); + (*env)->SetLongField(env, obj, _fid, 0); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitDecompress())==NULL) _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::getScalingFactors() */ +JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors + (JNIEnv *env, jclass cls) +{ + jclass sfcls=NULL; jfieldID fid=0; + tjscalingfactor *sf=NULL; int n=0, i; + jobject sfobj=NULL; + jobjectArray sfjava=NULL; + + if((sf=tjGetScalingFactors(&n))==NULL || n==0) + _throwarg(tjGetErrorStr()); + + bailif0(sfcls=(*env)->FindClass(env, "org/libjpegturbo/turbojpeg/TJScalingFactor")); + bailif0(sfjava=(jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0)); + + for(i=0; iAllocObject(env, sfcls)); + bailif0(fid=(*env)->GetFieldID(env, sfcls, "num", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].num); + bailif0(fid=(*env)->GetFieldID(env, sfcls, "denom", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].denom); + (*env)->SetObjectArrayElement(env, sfjava, i, sfobj); + } + + bailout: + return sfjava; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompressHeader() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize) +{ + tjhandle handle=0; + unsigned char *jpegBuf=NULL; + int width=0, height=0, jpegSubsamp=-1, jpegColorspace=-1; + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetPrimitiveArrayCritical(env, src, 0)); + + if(tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize, + &width, &height, &jpegSubsamp, &jpegColorspace)==-1) + _throwtj(); + + (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); jpegBuf=NULL; + + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + (*env)->SetIntField(env, obj, _fid, jpegSubsamp); + if((_fid=(*env)->GetFieldID(env, _cls, "jpegColorspace", "I"))==0) + (*env)->ExceptionClear(env); + else + (*env)->SetIntField(env, obj, _fid, jpegColorspace); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + (*env)->SetIntField(env, obj, _fid, width); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + (*env)->SetIntField(env, obj, _fid, height); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +static void TJDecompressor_decompress + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst, + jint dstElementSize, jint x, jint y, jint width, jint pitch, jint height, + jint pf, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + unsigned char *jpegBuf=NULL, *dstBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, src)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, + &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +/* TurboJPEG 1.3.x: TJDecompressor::decompress() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, x, y, width, + pitch, height, pf, flags); +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompress() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 0, 0, width, + pitch, height, pf, flags); +} + +/* TurboJPEG 1.3.x: TJDecompressor::decompress() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), x, y, + width, stride*sizeof(jint), height, pf, flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompress() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 0, 0, + width, stride*sizeof(jint), height, pf, flags); + + bailout: + return; + +} + +/* TurboJPEG 1.4.x: TJDecompressor::decompressToYUV() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, + jobjectArray dstobjs, jintArray jDstOffsets, jint desiredWidth, + jintArray jDstStrides, jint desiredHeight, jint flags) +{ + tjhandle handle=0; + jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; + unsigned char *jpegBuf=NULL, *dstPlanes[3]; + int *dstOffsets=NULL, *dstStrides=NULL; + int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; + int nc=0, i, width, height, scaledWidth, scaledHeight, nsf=0; + tjscalingfactor *sf; + + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + + nc=(jpegSubsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3); + + width=desiredWidth; height=desiredHeight; + if(width==0) width=jpegWidth; + if(height==0) height=jpegHeight; + sf=tjGetScalingFactors(&nsf); + if(!sf || nsf<1) + _throwarg(tjGetErrorStr()); + for(i=0; i=nsf) + _throwarg("Could not scale down to desired image dimensions"); + + bailif0(dstOffsets=(*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], + 0)); + dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; + } + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if(tjDecompressToYUVPlanes(handle, jpegBuf, (unsigned long)jpegSize, + dstPlanes, desiredWidth, dstStrides, desiredHeight, flags)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if(dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if(dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompressToYUV() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint flags) +{ + tjhandle handle=0; + unsigned char *jpegBuf=NULL, *dstBuf=NULL; + int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + if((*env)->GetArrayLength(env, dst) + <(jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +static void TJDecompressor_decodeYUV + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jarray dst, jint dstElementSize, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; + const unsigned char *srcPlanes[3]; + unsigned char *dstBuf=NULL; + int *srcOffsets=NULL, *srcStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || subsamp<0 + || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in decodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF + || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for(i=0; iGetObjectArrayElement(env, srcobjs, i)); + if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], + 0)); + srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; + } + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecodeYUVPlanes(handle, srcPlanes, srcStrides, subsamp, + &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if(srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if(srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); + return; +} + +/* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jbyteArray dst, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, 1, x, y, width, pitch, height, pf, flags); +} + +/* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jintArray dst, jint x, jint y, + jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decoding to an integer buffer."); + + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, sizeof(jint), x, y, width, stride*sizeof(jint), height, pf, + flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJTransformer::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitTransform())==NULL) _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +typedef struct _JNICustomFilterParams +{ + JNIEnv *env; + jobject tobj; + jobject cfobj; +} JNICustomFilterParams; + +static int JNICustomFilter(short *coeffs, tjregion arrayRegion, + tjregion planeRegion, int componentIndex, int transformIndex, + tjtransform *transform) +{ + JNICustomFilterParams *params=(JNICustomFilterParams *)transform->data; + JNIEnv *env=params->env; + jobject tobj=params->tobj, cfobj=params->cfobj; + jobject arrayRegionObj, planeRegionObj, bufobj, borobj; + jclass cls; jmethodID mid; jfieldID fid; + + bailif0(bufobj=(*env)->NewDirectByteBuffer(env, coeffs, + sizeof(short)*arrayRegion.w*arrayRegion.h)); + bailif0(cls=(*env)->FindClass(env, "java/nio/ByteOrder")); + bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder", + "()Ljava/nio/ByteOrder;")); + bailif0(borobj=(*env)->CallStaticObjectMethod(env, cls, mid)); + bailif0(cls=(*env)->GetObjectClass(env, bufobj)); + bailif0(mid=(*env)->GetMethodID(env, cls, "order", + "(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;")); + (*env)->CallObjectMethod(env, bufobj, mid, borobj); + bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer", + "()Ljava/nio/ShortBuffer;")); + bailif0(bufobj=(*env)->CallObjectMethod(env, bufobj, mid)); + + bailif0(cls=(*env)->FindClass(env, "java/awt/Rectangle")); + bailif0(arrayRegionObj=(*env)->AllocObject(env, cls)); + bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x); + bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y); + bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w); + bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h); + + bailif0(planeRegionObj=(*env)->AllocObject(env, cls)); + bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x); + bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y); + bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w); + bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h); + + bailif0(cls=(*env)->GetObjectClass(env, cfobj)); + bailif0(mid=(*env)->GetMethodID(env, cls, "customFilter", + "(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V")); + (*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj, + planeRegionObj, componentIndex, transformIndex, tobj); + + return 0; + + bailout: + return -1; +} + +/* TurboJPEG 1.2.x: TJTransformer::transform() */ +JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform + (JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize, + jobjectArray dstobjs, jobjectArray tobjs, jint flags) +{ + tjhandle handle=0; int i; + unsigned char *jpegBuf=NULL, **dstBufs=NULL; jsize n=0; + unsigned long *dstSizes=NULL; tjtransform *t=NULL; + jbyteArray *jdstBufs=NULL; + int jpegWidth=0, jpegHeight=0, jpegSubsamp; + jintArray jdstSizes=0; jint *dstSizesi=NULL; + JNICustomFilterParams *params=NULL; + + gethandle(); + + if((*env)->GetArrayLength(env, jsrcBuf)GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + + n=(*env)->GetArrayLength(env, dstobjs); + if(n!=(*env)->GetArrayLength(env, tobjs)) + _throwarg("Mismatch between size of transforms array and destination buffers array"); + + if((dstBufs=(unsigned char **)malloc(sizeof(unsigned char *)*n))==NULL) + _throwmem(); + if((jdstBufs=(jbyteArray *)malloc(sizeof(jbyteArray)*n))==NULL) + _throwmem(); + if((dstSizes=(unsigned long *)malloc(sizeof(unsigned long)*n))==NULL) + _throwmem(); + if((t=(tjtransform *)malloc(sizeof(tjtransform)*n))==NULL) + _throwmem(); + if((params=(JNICustomFilterParams *)malloc(sizeof(JNICustomFilterParams)*n)) + ==NULL) + _throwmem(); + for(i=0; iGetObjectArrayElement(env, tobjs, i)); + bailif0(_cls=(*env)->GetObjectClass(env, tobj)); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "op", "I")); + t[i].op=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "options", "I")); + t[i].options=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "x", "I")); + t[i].r.x=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "y", "I")); + t[i].r.y=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "width", "I")); + t[i].r.w=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "height", "I")); + t[i].r.h=(*env)->GetIntField(env, tobj, _fid); + + bailif0(_fid=(*env)->GetFieldID(env, _cls, "cf", + "Lorg/libjpegturbo/turbojpeg/TJCustomFilter;")); + cfobj=(*env)->GetObjectField(env, tobj, _fid); + if(cfobj) + { + params[i].env=env; + params[i].tobj=tobj; + params[i].cfobj=cfobj; + t[i].customFilter=JNICustomFilter; + t[i].data=(void *)¶ms[i]; + } + } + + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i]) + GetPrimitiveArrayCritical(env, jsrcBuf, 0)); + for(i=0; iGetPrimitiveArrayCritical(env, jdstBufs[i], 0)); + + if(tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t, + flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + dstBufs[i]=NULL; + } + (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + jpegBuf=NULL; + + jdstSizes=(*env)->NewIntArray(env, n); + bailif0(dstSizesi=(*env)->GetIntArrayElements(env, jdstSizes, 0)); + for(i=0; iReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0); + if(dstBufs) + { + for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + } + free(dstBufs); + } + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + if(jdstBufs) free(jdstBufs); + if(dstSizes) free(dstSizes); + if(t) free(t); + return jdstSizes; +} + +/* TurboJPEG 1.2.x: TJDecompressor::destroy() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy + (JNIEnv *env, jobject obj) +{ + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-mapfile glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-mapfile --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-mapfile 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-mapfile 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +TURBOJPEG_1.0 +{ + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; +}; + +TURBOJPEG_1.1 +{ + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; +} TURBOJPEG_1.0; + +TURBOJPEG_1.2 +{ + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; +} TURBOJPEG_1.1; + +TURBOJPEG_1.4 +{ + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; +} TURBOJPEG_1.2; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-mapfile.jni glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-mapfile.jni --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/turbojpeg-mapfile.jni 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/turbojpeg-mapfile.jni 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,92 @@ +TURBOJPEG_1.0 +{ + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; +}; + +TURBOJPEG_1.1 +{ + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; +} TURBOJPEG_1.0; + +TURBOJPEG_1.2 +{ + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; + Java_org_libjpegturbo_turbojpeg_TJ_bufSize; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III; + Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors; + Java_org_libjpegturbo_turbojpeg_TJCompressor_init; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_init; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJTransformer_init; + Java_org_libjpegturbo_turbojpeg_TJTransformer_transform; +} TURBOJPEG_1.1; + +TURBOJPEG_1.3 +{ + global: + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII; +} TURBOJPEG_1.2; + +TURBOJPEG_1.4 +{ + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III; + Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III; +} TURBOJPEG_1.3; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/usage.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/usage.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/usage.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/usage.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,660 @@ +NOTE: This file was modified by The libjpeg-turbo Project to include only +information relevant to libjpeg-turbo and to wordsmith certain sections. + +USAGE instructions for the Independent JPEG Group's JPEG software +================================================================= + +This file describes usage of the JPEG conversion programs cjpeg and djpeg, +as well as the utility programs jpegtran, rdjpgcom and wrjpgcom. (See +the other documentation files if you wish to use the JPEG library within +your own programs.) + +If you are on a Unix machine you may prefer to read the Unix-style manual +pages in files cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1. + + +INTRODUCTION + +These programs implement JPEG image encoding, decoding, and transcoding. +JPEG (pronounced "jay-peg") is a standardized compression method for +full-color and grayscale images. + + +GENERAL USAGE + +We provide two programs, cjpeg to compress an image file into JPEG format, +and djpeg to decompress a JPEG file back into a conventional image format. + +On Unix-like systems, you say: + cjpeg [switches] [imagefile] >jpegfile +or + djpeg [switches] [jpegfile] >imagefile +The programs read the specified input file, or standard input if none is +named. They always write to standard output (with trace/error messages to +standard error). These conventions are handy for piping images between +programs. + +On most non-Unix systems, you say: + cjpeg [switches] imagefile jpegfile +or + djpeg [switches] jpegfile imagefile +i.e., both the input and output files are named on the command line. This +style is a little more foolproof, and it loses no functionality if you don't +have pipes. (You can get this style on Unix too, if you prefer, by defining +TWO_FILE_COMMANDLINE when you compile the programs; see install.txt.) + +You can also say: + cjpeg [switches] -outfile jpegfile imagefile +or + djpeg [switches] -outfile imagefile jpegfile +This syntax works on all systems, so it is useful for scripts. + +The currently supported image file formats are: PPM (PBMPLUS color format), +PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster Toolkit +format). (RLE is supported only if the URT library is available, which it +isn't on most non-Unix systems.) cjpeg recognizes the input image format +automatically, with the exception of some Targa files. You have to tell djpeg +which format to generate. + +JPEG files are in the defacto standard JFIF file format. There are other, +less widely used JPEG-based file formats, but we don't support them. + +All switch names may be abbreviated; for example, -grayscale may be written +-gray or -gr. Most of the "basic" switches can be abbreviated to as little as +one letter. Upper and lower case are equivalent (-BMP is the same as -bmp). +British spellings are also accepted (e.g., -greyscale), though for brevity +these are not mentioned below. + + +CJPEG DETAILS + +The basic command line switches for cjpeg are: + + -quality N[,...] Scale quantization tables to adjust image quality. + Quality is 0 (worst) to 100 (best); default is 75. + (See below for more info.) + + -grayscale Create monochrome JPEG file from color input. + Be sure to use this switch when compressing a grayscale + BMP file, because cjpeg isn't bright enough to notice + whether a BMP file uses only shades of gray. By + saying -grayscale, you'll get a smaller JPEG file that + takes less time to process. + + -rgb Create RGB JPEG file. + Using this switch suppresses the conversion from RGB + colorspace input to the default YCbCr JPEG colorspace. + + -optimize Perform optimization of entropy encoding parameters. + Without this, default encoding parameters are used. + -optimize usually makes the JPEG file a little smaller, + but cjpeg runs somewhat slower and needs much more + memory. Image quality and speed of decompression are + unaffected by -optimize. + + -progressive Create progressive JPEG file (see below). + + -targa Input file is Targa format. Targa files that contain + an "identification" field will not be automatically + recognized by cjpeg; for such files you must specify + -targa to make cjpeg treat the input as Targa format. + For most Targa files, you won't need this switch. + +The -quality switch lets you trade off compressed file size against quality of +the reconstructed image: the higher the quality setting, the larger the JPEG +file, and the closer the output image will be to the original input. Normally +you want to use the lowest quality setting (smallest file) that decompresses +into something visually indistinguishable from the original image. For this +purpose the quality setting should generally be between 50 and 95 (the default +is 75) for photographic images. If you see defects at -quality 75, then go up +5 or 10 counts at a time until you are happy with the output image. (The +optimal setting will vary from one image to another.) + +-quality 100 will generate a quantization table of all 1's, minimizing loss +in the quantization step (but there is still information loss in subsampling, +as well as roundoff error.) For most images, specifying a quality value above +about 95 will increase the size of the compressed file dramatically, and while +the quality gain from these higher quality values is measurable (using metrics +such as PSNR or SSIM), it is rarely perceivable by human vision. + +In the other direction, quality values below 50 will produce very small files +of low image quality. Settings around 5 to 10 might be useful in preparing an +index of a large image library, for example. Try -quality 2 (or so) for some +amusing Cubist effects. (Note: quality values below about 25 generate 2-byte +quantization tables, which are considered optional in the JPEG standard. +cjpeg emits a warning message when you give such a quality value, because some +other JPEG programs may be unable to decode the resulting file. Use -baseline +if you need to ensure compatibility at low quality values.) + +The -quality option has been extended in this version of cjpeg to support +separate quality settings for luminance and chrominance (or, in general, +separate settings for every quantization table slot.) The principle is the +same as chrominance subsampling: since the human eye is more sensitive to +spatial changes in brightness than spatial changes in color, the chrominance +components can be quantized more than the luminance components without +incurring any visible image quality loss. However, unlike subsampling, this +feature reduces data in the frequency domain instead of the spatial domain, +which allows for more fine-grained control. This option is useful in +quality-sensitive applications, for which the artifacts generated by +subsampling may be unacceptable. + +The -quality option accepts a comma-separated list of parameters, which +respectively refer to the quality levels that should be assigned to the +quantization table slots. If there are more q-table slots than parameters, +then the last parameter is replicated. Thus, if only one quality parameter is +given, this is used for both luminance and chrominance (slots 0 and 1, +respectively), preserving the legacy behavior of cjpeg v6b and prior. More (or +customized) quantization tables can be set with the -qtables option and +assigned to components with the -qslots option (see the "wizard" switches +below.) + +JPEG files generated with separate luminance and chrominance quality are +fully compliant with standard JPEG decoders. + +CAUTION: For this setting to be useful, be sure to pass an argument of +-sample 1x1 to cjpeg to disable chrominance subsampling. Otherwise, the +default subsampling level (2x2, AKA "4:2:0") will be used. + +The -progressive switch creates a "progressive JPEG" file. In this type of +JPEG file, the data is stored in multiple scans of increasing quality. If the +file is being transmitted over a slow communications link, the decoder can use +the first scan to display a low-quality image very quickly, and can then +improve the display with each subsequent scan. The final image is exactly +equivalent to a standard JPEG file of the same quality setting, and the total +file size is about the same --- often a little smaller. + +Switches for advanced users: + + -arithmetic Use arithmetic coding. CAUTION: arithmetic coded JPEG + is not yet widely implemented, so many decoders will + be unable to view an arithmetic coded JPEG file at + all. + + -dct int Use integer DCT method (default). + -dct fast Use fast integer DCT (less accurate). + In libjpeg-turbo, the fast method is generally about + 5-15% faster than the int method when using the + x86/x86-64 SIMD extensions (results may vary with other + SIMD implementations, or when using libjpeg-turbo + without SIMD extensions.) For quality levels of 90 and + below, there should be little or no perceptible + difference between the two algorithms. For quality + levels above 90, however, the difference between + the fast and the int methods becomes more pronounced. + With quality=97, for instance, the fast method incurs + generally about a 1-3 dB loss (in PSNR) relative to + the int method, but this can be larger for some images. + Do not use the fast method with quality levels above + 97. The algorithm often degenerates at quality=98 and + above and can actually produce a more lossy image than + if lower quality levels had been used. Also, in + libjpeg-turbo, the fast method is not fully accerated + for quality levels above 97, so it will be slower than + the int method. + -dct float Use floating-point DCT method. + The float method is mainly a legacy feature. It does + not produce significantly more accurate results than + the int method, and it is much slower. The float + method may also give different results on different + machines due to varying roundoff behavior, whereas the + integer methods should give the same results on all + machines. + + -restart N Emit a JPEG restart marker every N MCU rows, or every + N MCU blocks if "B" is attached to the number. + -restart 0 (the default) means no restart markers. + + -smooth N Smooth the input image to eliminate dithering noise. + N, ranging from 1 to 100, indicates the strength of + smoothing. 0 (the default) means no smoothing. + + -maxmemory N Set limit for amount of memory to use in processing + large images. Value is in thousands of bytes, or + millions of bytes if "M" is attached to the number. + For example, -max 4m selects 4000000 bytes. If more + space is needed, temporary files will be used. + + -verbose Enable debug printout. More -v's give more printout. + or -debug Also, version information is printed at startup. + +The -restart option inserts extra markers that allow a JPEG decoder to +resynchronize after a transmission error. Without restart markers, any damage +to a compressed file will usually ruin the image from the point of the error +to the end of the image; with restart markers, the damage is usually confined +to the portion of the image up to the next restart marker. Of course, the +restart markers occupy extra space. We recommend -restart 1 for images that +will be transmitted across unreliable networks such as Usenet. + +The -smooth option filters the input to eliminate fine-scale noise. This is +often useful when converting dithered images to JPEG: a moderate smoothing +factor of 10 to 50 gets rid of dithering patterns in the input file, resulting +in a smaller JPEG file and a better-looking image. Too large a smoothing +factor will visibly blur the image, however. + +Switches for wizards: + + -baseline Force baseline-compatible quantization tables to be + generated. This clamps quantization values to 8 bits + even at low quality settings. (This switch is poorly + named, since it does not ensure that the output is + actually baseline JPEG. For example, you can use + -baseline and -progressive together.) + + -qtables file Use the quantization tables given in the specified + text file. + + -qslots N[,...] Select which quantization table to use for each color + component. + + -sample HxV[,...] Set JPEG sampling factors for each color component. + + -scans file Use the scan script given in the specified text file. + +The "wizard" switches are intended for experimentation with JPEG. If you +don't know what you are doing, DON'T USE THEM. These switches are documented +further in the file wizard.txt. + + +DJPEG DETAILS + +The basic command line switches for djpeg are: + + -colors N Reduce image to at most N colors. This reduces the + or -quantize N number of colors used in the output image, so that it + can be displayed on a colormapped display or stored in + a colormapped file format. For example, if you have + an 8-bit display, you'd need to reduce to 256 or fewer + colors. (-colors is the recommended name, -quantize + is provided only for backwards compatibility.) + + -fast Select recommended processing options for fast, low + quality output. (The default options are chosen for + highest quality output.) Currently, this is equivalent + to "-dct fast -nosmooth -onepass -dither ordered". + + -grayscale Force grayscale output even if JPEG file is color. + Useful for viewing on monochrome displays; also, + djpeg runs noticeably faster in this mode. + + -rgb Force RGB output even if JPEG file is grayscale. + + -scale M/N Scale the output image by a factor M/N. Currently + the scale factor must be M/8, where M is an integer + between 1 and 16 inclusive, or any reduced fraction + thereof (such as 1/2, 3/4, etc. Scaling is handy if + the image is larger than your screen; also, djpeg runs + much faster when scaling down the output. + + -bmp Select BMP output format (Windows flavor). 8-bit + colormapped format is emitted if -colors or -grayscale + is specified, or if the JPEG file is grayscale; + otherwise, 24-bit full-color format is emitted. + + -gif Select GIF output format. Since GIF does not support + more than 256 colors, -colors 256 is assumed (unless + you specify a smaller number of colors). If you + specify -fast, the default number of colors is 216. + + -os2 Select BMP output format (OS/2 1.x flavor). 8-bit + colormapped format is emitted if -colors or -grayscale + is specified, or if the JPEG file is grayscale; + otherwise, 24-bit full-color format is emitted. + + -pnm Select PBMPLUS (PPM/PGM) output format (this is the + default format). PGM is emitted if the JPEG file is + grayscale or if -grayscale is specified; otherwise + PPM is emitted. + + -rle Select RLE output format. (Requires URT library.) + + -targa Select Targa output format. Grayscale format is + emitted if the JPEG file is grayscale or if + -grayscale is specified; otherwise, colormapped format + is emitted if -colors is specified; otherwise, 24-bit + full-color format is emitted. + +Switches for advanced users: + + -dct int Use integer DCT method (default). + -dct fast Use fast integer DCT (less accurate). + In libjpeg-turbo, the fast method is generally about + 5-15% faster than the int method when using the + x86/x86-64 SIMD extensions (results may vary with other + SIMD implementations, or when using libjpeg-turbo + without SIMD extensions.) If the JPEG image was + compressed using a quality level of 85 or below, then + there should be little or no perceptible difference + between the two algorithms. When decompressing images + that were compressed using quality levels above 85, + however, the difference between the fast and int + methods becomes more pronounced. With images + compressed using quality=97, for instance, the fast + method incurs generally about a 4-6 dB loss (in PSNR) + relative to the int method, but this can be larger for + some images. If you can avoid it, do not use the fast + method when decompressing images that were compressed + using quality levels above 97. The algorithm often + degenerates for such images and can actually produce + a more lossy output image than if the JPEG image had + been compressed using lower quality levels. + -dct float Use floating-point DCT method. + The float method is mainly a legacy feature. It does +  not produce significantly more accurate results than + the int method, and it is much slower. The float + method may also give different results on different + machines due to varying roundoff behavior, whereas the + integer methods should give the same results on all + machines. + + -dither fs Use Floyd-Steinberg dithering in color quantization. + -dither ordered Use ordered dithering in color quantization. + -dither none Do not use dithering in color quantization. + By default, Floyd-Steinberg dithering is applied when + quantizing colors; this is slow but usually produces + the best results. Ordered dither is a compromise + between speed and quality; no dithering is fast but + usually looks awful. Note that these switches have + no effect unless color quantization is being done. + Ordered dither is only available in -onepass mode. + + -map FILE Quantize to the colors used in the specified image + file. This is useful for producing multiple files + with identical color maps, or for forcing a predefined + set of colors to be used. The FILE must be a GIF + or PPM file. This option overrides -colors and + -onepass. + + -nosmooth Use a faster, lower-quality upsampling routine. + + -onepass Use one-pass instead of two-pass color quantization. + The one-pass method is faster and needs less memory, + but it produces a lower-quality image. -onepass is + ignored unless you also say -colors N. Also, + the one-pass method is always used for grayscale + output (the two-pass method is no improvement then). + + -maxmemory N Set limit for amount of memory to use in processing + large images. Value is in thousands of bytes, or + millions of bytes if "M" is attached to the number. + For example, -max 4m selects 4000000 bytes. If more + space is needed, temporary files will be used. + + -verbose Enable debug printout. More -v's give more printout. + or -debug Also, version information is printed at startup. + + +HINTS FOR CJPEG + +Color GIF files are not the ideal input for JPEG; JPEG is really intended for +compressing full-color (24-bit) images. In particular, don't try to convert +cartoons, line drawings, and other images that have only a few distinct +colors. GIF works great on these, JPEG does not. If you want to convert a +GIF to JPEG, you should experiment with cjpeg's -quality and -smooth options +to get a satisfactory conversion. -smooth 10 or so is often helpful. + +Avoid running an image through a series of JPEG compression/decompression +cycles. Image quality loss will accumulate; after ten or so cycles the image +may be noticeably worse than it was after one cycle. It's best to use a +lossless format while manipulating an image, then convert to JPEG format when +you are ready to file the image away. + +The -optimize option to cjpeg is worth using when you are making a "final" +version for posting or archiving. It's also a win when you are using low +quality settings to make very small JPEG files; the percentage improvement +is often a lot more than it is on larger files. (At present, -optimize +mode is always selected when generating progressive JPEG files.) + +Support for GIF input files was removed in cjpeg v6b due to concerns over +the Unisys LZW patent. Although this patent expired in 2006, cjpeg still +lacks GIF support, for these historical reasons. (Conversion of GIF files to +JPEG is usually a bad idea anyway.) + + +HINTS FOR DJPEG + +To get a quick preview of an image, use the -grayscale and/or -scale switches. +"-grayscale -scale 1/8" is the fastest case. + +Several options are available that trade off image quality to gain speed. +"-fast" turns on the recommended settings. + +"-dct fast" and/or "-nosmooth" gain speed at a small sacrifice in quality. +When producing a color-quantized image, "-onepass -dither ordered" is fast but +much lower quality than the default behavior. "-dither none" may give +acceptable results in two-pass mode, but is seldom tolerable in one-pass mode. + +Two-pass color quantization requires a good deal of memory; on MS-DOS machines +it may run out of memory even with -maxmemory 0. In that case you can still +decompress, with some loss of image quality, by specifying -onepass for +one-pass quantization. + +To avoid the Unisys LZW patent (now expired), djpeg produces uncompressed GIF +files. These are larger than they should be, but are readable by standard GIF +decoders. + + +HINTS FOR BOTH PROGRAMS + +If more space is needed than will fit in the available main memory (as +determined by -maxmemory), temporary files will be used. (MS-DOS versions +will try to get extended or expanded memory first.) The temporary files are +often rather large: in typical cases they occupy three bytes per pixel, for +example 3*800*600 = 1.44Mb for an 800x600 image. If you don't have enough +free disk space, leave out -progressive and -optimize (for cjpeg) or specify +-onepass (for djpeg). + +On MS-DOS, the temporary files are created in the directory named by the TMP +or TEMP environment variable, or in the current directory if neither of those +exist. Amiga implementations put the temp files in the directory named by +JPEGTMP:, so be sure to assign JPEGTMP: to a disk partition with adequate free +space. + +The default memory usage limit (-maxmemory) is set when the software is +compiled. If you get an "insufficient memory" error, try specifying a smaller +-maxmemory value, even -maxmemory 0 to use the absolute minimum space. You +may want to recompile with a smaller default value if this happens often. + +On machines that have "environment" variables, you can define the environment +variable JPEGMEM to set the default memory limit. The value is specified as +described for the -maxmemory switch. JPEGMEM overrides the default value +specified when the program was compiled, and itself is overridden by an +explicit -maxmemory switch. + +On MS-DOS machines, -maxmemory is the amount of main (conventional) memory to +use. (Extended or expanded memory is also used if available.) Most +DOS-specific versions of this software do their own memory space estimation +and do not need you to specify -maxmemory. + + +JPEGTRAN + +jpegtran performs various useful transformations of JPEG files. +It can translate the coded representation from one variant of JPEG to another, +for example from baseline JPEG to progressive JPEG or vice versa. It can also +perform some rearrangements of the image data, for example turning an image +from landscape to portrait format by rotation. For EXIF files and JPEG files +containing Exif data, you may prefer to use exiftran instead. + +jpegtran works by rearranging the compressed data (DCT coefficients), without +ever fully decoding the image. Therefore, its transformations are lossless: +there is no image degradation at all, which would not be true if you used +djpeg followed by cjpeg to accomplish the same conversion. But by the same +token, jpegtran cannot perform lossy operations such as changing the image +quality. However, while the image data is losslessly transformed, metadata +can be removed. See the -copy option for specifics. + +jpegtran uses a command line syntax similar to cjpeg or djpeg. +On Unix-like systems, you say: + jpegtran [switches] [inputfile] >outputfile +On most non-Unix systems, you say: + jpegtran [switches] inputfile outputfile +where both the input and output files are JPEG files. + +To specify the coded JPEG representation used in the output file, +jpegtran accepts a subset of the switches recognized by cjpeg: + -optimize Perform optimization of entropy encoding parameters. + -progressive Create progressive JPEG file. + -arithmetic Use arithmetic coding. + -restart N Emit a JPEG restart marker every N MCU rows, or every + N MCU blocks if "B" is attached to the number. + -scans file Use the scan script given in the specified text file. +See the previous discussion of cjpeg for more details about these switches. +If you specify none of these switches, you get a plain baseline-JPEG output +file. The quality setting and so forth are determined by the input file. + +The image can be losslessly transformed by giving one of these switches: + -flip horizontal Mirror image horizontally (left-right). + -flip vertical Mirror image vertically (top-bottom). + -rotate 90 Rotate image 90 degrees clockwise. + -rotate 180 Rotate image 180 degrees. + -rotate 270 Rotate image 270 degrees clockwise (or 90 ccw). + -transpose Transpose image (across UL-to-LR axis). + -transverse Transverse transpose (across UR-to-LL axis). + +The transpose transformation has no restrictions regarding image dimensions. +The other transformations operate rather oddly if the image dimensions are not +a multiple of the iMCU size (usually 8 or 16 pixels), because they can only +transform complete blocks of DCT coefficient data in the desired way. + +jpegtran's default behavior when transforming an odd-size image is designed +to preserve exact reversibility and mathematical consistency of the +transformation set. As stated, transpose is able to flip the entire image +area. Horizontal mirroring leaves any partial iMCU column at the right edge +untouched, but is able to flip all rows of the image. Similarly, vertical +mirroring leaves any partial iMCU row at the bottom edge untouched, but is +able to flip all columns. The other transforms can be built up as sequences +of transpose and flip operations; for consistency, their actions on edge +pixels are defined to be the same as the end result of the corresponding +transpose-and-flip sequence. + +For practical use, you may prefer to discard any untransformable edge pixels +rather than having a strange-looking strip along the right and/or bottom edges +of a transformed image. To do this, add the -trim switch: + -trim Drop non-transformable edge blocks. +Obviously, a transformation with -trim is not reversible, so strictly speaking +jpegtran with this switch is not lossless. Also, the expected mathematical +equivalences between the transformations no longer hold. For example, +"-rot 270 -trim" trims only the bottom edge, but "-rot 90 -trim" followed by +"-rot 180 -trim" trims both edges. + +If you are only interested in perfect transformations, add the -perfect switch: + -perfect Fail with an error if the transformation is not + perfect. +For example, you may want to do + jpegtran -rot 90 -perfect foo.jpg || djpeg foo.jpg | pnmflip -r90 | cjpeg +to do a perfect rotation, if available, or an approximated one if not. + +This version of jpegtran also offers a lossless crop option, which discards +data outside of a given image region but losslessly preserves what is inside. +Like the rotate and flip transforms, lossless crop is restricted by the current +JPEG format; the upper left corner of the selected region must fall on an iMCU +boundary. If it doesn't, then it is silently moved up and/or left to the +nearest iMCU boundary (the lower right corner is unchanged.) Thus, the output +image covers at least the requested region, but it may cover more. The +adjustment of the region dimensions may be optionally disabled by attaching an +'f' character ("force") to the width or height number. + +The image can be losslessly cropped by giving the switch: + -crop WxH+X+Y Crop to a rectangular region of width W and height H, + starting at point X,Y. + +Other not-strictly-lossless transformation switches are: + + -grayscale Force grayscale output. +This option discards the chrominance channels if the input image is YCbCr +(ie, a standard color JPEG), resulting in a grayscale JPEG file. The +luminance channel is preserved exactly, so this is a better method of reducing +to grayscale than decompression, conversion, and recompression. This switch +is particularly handy for fixing a monochrome picture that was mistakenly +encoded as a color JPEG. (In such a case, the space savings from getting rid +of the near-empty chroma channels won't be large; but the decoding time for +a grayscale JPEG is substantially less than that for a color JPEG.) + +jpegtran also recognizes these switches that control what to do with "extra" +markers, such as comment blocks: + -copy none Copy no extra markers from source file. This setting + suppresses all comments and other metadata in the + source file. + -copy comments Copy only comment markers. This setting copies + comments from the source file but discards any other + metadata. + -copy all Copy all extra markers. This setting preserves + miscellaneous markers found in the source file, such + as JFIF thumbnails, Exif data, and Photoshop settings. + In some files, these extra markers can be sizable. + Note that this option will copy thumbnails as-is; + they will not be transformed. +The default behavior is -copy comments. (Note: in IJG releases v6 and v6a, +jpegtran always did the equivalent of -copy none.) + +Additional switches recognized by jpegtran are: + -outfile filename + -maxmemory N + -verbose + -debug +These work the same as in cjpeg or djpeg. + + +THE COMMENT UTILITIES + +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. + +We provide two utility programs to display COM block contents and add COM +blocks to a JPEG file. + +rdjpgcom searches a JPEG file and prints the contents of any COM blocks on +standard output. The command line syntax is + rdjpgcom [-raw] [-verbose] [inputfilename] +The switch "-raw" (or just "-r") causes rdjpgcom to output non-printable +characters in JPEG comments. These characters are normally escaped for +security reasons. +The switch "-verbose" (or just "-v") causes rdjpgcom to also display the JPEG +image dimensions. If you omit the input file name from the command line, +the JPEG file is read from standard input. (This may not work on some +operating systems, if binary data can't be read from stdin.) + +wrjpgcom adds a COM block, containing text you provide, to a JPEG file. +Ordinarily, the COM block is added after any existing COM blocks, but you +can delete the old COM blocks if you wish. wrjpgcom produces a new JPEG +file; it does not modify the input file. DO NOT try to overwrite the input +file by directing wrjpgcom's output back into it; on most systems this will +just destroy your file. + +The command line syntax for wrjpgcom is similar to cjpeg's. On Unix-like +systems, it is + wrjpgcom [switches] [inputfilename] +The output file is written to standard output. The input file comes from +the named file, or from standard input if no input file is named. + +On most non-Unix systems, the syntax is + wrjpgcom [switches] inputfilename outputfilename +where both input and output file names must be given explicitly. + +wrjpgcom understands three switches: + -replace Delete any existing COM blocks from the file. + -comment "Comment text" Supply new COM text on command line. + -cfile name Read text for new COM block from named file. +(Switch names can be abbreviated.) If you have only one line of comment text +to add, you can provide it on the command line with -comment. The comment +text must be surrounded with quotes so that it is treated as a single +argument. Longer comments can be read from a text file. + +If you give neither -comment nor -cfile, then wrjpgcom will read the comment +text from standard input. (In this case an input image file name MUST be +supplied, so that the source JPEG file comes from somewhere else.) You can +enter multiple lines, up to 64KB worth. Type an end-of-file indicator +(usually control-D or control-Z) to terminate the comment text entry. + +wrjpgcom will not add a COM block if the provided comment string is empty. +Therefore -replace -comment "" can be used to delete all COM blocks from a +file. + +These utility programs do not depend on the IJG JPEG library. In +particular, the source code for rdjpgcom is intended as an illustration of +the minimum amount of code required to parse a JPEG file header correctly. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wizard.txt glmark2-2021.02/android/jni/src/libjpeg-turbo/wizard.txt --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wizard.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wizard.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,211 @@ +Advanced usage instructions for the Independent JPEG Group's JPEG software +========================================================================== + +This file describes cjpeg's "switches for wizards". + +The "wizard" switches are intended for experimentation with JPEG by persons +who are reasonably knowledgeable about the JPEG standard. If you don't know +what you are doing, DON'T USE THESE SWITCHES. You'll likely produce files +with worse image quality and/or poorer compression than you'd get from the +default settings. Furthermore, these switches must be used with caution +when making files intended for general use, because not all JPEG decoders +will support unusual JPEG parameter settings. + + +Quantization Table Adjustment +----------------------------- + +Ordinarily, cjpeg starts with a default set of tables (the same ones given +as examples in the JPEG standard) and scales them up or down according to +the -quality setting. The details of the scaling algorithm can be found in +jcparam.c. At very low quality settings, some quantization table entries +can get scaled up to values exceeding 255. Although 2-byte quantization +values are supported by the IJG software, this feature is not in baseline +JPEG and is not supported by all implementations. If you need to ensure +wide compatibility of low-quality files, you can constrain the scaled +quantization values to no more than 255 by giving the -baseline switch. +Note that use of -baseline will result in poorer quality for the same file +size, since more bits than necessary are expended on higher AC coefficients. + +You can substitute a different set of quantization values by using the +-qtables switch: + + -qtables file Use the quantization tables given in the named file. + +The specified file should be a text file containing decimal quantization +values. The file should contain one to four tables, each of 64 elements. +The tables are implicitly numbered 0,1,etc. in order of appearance. Table +entries appear in normal array order (NOT in the zigzag order in which they +will be stored in the JPEG file). + +Quantization table files are free format, in that arbitrary whitespace can +appear between numbers. Also, comments can be included: a comment starts +with '#' and extends to the end of the line. Here is an example file that +duplicates the default quantization tables: + + # Quantization tables given in JPEG spec, section K.1 + + # This is table 0 (the luminance table): + 16 11 10 16 24 40 51 61 + 12 12 14 19 26 58 60 55 + 14 13 16 24 40 57 69 56 + 14 17 22 29 51 87 80 62 + 18 22 37 56 68 109 103 77 + 24 35 55 64 81 104 113 92 + 49 64 78 87 103 121 120 101 + 72 92 95 98 112 100 103 99 + + # This is table 1 (the chrominance table): + 17 18 24 47 99 99 99 99 + 18 21 26 66 99 99 99 99 + 24 26 56 99 99 99 99 99 + 47 66 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + +If the -qtables switch is used without -quality, then the specified tables +are used exactly as-is. If both -qtables and -quality are used, then the +tables taken from the file are scaled in the same fashion that the default +tables would be scaled for that quality setting. If -baseline appears, then +the quantization values are constrained to the range 1-255. + +By default, cjpeg will use quantization table 0 for luminance components and +table 1 for chrominance components. To override this choice, use the -qslots +switch: + + -qslots N[,...] Select which quantization table to use for + each color component. + +The -qslots switch specifies a quantization table number for each color +component, in the order in which the components appear in the JPEG SOF marker. +For example, to create a separate table for each of Y,Cb,Cr, you could +provide a -qtables file that defines three quantization tables and say +"-qslots 0,1,2". If -qslots gives fewer table numbers than there are color +components, then the last table number is repeated as necessary. + + +Sampling Factor Adjustment +-------------------------- + +By default, cjpeg uses 2:1 horizontal and vertical downsampling when +compressing YCbCr data, and no downsampling for all other color spaces. +You can override this default with the -sample switch: + + -sample HxV[,...] Set JPEG sampling factors for each color + component. + +The -sample switch specifies the JPEG sampling factors for each color +component, in the order in which they appear in the JPEG SOF marker. +If you specify fewer HxV pairs than there are components, the remaining +components are set to 1x1 sampling. For example, the default YCbCr setting +is equivalent to "-sample 2x2,1x1,1x1", which can be abbreviated to +"-sample 2x2". + +There are still some JPEG decoders in existence that support only 2x1 +sampling (also called 4:2:2 sampling). Compatibility with such decoders can +be achieved by specifying "-sample 2x1". This is not recommended unless +really necessary, since it increases file size and encoding/decoding time +with very little quality gain. + + +Multiple Scan / Progression Control +----------------------------------- + +By default, cjpeg emits a single-scan sequential JPEG file. The +-progressive switch generates a progressive JPEG file using a default series +of progression parameters. You can create multiple-scan sequential JPEG +files or progressive JPEG files with custom progression parameters by using +the -scans switch: + + -scans file Use the scan sequence given in the named file. + +The specified file should be a text file containing a "scan script". +The script specifies the contents and ordering of the scans to be emitted. +Each entry in the script defines one scan. A scan definition specifies +the components to be included in the scan, and for progressive JPEG it also +specifies the progression parameters Ss,Se,Ah,Al for the scan. Scan +definitions are separated by semicolons (';'). A semicolon after the last +scan definition is optional. + +Each scan definition contains one to four component indexes, optionally +followed by a colon (':') and the four progressive-JPEG parameters. The +component indexes denote which color component(s) are to be transmitted in +the scan. Components are numbered in the order in which they appear in the +JPEG SOF marker, with the first component being numbered 0. (Note that these +indexes are not the "component ID" codes assigned to the components, just +positional indexes.) + +The progression parameters for each scan are: + Ss Zigzag index of first coefficient included in scan + Se Zigzag index of last coefficient included in scan + Ah Zero for first scan of a coefficient, else Al of prior scan + Al Successive approximation low bit position for scan +If the progression parameters are omitted, the values 0,63,0,0 are used, +producing a sequential JPEG file. cjpeg automatically determines whether +the script represents a progressive or sequential file, by observing whether +Ss and Se values other than 0 and 63 appear. (The -progressive switch is +not needed to specify this; in fact, it is ignored when -scans appears.) +The scan script must meet the JPEG restrictions on progression sequences. +(cjpeg checks that the spec's requirements are obeyed.) + +Scan script files are free format, in that arbitrary whitespace can appear +between numbers and around punctuation. Also, comments can be included: a +comment starts with '#' and extends to the end of the line. For additional +legibility, commas or dashes can be placed between values. (Actually, any +single punctuation character other than ':' or ';' can be inserted.) For +example, the following two scan definitions are equivalent: + 0 1 2: 0 63 0 0; + 0,1,2 : 0-63, 0,0 ; + +Here is an example of a scan script that generates a partially interleaved +sequential JPEG file: + + 0; # Y only in first scan + 1 2; # Cb and Cr in second scan + +Here is an example of a progressive scan script using only spectral selection +(no successive approximation): + + # Interleaved DC scan for Y,Cb,Cr: + 0,1,2: 0-0, 0, 0 ; + # AC scans: + 0: 1-2, 0, 0 ; # First two Y AC coefficients + 0: 3-5, 0, 0 ; # Three more + 1: 1-63, 0, 0 ; # All AC coefficients for Cb + 2: 1-63, 0, 0 ; # All AC coefficients for Cr + 0: 6-9, 0, 0 ; # More Y coefficients + 0: 10-63, 0, 0 ; # Remaining Y coefficients + +Here is an example of a successive-approximation script. This is equivalent +to the default script used by "cjpeg -progressive" for YCbCr images: + + # Initial DC scan for Y,Cb,Cr (lowest bit not sent) + 0,1,2: 0-0, 0, 1 ; + # First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits: + 0: 1-5, 0, 2 ; + # Send all Cr,Cb AC coefficients, minus lowest bit: + # (chroma data is usually too small to be worth subdividing further; + # but note we send Cr first since eye is least sensitive to Cb) + 2: 1-63, 0, 1 ; + 1: 1-63, 0, 1 ; + # Send remaining Y AC coefficients, minus 2 lowest bits: + 0: 6-63, 0, 2 ; + # Send next-to-lowest bit of all Y AC coefficients: + 0: 1-63, 2, 1 ; + # At this point we've sent all but the lowest bit of all coefficients. + # Send lowest bit of DC coefficients + 0,1,2: 0-0, 1, 0 ; + # Send lowest bit of AC coefficients + 2: 1-63, 1, 0 ; + 1: 1-63, 1, 0 ; + # Y AC lowest bit scan is last; it's usually the largest scan + 0: 1-63, 1, 0 ; + +It may be worth pointing out that this script is tuned for quality settings +of around 50 to 75. For lower quality settings, you'd probably want to use +a script with fewer stages of successive approximation (otherwise the +initial scans will be really bad). For higher quality settings, you might +want to use more stages of successive approximation (so that the initial +scans are not too large). diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrbmp.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrbmp.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrbmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrbmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,493 @@ +/* + * wrbmp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in Microsoft "BMP" + * format (MS Windows 3.x and OS/2 1.x flavors). + * Either 8-bit colormapped or 24-bit full-color format can be written. + * No compression is supported. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * This code contributed by James Arthur Boucher. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jconfigint.h" + +#ifdef BMP_SUPPORTED + + +/* + * To support 12-bit JPEG data, we'd have to scale output down to 8 bits. + * This is not yet implemented. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + +/* + * Since BMP stores scanlines bottom-to-top, we have to invert the image + * from JPEG's top-to-bottom order. To do this, we save the outgoing data + * in a virtual array during put_pixel_row calls, then actually emit the + * BMP file during finish_output. The virtual array contains one JSAMPLE per + * pixel if the output is grayscale or colormapped, three if it is full color. + */ + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + boolean is_os2; /* saves the OS2 format request flag */ + + jvirt_sarray_ptr whole_image; /* needed to reverse row order */ + JDIMENSION data_width; /* JSAMPLEs per row */ + JDIMENSION row_width; /* physical width of one row in the BMP file */ + int pad_bytes; /* number of padding bytes needed per row */ + JDIMENSION cur_output_row; /* next row# to write to virtual array */ +} bmp_dest_struct; + +typedef bmp_dest_struct *bmp_dest_ptr; + + +/* Forward declarations */ +LOCAL(void) write_colormap + (j_decompress_ptr cinfo, bmp_dest_ptr dest, int map_colors, + int map_entry_size); + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* This version is for writing 24-bit pixels */ +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + int pad; + + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, + dest->cur_output_row, (JDIMENSION) 1, TRUE); + dest->cur_output_row++; + + /* Transfer data. Note destination values must be in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = dest->pub.buffer[0]; + outptr = image_ptr[0]; + + if(cinfo->out_color_space == JCS_RGB565) { + boolean big_endian = is_big_endian(); + unsigned short *inptr2 = (unsigned short *)inptr; + for (col = cinfo->output_width; col > 0; col--) { + if (big_endian) { + outptr[0] = (*inptr2 >> 5) & 0xF8; + outptr[1] = ((*inptr2 << 5) & 0xE0) | ((*inptr2 >> 11) & 0x1C); + outptr[2] = *inptr2 & 0xF8; + } else { + outptr[0] = (*inptr2 << 3) & 0xF8; + outptr[1] = (*inptr2 >> 3) & 0xFC; + outptr[2] = (*inptr2 >> 8) & 0xF8; + } + outptr += 3; + inptr2++; + } + } else { + for (col = cinfo->output_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + outptr += 3; + } + } + + /* Zero out the pad bytes. */ + pad = dest->pad_bytes; + while (--pad >= 0) + *outptr++ = 0; +} + +METHODDEF(void) +put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* This version is for grayscale OR quantized color output */ +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + int pad; + + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, + dest->cur_output_row, (JDIMENSION) 1, TRUE); + dest->cur_output_row++; + + /* Transfer data. */ + inptr = dest->pub.buffer[0]; + outptr = image_ptr[0]; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = *inptr++; /* can omit GETJSAMPLE() safely */ + } + + /* Zero out the pad bytes. */ + pad = dest->pad_bytes; + while (--pad >= 0) + *outptr++ = 0; +} + + +/* + * Startup: normally writes the file header. + * In this module we may as well postpone everything until finish_output. + */ + +METHODDEF(void) +start_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* no work here */ +} + + +/* + * Finish up at the end of the file. + * + * Here is where we really output the BMP file. + * + * First, routines to write the Windows and OS/2 variants of the file header. + */ + +LOCAL(void) +write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +/* Write a Windows-style BMP file header, including colormap if needed */ +{ + char bmpfileheader[14]; + char bmpinfoheader[40]; +#define PUT_2B(array,offset,value) \ + (array[offset] = (char) ((value) & 0xFF), \ + array[offset+1] = (char) (((value) >> 8) & 0xFF)) +#define PUT_4B(array,offset,value) \ + (array[offset] = (char) ((value) & 0xFF), \ + array[offset+1] = (char) (((value) >> 8) & 0xFF), \ + array[offset+2] = (char) (((value) >> 16) & 0xFF), \ + array[offset+3] = (char) (((value) >> 24) & 0xFF)) + long headersize, bfSize; + int bits_per_pixel, cmap_entries; + + /* Compute colormap size and total file size */ + if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* Colormapped RGB */ + bits_per_pixel = 8; + cmap_entries = 256; + } else { + /* Unquantized, full color RGB */ + bits_per_pixel = 24; + cmap_entries = 0; + } + } else if (cinfo->out_color_space == JCS_RGB565) { + bits_per_pixel = 24; + cmap_entries = 0; + } else { + /* Grayscale output. We need to fake a 256-entry colormap. */ + bits_per_pixel = 8; + cmap_entries = 256; + } + /* File size */ + headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */ + bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + + /* Set unused fields of header to 0 */ + MEMZERO(bmpfileheader, sizeof(bmpfileheader)); + MEMZERO(bmpinfoheader, sizeof(bmpinfoheader)); + + /* Fill the file header */ + bmpfileheader[0] = 0x42; /* first 2 bytes are ASCII 'B', 'M' */ + bmpfileheader[1] = 0x4D; + PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */ + /* we leave bfReserved1 & bfReserved2 = 0 */ + PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */ + + /* Fill the info header (Microsoft calls this a BITMAPINFOHEADER) */ + PUT_2B(bmpinfoheader, 0, 40); /* biSize */ + PUT_4B(bmpinfoheader, 4, cinfo->output_width); /* biWidth */ + PUT_4B(bmpinfoheader, 8, cinfo->output_height); /* biHeight */ + PUT_2B(bmpinfoheader, 12, 1); /* biPlanes - must be 1 */ + PUT_2B(bmpinfoheader, 14, bits_per_pixel); /* biBitCount */ + /* we leave biCompression = 0, for none */ + /* we leave biSizeImage = 0; this is correct for uncompressed data */ + if (cinfo->density_unit == 2) { /* if have density in dots/cm, then */ + PUT_4B(bmpinfoheader, 24, (long) (cinfo->X_density*100)); /* XPels/M */ + PUT_4B(bmpinfoheader, 28, (long) (cinfo->Y_density*100)); /* XPels/M */ + } + PUT_2B(bmpinfoheader, 32, cmap_entries); /* biClrUsed */ + /* we leave biClrImportant = 0 */ + + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + ERREXIT(cinfo, JERR_FILE_WRITE); + if (JFWRITE(dest->pub.output_file, bmpinfoheader, 40) != (size_t) 40) + ERREXIT(cinfo, JERR_FILE_WRITE); + + if (cmap_entries > 0) + write_colormap(cinfo, dest, cmap_entries, 4); +} + + +LOCAL(void) +write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +/* Write an OS2-style BMP file header, including colormap if needed */ +{ + char bmpfileheader[14]; + char bmpcoreheader[12]; + long headersize, bfSize; + int bits_per_pixel, cmap_entries; + + /* Compute colormap size and total file size */ + if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* Colormapped RGB */ + bits_per_pixel = 8; + cmap_entries = 256; + } else { + /* Unquantized, full color RGB */ + bits_per_pixel = 24; + cmap_entries = 0; + } + } else if (cinfo->out_color_space == JCS_RGB565) { + bits_per_pixel = 24; + cmap_entries = 0; + } else { + /* Grayscale output. We need to fake a 256-entry colormap. */ + bits_per_pixel = 8; + cmap_entries = 256; + } + /* File size */ + headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */ + bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + + /* Set unused fields of header to 0 */ + MEMZERO(bmpfileheader, sizeof(bmpfileheader)); + MEMZERO(bmpcoreheader, sizeof(bmpcoreheader)); + + /* Fill the file header */ + bmpfileheader[0] = 0x42; /* first 2 bytes are ASCII 'B', 'M' */ + bmpfileheader[1] = 0x4D; + PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */ + /* we leave bfReserved1 & bfReserved2 = 0 */ + PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */ + + /* Fill the info header (Microsoft calls this a BITMAPCOREHEADER) */ + PUT_2B(bmpcoreheader, 0, 12); /* bcSize */ + PUT_2B(bmpcoreheader, 4, cinfo->output_width); /* bcWidth */ + PUT_2B(bmpcoreheader, 6, cinfo->output_height); /* bcHeight */ + PUT_2B(bmpcoreheader, 8, 1); /* bcPlanes - must be 1 */ + PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */ + + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + ERREXIT(cinfo, JERR_FILE_WRITE); + if (JFWRITE(dest->pub.output_file, bmpcoreheader, 12) != (size_t) 12) + ERREXIT(cinfo, JERR_FILE_WRITE); + + if (cmap_entries > 0) + write_colormap(cinfo, dest, cmap_entries, 3); +} + + +/* + * Write the colormap. + * Windows uses BGR0 map entries; OS/2 uses BGR entries. + */ + +LOCAL(void) +write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest, + int map_colors, int map_entry_size) +{ + JSAMPARRAY colormap = cinfo->colormap; + int num_colors = cinfo->actual_number_of_colors; + FILE *outfile = dest->pub.output_file; + int i; + + if (colormap != NULL) { + if (cinfo->out_color_components == 3) { + /* Normal case with RGB colormap */ + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(colormap[2][i]), outfile); + putc(GETJSAMPLE(colormap[1][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } else { + /* Grayscale colormap (only happens with grayscale quantization) */ + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(colormap[0][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } + } else { + /* If no colormap, must be grayscale data. Generate a linear "map". */ + for (i = 0; i < 256; i++) { + putc(i, outfile); + putc(i, outfile); + putc(i, outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } + /* Pad colormap with zeros to ensure specified number of colormap entries */ + if (i > map_colors) + ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, i); + for (; i < map_colors; i++) { + putc(0, outfile); + putc(0, outfile); + putc(0, outfile); + if (map_entry_size == 4) + putc(0, outfile); + } +} + + +METHODDEF(void) +finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + register FILE *outfile = dest->pub.output_file; + JSAMPARRAY image_ptr; + register JSAMPROW data_ptr; + JDIMENSION row; + register JDIMENSION col; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Write the header and colormap */ + if (dest->is_os2) + write_os2_header(cinfo, dest); + else + write_bmp_header(cinfo, dest); + + /* Write the file body from our virtual array */ + for (row = cinfo->output_height; row > 0; row--) { + if (progress != NULL) { + progress->pub.pass_counter = (long) (cinfo->output_height - row); + progress->pub.pass_limit = (long) cinfo->output_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, row-1, (JDIMENSION) 1, FALSE); + data_ptr = image_ptr[0]; + for (col = dest->row_width; col > 0; col--) { + putc(GETJSAMPLE(*data_ptr), outfile); + data_ptr++; + } + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Make sure we wrote the output file OK */ + fflush(outfile); + if (ferror(outfile)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for BMP format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_bmp (j_decompress_ptr cinfo, boolean is_os2) +{ + bmp_dest_ptr dest; + JDIMENSION row_width; + + /* Create module interface object, fill in method pointers */ + dest = (bmp_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(bmp_dest_struct)); + dest->pub.start_output = start_output_bmp; + dest->pub.finish_output = finish_output_bmp; + dest->is_os2 = is_os2; + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + dest->pub.put_pixel_rows = put_gray_rows; + } else if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) + dest->pub.put_pixel_rows = put_gray_rows; + else + dest->pub.put_pixel_rows = put_pixel_rows; + } else if(cinfo->out_color_space == JCS_RGB565 ) { + dest->pub.put_pixel_rows = put_pixel_rows; + } else { + ERREXIT(cinfo, JERR_BMP_COLORSPACE); + } + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Determine width of rows in the BMP file (padded to 4-byte boundary). */ + if (cinfo->out_color_space == JCS_RGB565) { + row_width = cinfo->output_width * 2; + dest->row_width = dest->data_width = cinfo->output_width * 3; + } else { + row_width = cinfo->output_width * cinfo->output_components; + dest->row_width = dest->data_width = row_width; + } + while ((dest->row_width & 3) != 0) dest->row_width++; + dest->pad_bytes = (int) (dest->row_width - dest->data_width); + if (cinfo->out_color_space == JCS_RGB565) { + while ((row_width & 3) != 0) row_width++; + } else { + row_width = dest->row_width; + } + + + /* Allocate space for inversion array, prepare for write pass */ + dest->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + dest->row_width, cinfo->output_height, (JDIMENSION) 1); + dest->cur_output_row = 0; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, row_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* BMP_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrgif.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrgif.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrgif.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrgif.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,402 @@ +/* + * wrgif.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in GIF format. + * + ************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * this code has been modified to output "uncompressed GIF" files. * + * There is no trace of the LZW algorithm in this file. * + ************************************************************************** + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + */ + +/* + * This code is loosely based on ppmtogif from the PBMPLUS distribution + * of Feb. 1991. That file contains the following copyright notice: + * Based on GIFENCODE by David Rowley . + * Lempel-Ziv compression based on "compress" by Spencer W. Thomas et al. + * Copyright (C) 1989 by Jef Poskanzer. + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, provided + * that the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. This software is provided "as is" without express or + * implied warranty. + * + * We are also required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + j_decompress_ptr cinfo; /* back link saves passing separate parm */ + + /* State for packing variable-width codes into a bitstream */ + int n_bits; /* current number of bits/code */ + int maxcode; /* maximum code, given n_bits */ + long cur_accum; /* holds bits not yet output */ + int cur_bits; /* # of bits in cur_accum */ + + /* State for GIF code assignment */ + int ClearCode; /* clear code (doesn't change) */ + int EOFCode; /* EOF code (ditto) */ + int code_counter; /* counts output symbols */ + + /* GIF data packet construction buffer */ + int bytesinpkt; /* # of bytes in current packet */ + char packetbuf[256]; /* workspace for accumulating packet */ + +} gif_dest_struct; + +typedef gif_dest_struct *gif_dest_ptr; + +/* Largest value that will fit in N bits */ +#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) + + +/* + * Routines to package finished data bytes into GIF data blocks. + * A data block consists of a count byte (1..255) and that many data bytes. + */ + +LOCAL(void) +flush_packet (gif_dest_ptr dinfo) +/* flush any accumulated data */ +{ + if (dinfo->bytesinpkt > 0) { /* never write zero-length packet */ + dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++; + if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) + != (size_t) dinfo->bytesinpkt) + ERREXIT(dinfo->cinfo, JERR_FILE_WRITE); + dinfo->bytesinpkt = 0; + } +} + + +/* Add a character to current packet; flush to disk if necessary */ +#define CHAR_OUT(dinfo,c) \ + { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c); \ + if ((dinfo)->bytesinpkt >= 255) \ + flush_packet(dinfo); \ + } + + +/* Routine to convert variable-width codes into a byte stream */ + +LOCAL(void) +output (gif_dest_ptr dinfo, int code) +/* Emit a code of n_bits bits */ +/* Uses cur_accum and cur_bits to reblock into 8-bit bytes */ +{ + dinfo->cur_accum |= ((long) code) << dinfo->cur_bits; + dinfo->cur_bits += dinfo->n_bits; + + while (dinfo->cur_bits >= 8) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + dinfo->cur_accum >>= 8; + dinfo->cur_bits -= 8; + } +} + + +/* The pseudo-compression algorithm. + * + * In this module we simply output each pixel value as a separate symbol; + * thus, no compression occurs. In fact, there is expansion of one bit per + * pixel, because we use a symbol width one bit wider than the pixel width. + * + * GIF ordinarily uses variable-width symbols, and the decoder will expect + * to ratchet up the symbol width after a fixed number of symbols. + * To simplify the logic and keep the expansion penalty down, we emit a + * GIF Clear code to reset the decoder just before the width would ratchet up. + * Thus, all the symbols in the output file will have the same bit width. + * Note that emitting the Clear codes at the right times is a mere matter of + * counting output symbols and is in no way dependent on the LZW patent. + * + * With a small basic pixel width (low color count), Clear codes will be + * needed very frequently, causing the file to expand even more. So this + * simplistic approach wouldn't work too well on bilevel images, for example. + * But for output of JPEG conversions the pixel width will usually be 8 bits + * (129 to 256 colors), so the overhead added by Clear symbols is only about + * one symbol in every 256. + */ + +LOCAL(void) +compress_init (gif_dest_ptr dinfo, int i_bits) +/* Initialize pseudo-compressor */ +{ + /* init all the state variables */ + dinfo->n_bits = i_bits; + dinfo->maxcode = MAXCODE(dinfo->n_bits); + dinfo->ClearCode = (1 << (i_bits - 1)); + dinfo->EOFCode = dinfo->ClearCode + 1; + dinfo->code_counter = dinfo->ClearCode + 2; + /* init output buffering vars */ + dinfo->bytesinpkt = 0; + dinfo->cur_accum = 0; + dinfo->cur_bits = 0; + /* GIF specifies an initial Clear code */ + output(dinfo, dinfo->ClearCode); +} + + +LOCAL(void) +compress_pixel (gif_dest_ptr dinfo, int c) +/* Accept and "compress" one pixel value. + * The given value must be less than n_bits wide. + */ +{ + /* Output the given pixel value as a symbol. */ + output(dinfo, c); + /* Issue Clear codes often enough to keep the reader from ratcheting up + * its symbol size. + */ + if (dinfo->code_counter < dinfo->maxcode) { + dinfo->code_counter++; + } else { + output(dinfo, dinfo->ClearCode); + dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */ + } +} + + +LOCAL(void) +compress_term (gif_dest_ptr dinfo) +/* Clean up at end */ +{ + /* Send an EOF code */ + output(dinfo, dinfo->EOFCode); + /* Flush the bit-packing buffer */ + if (dinfo->cur_bits > 0) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + } + /* Flush the packet buffer */ + flush_packet(dinfo); +} + + +/* GIF header construction */ + + +LOCAL(void) +put_word (gif_dest_ptr dinfo, unsigned int w) +/* Emit a 16-bit word, LSB first */ +{ + putc(w & 0xFF, dinfo->pub.output_file); + putc((w >> 8) & 0xFF, dinfo->pub.output_file); +} + + +LOCAL(void) +put_3bytes (gif_dest_ptr dinfo, int val) +/* Emit 3 copies of same byte value --- handy subr for colormap construction */ +{ + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); +} + + +LOCAL(void) +emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) +/* Output the GIF file header, including color map */ +/* If colormap==NULL, synthesize a grayscale colormap */ +{ + int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte; + int cshift = dinfo->cinfo->data_precision - 8; + int i; + + if (num_colors > 256) + ERREXIT1(dinfo->cinfo, JERR_TOO_MANY_COLORS, num_colors); + /* Compute bits/pixel and related values */ + BitsPerPixel = 1; + while (num_colors > (1 << BitsPerPixel)) + BitsPerPixel++; + ColorMapSize = 1 << BitsPerPixel; + if (BitsPerPixel <= 1) + InitCodeSize = 2; + else + InitCodeSize = BitsPerPixel; + /* + * Write the GIF header. + * Note that we generate a plain GIF87 header for maximum compatibility. + */ + putc('G', dinfo->pub.output_file); + putc('I', dinfo->pub.output_file); + putc('F', dinfo->pub.output_file); + putc('8', dinfo->pub.output_file); + putc('7', dinfo->pub.output_file); + putc('a', dinfo->pub.output_file); + /* Write the Logical Screen Descriptor */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + FlagByte = 0x80; /* Yes, there is a global color table */ + FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */ + FlagByte |= (BitsPerPixel-1); /* size of global color table */ + putc(FlagByte, dinfo->pub.output_file); + putc(0, dinfo->pub.output_file); /* Background color index */ + putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */ + /* Write the Global Color Map */ + /* If the color map is more than 8 bits precision, */ + /* we reduce it to 8 bits by shifting */ + for (i=0; i < ColorMapSize; i++) { + if (i < num_colors) { + if (colormap != NULL) { + if (dinfo->cinfo->out_color_space == JCS_RGB) { + /* Normal case: RGB color map */ + putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file); + } else { + /* Grayscale "color map": possible if quantizing grayscale image */ + put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift); + } + } else { + /* Create a grayscale map of num_colors values, range 0..255 */ + put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1)); + } + } else { + /* fill out the map to a power of 2 */ + put_3bytes(dinfo, 0); + } + } + /* Write image separator and Image Descriptor */ + putc(',', dinfo->pub.output_file); /* separator */ + put_word(dinfo, 0); /* left/top offset */ + put_word(dinfo, 0); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + /* flag byte: not interlaced, no local color map */ + putc(0x00, dinfo->pub.output_file); + /* Write Initial Code Size byte */ + putc(InitCodeSize, dinfo->pub.output_file); + + /* Initialize for "compression" of image data */ + compress_init(dinfo, InitCodeSize+1); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + if (cinfo->quantize_colors) + emit_header(dest, cinfo->actual_number_of_colors, cinfo->colormap); + else + emit_header(dest, 256, (JSAMPARRAY) NULL); +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + for (col = cinfo->output_width; col > 0; col--) { + compress_pixel(dest, GETJSAMPLE(*ptr++)); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + /* Flush "compression" mechanism */ + compress_term(dest); + /* Write a zero-length data block to end the series */ + putc(0, dest->pub.output_file); + /* Write the GIF terminator mark */ + putc(';', dest->pub.output_file); + /* Make sure we wrote the output file OK */ + fflush(dest->pub.output_file); + if (ferror(dest->pub.output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for GIF format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_gif (j_decompress_ptr cinfo) +{ + gif_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (gif_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(gif_dest_struct)); + dest->cinfo = cinfo; /* make back link for subroutines */ + dest->pub.start_output = start_output_gif; + dest->pub.put_pixel_rows = put_pixel_rows; + dest->pub.finish_output = finish_output_gif; + + if (cinfo->out_color_space != JCS_GRAYSCALE && + cinfo->out_color_space != JCS_RGB) + ERREXIT(cinfo, JERR_GIF_COLORSPACE); + + /* Force quantization if color or if > 8 bits input */ + if (cinfo->out_color_space != JCS_GRAYSCALE || cinfo->data_precision > 8) { + /* Force quantization to at most 256 colors */ + cinfo->quantize_colors = TRUE; + if (cinfo->desired_number_of_colors > 256) + cinfo->desired_number_of_colors = 256; + } + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + if (cinfo->output_components != 1) /* safety check: just one component? */ + ERREXIT(cinfo, JERR_GIF_BUG); + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* GIF_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrjpgcom.1 glmark2-2021.02/android/jni/src/libjpeg-turbo/wrjpgcom.1 --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrjpgcom.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrjpgcom.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,103 @@ +.TH WRJPGCOM 1 "15 June 1995" +.SH NAME +wrjpgcom \- insert text comments into a JPEG file +.SH SYNOPSIS +.B wrjpgcom +[ +.B \-replace +] +[ +.BI \-comment " text" +] +[ +.BI \-cfile " name" +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B wrjpgcom +reads the named JPEG/JFIF file, or the standard input if no file is named, +and generates a new JPEG/JFIF file on standard output. A comment block is +added to the file. +.PP +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. +.PP +.B wrjpgcom +adds a COM block, containing text you provide, to a JPEG file. +Ordinarily, the COM block is added after any existing COM blocks; but you +can delete the old COM blocks if you wish. +.SH OPTIONS +Switch names may be abbreviated, and are not case sensitive. +.TP +.B \-replace +Delete any existing COM blocks from the file. +.TP +.BI \-comment " text" +Supply text for new COM block on command line. +.TP +.BI \-cfile " name" +Read text for new COM block from named file. +.PP +If you have only one line of comment text to add, you can provide it on the +command line with +.BR \-comment . +The comment text must be surrounded with quotes so that it is treated as a +single argument. Longer comments can be read from a text file. +.PP +If you give neither +.B \-comment +nor +.BR \-cfile , +then +.B wrjpgcom +will read the comment text from standard input. (In this case an input image +file name MUST be supplied, so that the source JPEG file comes from somewhere +else.) You can enter multiple lines, up to 64KB worth. Type an end-of-file +indicator (usually control-D) to terminate the comment text entry. +.PP +.B wrjpgcom +will not add a COM block if the provided comment string is empty. Therefore +\fB\-replace \-comment ""\fR can be used to delete all COM blocks from a file. +.SH EXAMPLES +.LP +Add a short comment to in.jpg, producing out.jpg: +.IP +.B wrjpgcom \-c +\fI"View of my back yard" in.jpg +.B > +.I out.jpg +.PP +Attach a long comment previously stored in comment.txt: +.IP +.B wrjpgcom +.I in.jpg +.B < +.I comment.txt +.B > +.I out.jpg +.PP +or equivalently +.IP +.B wrjpgcom +.B -cfile +.I comment.txt +.B < +.I in.jpg +.B > +.I out.jpg +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1) +.SH AUTHOR +Independent JPEG Group diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrjpgcom.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrjpgcom.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrjpgcom.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrjpgcom.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,592 @@ +/* + * wrjpgcom.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a very simple stand-alone application that inserts + * user-supplied text as a COM (comment) marker in a JFIF file. + * This may be useful as an example of the minimum logic needed to parse + * JPEG markers. + */ + +#define JPEG_CJPEG_DJPEG /* to get the command-line config symbols */ +#include "jinclude.h" /* get auto-config symbols, */ + +#ifndef HAVE_STDLIB_H /* should declare malloc() */ +extern void *malloc (); +#endif +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#define WRITE_BINARY "w" +#else +#define READ_BINARY "rb" +#define WRITE_BINARY "wb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +/* Reduce this value if your malloc() can't allocate blocks up to 64K. + * On DOS, compiling in large model is usually a better solution. + */ + +#ifndef MAX_COM_LENGTH +#define MAX_COM_LENGTH 65000L /* must be <= 65533 in any case */ +#endif + + +/* + * These macros are used to read the input file and write the output file. + * To reuse this code in another application, you might need to change these. + */ + +static FILE *infile; /* input JPEG file */ + +/* Return next input byte, or EOF if no more */ +#define NEXTBYTE() getc(infile) + +static FILE *outfile; /* output JPEG file */ + +/* Emit an output byte */ +#define PUTBYTE(x) putc((x), outfile) + + +/* Error exit handler */ +#define ERREXIT(msg) (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE)) + + +/* Read one byte, testing for EOF */ +static int +read_1_byte (void) +{ + int c; + + c = NEXTBYTE(); + if (c == EOF) + ERREXIT("Premature EOF in JPEG file"); + return c; +} + +/* Read 2 bytes, convert to unsigned int */ +/* All 2-byte quantities in JPEG markers are MSB first */ +static unsigned int +read_2_bytes (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + if (c1 == EOF) + ERREXIT("Premature EOF in JPEG file"); + c2 = NEXTBYTE(); + if (c2 == EOF) + ERREXIT("Premature EOF in JPEG file"); + return (((unsigned int) c1) << 8) + ((unsigned int) c2); +} + + +/* Routines to write data to output file */ + +static void +write_1_byte (int c) +{ + PUTBYTE(c); +} + +static void +write_2_bytes (unsigned int val) +{ + PUTBYTE((val >> 8) & 0xFF); + PUTBYTE(val & 0xFF); +} + +static void +write_marker (int marker) +{ + PUTBYTE(0xFF); + PUTBYTE(marker); +} + +static void +copy_rest_of_file (void) +{ + int c; + + while ((c = NEXTBYTE()) != EOF) + PUTBYTE(c); +} + + +/* + * JPEG markers consist of one or more 0xFF bytes, followed by a marker + * code byte (which is not an FF). Here are the marker codes of interest + * in this program. (See jdmarker.c for a more complete list.) + */ + +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB +#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_COM 0xFE /* COMment */ + + +/* + * Find the next JPEG marker and return its marker code. + * We expect at least one FF byte, possibly more if the compressor used FFs + * to pad the file. (Padding FFs will NOT be replicated in the output file.) + * There could also be non-FF garbage between markers. The treatment of such + * garbage is unspecified; we choose to skip over it but emit a warning msg. + * NB: this routine must not be used after seeing SOS marker, since it will + * not deal correctly with FF/00 sequences in the compressed image data... + */ + +static int +next_marker (void) +{ + int c; + int discarded_bytes = 0; + + /* Find 0xFF byte; count and skip any non-FFs. */ + c = read_1_byte(); + while (c != 0xFF) { + discarded_bytes++; + c = read_1_byte(); + } + /* Get marker code byte, swallowing any duplicate FF bytes. Extra FFs + * are legal as pad bytes, so don't count them in discarded_bytes. + */ + do { + c = read_1_byte(); + } while (c == 0xFF); + + if (discarded_bytes != 0) { + fprintf(stderr, "Warning: garbage data found in JPEG file\n"); + } + + return c; +} + + +/* + * Read the initial marker, which should be SOI. + * For a JFIF file, the first two bytes of the file should be literally + * 0xFF M_SOI. To be more general, we could use next_marker, but if the + * input file weren't actually JPEG at all, next_marker might read the whole + * file and then return a misleading error message... + */ + +static int +first_marker (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + c2 = NEXTBYTE(); + if (c1 != 0xFF || c2 != M_SOI) + ERREXIT("Not a JPEG file"); + return c2; +} + + +/* + * Most types of marker are followed by a variable-length parameter segment. + * This routine skips over the parameters for any marker we don't otherwise + * want to process. + * Note that we MUST skip the parameter segment explicitly in order not to + * be fooled by 0xFF bytes that might appear within the parameter segment; + * such bytes do NOT introduce new markers. + */ + +static void +copy_variable (void) +/* Copy an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + write_2_bytes(length); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + write_1_byte(read_1_byte()); + length--; + } +} + +static void +skip_variable (void) +/* Skip over an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + (void) read_1_byte(); + length--; + } +} + + +/* + * Parse the marker stream until SOFn or EOI is seen; + * copy data to output, but discard COM markers unless keep_COM is true. + */ + +static int +scan_JPEG_header (int keep_COM) +{ + int marker; + + /* Expect SOI at start of file */ + if (first_marker() != M_SOI) + ERREXIT("Expected SOI marker first"); + write_marker(M_SOI); + + /* Scan miscellaneous markers until we reach SOFn. */ + for (;;) { + marker = next_marker(); + switch (marker) { + /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be, + * treated as SOFn. C4 in particular is actually DHT. + */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF2: /* Progressive, Huffman */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ + return marker; + + case M_SOS: /* should not see compressed data before SOF */ + ERREXIT("SOS without prior SOFn"); + break; + + case M_EOI: /* in case it's a tables-only JPEG stream */ + return marker; + + case M_COM: /* Existing COM: conditionally discard */ + if (keep_COM) { + write_marker(marker); + copy_variable(); + } else { + skip_variable(); + } + break; + + default: /* Anything else just gets copied */ + write_marker(marker); + copy_variable(); /* we assume it has a parameter count... */ + break; + } + } /* end loop */ +} + + +/* Command line parsing code */ + +static const char *progname; /* program name for error messages */ + + +static void +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "wrjpgcom inserts a textual comment in a JPEG file.\n"); + fprintf(stderr, "You can add to or replace any existing comment(s).\n"); + + fprintf(stderr, "Usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -replace Delete any existing comments\n"); + fprintf(stderr, " -comment \"text\" Insert comment with given text\n"); + fprintf(stderr, " -cfile name Read comment from named file\n"); + fprintf(stderr, "Notice that you must put quotes around the comment text\n"); + fprintf(stderr, "when you use -comment.\n"); + fprintf(stderr, "If you do not give either -comment or -cfile on the command line,\n"); + fprintf(stderr, "then the comment text is read from standard input.\n"); + fprintf(stderr, "It can be multiple lines, up to %u characters total.\n", + (unsigned int) MAX_COM_LENGTH); +#ifndef TWO_FILE_COMMANDLINE + fprintf(stderr, "You must specify an input JPEG file name when supplying\n"); + fprintf(stderr, "comment text from standard input.\n"); +#endif + + exit(EXIT_FAILURE); +} + + +static int +keymatch (char *arg, const char *keyword, int minchars) +/* Case-insensitive matching of (possibly abbreviated) keyword switches. */ +/* keyword is the constant keyword (must be lower case already), */ +/* minchars is length of minimum legal abbreviation. */ +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return 0; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return 0; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return 0; + return 1; /* A-OK */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int argn; + char *arg; + int keep_COM = 1; + char *comment_arg = NULL; + FILE *comment_file = NULL; + unsigned int comment_length = 0; + int marker; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "wrjpgcom"; /* in case C library doesn't provide it */ + + /* Parse switches, if any */ + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (arg[0] != '-') + break; /* not switch, must be file name */ + arg++; /* advance over '-' */ + if (keymatch(arg, "replace", 1)) { + keep_COM = 0; + } else if (keymatch(arg, "cfile", 2)) { + if (++argn >= argc) usage(); + if ((comment_file = fopen(argv[argn], "r")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else if (keymatch(arg, "comment", 1)) { + if (++argn >= argc) usage(); + comment_arg = argv[argn]; + /* If the comment text starts with '"', then we are probably running + * under MS-DOG and must parse out the quoted string ourselves. Sigh. + */ + if (comment_arg[0] == '"') { + comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + if (comment_arg == NULL) + ERREXIT("Insufficient memory"); + if (strlen(argv[argn]) + 2 >= (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + strcpy(comment_arg, argv[argn]+1); + for (;;) { + comment_length = (unsigned int) strlen(comment_arg); + if (comment_length > 0 && comment_arg[comment_length-1] == '"') { + comment_arg[comment_length-1] = '\0'; /* zap terminating quote */ + break; + } + if (++argn >= argc) + ERREXIT("Missing ending quote mark"); + if (strlen(comment_arg) + strlen(argv[argn]) + 2 >= + (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + strcat(comment_arg, " "); + strcat(comment_arg, argv[argn]); + } + } else if (strlen(argv[argn]) >= (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + comment_length = (unsigned int) strlen(comment_arg); + } else + usage(); + } + + /* Cannot use both -comment and -cfile. */ + if (comment_arg != NULL && comment_file != NULL) + usage(); + /* If there is neither -comment nor -cfile, we will read the comment text + * from stdin; in this case there MUST be an input JPEG file name. + */ + if (comment_arg == NULL && comment_file == NULL && argn >= argc) + usage(); + + /* Open the input file. */ + if (argn < argc) { + if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdin\n", progname); + exit(EXIT_FAILURE); + } +#else + infile = stdin; +#endif + } + + /* Open the output file. */ +#ifdef TWO_FILE_COMMANDLINE + /* Must have explicit output file name */ + if (argn != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + if ((outfile = fopen(argv[argn+1], WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn+1]); + exit(EXIT_FAILURE); + } +#else + /* Unix style: expect zero or one file name */ + if (argn < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } + /* default output file is stdout */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdout), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((outfile = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdout\n", progname); + exit(EXIT_FAILURE); + } +#else + outfile = stdout; +#endif +#endif /* TWO_FILE_COMMANDLINE */ + + /* Collect comment text from comment_file or stdin, if necessary */ + if (comment_arg == NULL) { + FILE *src_file; + int c; + + comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + if (comment_arg == NULL) + ERREXIT("Insufficient memory"); + comment_length = 0; + src_file = (comment_file != NULL ? comment_file : stdin); + while ((c = getc(src_file)) != EOF) { + if (comment_length >= (unsigned int) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + comment_arg[comment_length++] = (char) c; + } + if (comment_file != NULL) + fclose(comment_file); + } + + /* Copy JPEG headers until SOFn marker; + * we will insert the new comment marker just before SOFn. + * This (a) causes the new comment to appear after, rather than before, + * existing comments; and (b) ensures that comments come after any JFIF + * or JFXX markers, as required by the JFIF specification. + */ + marker = scan_JPEG_header(keep_COM); + /* Insert the new COM marker, but only if nonempty text has been supplied */ + if (comment_length > 0) { + write_marker(M_COM); + write_2_bytes(comment_length + 2); + while (comment_length > 0) { + write_1_byte(*comment_arg++); + comment_length--; + } + } + /* Duplicate the remainder of the source file. + * Note that any COM markers occuring after SOF will not be touched. + */ + write_marker(marker); + copy_rest_of_file(); + + /* All done. */ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrppm.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrppm.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrppm.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrppm.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,252 @@ +/* + * wrppm.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in PPM/PGM format. + * The extended 2-byte-per-sample raw PPM/PGM formats are supported. + * The PBMPLUS library is NOT required to compile this software + * (but it is highly useful as a set of PPM image manipulation programs). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "wrppm.h" + +#ifdef PPM_SUPPORTED + + +/* + * For 12-bit JPEG data, we either downscale the values to 8 bits + * (to write standard byte-per-sample PPM/PGM files), or output + * nonstandard word-per-sample PPM/PGM files. Downscaling is done + * if PPM_NORAWWORD is defined (this can be done in the Makefile + * or in jconfig.h). + * (When the core library supports data precision reduction, a cleaner + * implementation will be to ask for that instead.) + */ + +#if BITS_IN_JSAMPLE == 8 +#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) (v) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 +#else +#ifdef PPM_NORAWWORD +#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) ((v) >> (BITS_IN_JSAMPLE-8)) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 +#else +/* The word-per-sample format always puts the MSB first. */ +#define PUTPPMSAMPLE(ptr,v) \ + { register int val_ = v; \ + *ptr++ = (char) ((val_ >> 8) & 0xFF); \ + *ptr++ = (char) (val_ & 0xFF); \ + } +#define BYTESPERSAMPLE 2 +#define PPM_MAXVAL ((1<pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * This code is used when we have to copy the data and apply a pixel + * format translation. Typically this only happens in 12-bit mode. + */ + +METHODDEF(void) +copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = dest->samples_per_row; col > 0; col--) { + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(*ptr++)); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Write some pixel data when color quantization is in effect. + * We have to demap the color index values to straight data. + */ + +METHODDEF(void) +put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register int pixval; + register JSAMPROW ptr; + register JSAMPROW color_map0 = cinfo->colormap[0]; + register JSAMPROW color_map1 = cinfo->colormap[1]; + register JSAMPROW color_map2 = cinfo->colormap[2]; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + pixval = GETJSAMPLE(*ptr++); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map0[pixval])); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map1[pixval])); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map2[pixval])); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +METHODDEF(void) +put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JSAMPROW color_map = cinfo->colormap[0]; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map[GETJSAMPLE(*ptr++)])); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + + /* Emit file header */ + switch (cinfo->out_color_space) { + case JCS_GRAYSCALE: + /* emit header for raw PGM format */ + fprintf(dest->pub.output_file, "P5\n%ld %ld\n%d\n", + (long) cinfo->output_width, (long) cinfo->output_height, + PPM_MAXVAL); + break; + case JCS_RGB: + /* emit header for raw PPM format */ + fprintf(dest->pub.output_file, "P6\n%ld %ld\n%d\n", + (long) cinfo->output_width, (long) cinfo->output_height, + PPM_MAXVAL); + break; + default: + ERREXIT(cinfo, JERR_PPM_COLORSPACE); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* Make sure we wrote the output file OK */ + fflush(dinfo->output_file); + if (ferror(dinfo->output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for PPM format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_ppm (j_decompress_ptr cinfo) +{ + ppm_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (ppm_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(ppm_dest_struct)); + dest->pub.start_output = start_output_ppm; + dest->pub.finish_output = finish_output_ppm; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Create physical I/O buffer */ + dest->samples_per_row = cinfo->output_width * cinfo->out_color_components; + dest->buffer_width = dest->samples_per_row * (BYTESPERSAMPLE * sizeof(char)); + dest->iobuffer = (char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width); + + if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 || + sizeof(JSAMPLE) != sizeof(char)) { + /* When quantizing, we need an output buffer for colormap indexes + * that's separate from the physical I/O buffer. We also need a + * separate buffer if pixel format translation must take place. + */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width * cinfo->output_components, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + if (! cinfo->quantize_colors) + dest->pub.put_pixel_rows = copy_pixel_rows; + else if (cinfo->out_color_space == JCS_GRAYSCALE) + dest->pub.put_pixel_rows = put_demapped_gray; + else + dest->pub.put_pixel_rows = put_demapped_rgb; + } else { + /* We will fwrite() directly from decompressor output buffer. */ + /* Synthesize a JSAMPARRAY pointer structure */ + dest->pixrow = (JSAMPROW) dest->iobuffer; + dest->pub.buffer = & dest->pixrow; + dest->pub.buffer_height = 1; + dest->pub.put_pixel_rows = put_pixel_rows; + } + + return (djpeg_dest_ptr) dest; +} + +#endif /* PPM_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrppm.h glmark2-2021.02/android/jni/src/libjpeg-turbo/wrppm.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrppm.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrppm.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,26 @@ +/* + * wrppm.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#ifdef PPM_SUPPORTED + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + /* Usually these two pointers point to the same place: */ + char *iobuffer; /* fwrite's I/O buffer */ + JSAMPROW pixrow; /* decompressor output buffer */ + size_t buffer_width; /* width of I/O buffer */ + JDIMENSION samples_per_row; /* JSAMPLEs per output row */ +} ppm_dest_struct; + +typedef ppm_dest_struct *ppm_dest_ptr; + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrrle.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrrle.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrrle.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrrle.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,307 @@ +/* + * wrrle.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in RLE format. + * The Utah Raster Toolkit library is required (version 3.1 or later). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * Based on code contributed by Mike Lijewski, + * with updates from Robert Hutchinson. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef RLE_SUPPORTED + +/* rle.h is provided by the Utah Raster Toolkit. */ + +#include + +/* + * We assume that JSAMPLE has the same representation as rle_pixel, + * to wit, "unsigned char". Hence we can't cope with 12- or 16-bit samples. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + + +/* + * Since RLE stores scanlines bottom-to-top, we have to invert the image + * from JPEG's top-to-bottom order. To do this, we save the outgoing data + * in a virtual array during put_pixel_row calls, then actually emit the + * RLE file during finish_output. + */ + + +/* + * For now, if we emit an RLE color map then it is always 256 entries long, + * though not all of the entries need be used. + */ + +#define CMAPBITS 8 +#define CMAPLENGTH (1<<(CMAPBITS)) + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + jvirt_sarray_ptr image; /* virtual array to store the output image */ + rle_map *colormap; /* RLE-style color map, or NULL if none */ + rle_pixel **rle_row; /* To pass rows to rle_putrow() */ + +} rle_dest_struct; + +typedef rle_dest_struct *rle_dest_ptr; + +/* Forward declarations */ +METHODDEF(void) rle_put_pixel_rows + (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied); + + +/* + * Write the file header. + * + * In this module it's easier to wait till finish_output to write anything. + */ + +METHODDEF(void) +start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + size_t cmapsize; + int i, ci; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* + * Make sure the image can be stored in RLE format. + * + * - RLE stores image dimensions as *signed* 16 bit integers. JPEG + * uses unsigned, so we have to check the width. + * + * - Colorspace is expected to be grayscale or RGB. + * + * - The number of channels (components) is expected to be 1 (grayscale/ + * pseudocolor) or 3 (truecolor/directcolor). + * (could be 2 or 4 if using an alpha channel, but we aren't) + */ + + if (cinfo->output_width > 32767 || cinfo->output_height > 32767) + ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width, + cinfo->output_height); + + if (cinfo->out_color_space != JCS_GRAYSCALE && + cinfo->out_color_space != JCS_RGB) + ERREXIT(cinfo, JERR_RLE_COLORSPACE); + + if (cinfo->output_components != 1 && cinfo->output_components != 3) + ERREXIT1(cinfo, JERR_RLE_TOOMANYCHANNELS, cinfo->num_components); + + /* Convert colormap, if any, to RLE format. */ + + dest->colormap = NULL; + + if (cinfo->quantize_colors) { + /* Allocate storage for RLE-style cmap, zero any extra entries */ + cmapsize = cinfo->out_color_components * CMAPLENGTH * sizeof(rle_map); + dest->colormap = (rle_map *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, cmapsize); + MEMZERO(dest->colormap, cmapsize); + + /* Save away data in RLE format --- note 8-bit left shift! */ + /* Shifting would need adjustment for JSAMPLEs wider than 8 bits. */ + for (ci = 0; ci < cinfo->out_color_components; ci++) { + for (i = 0; i < cinfo->actual_number_of_colors; i++) { + dest->colormap[ci * CMAPLENGTH + i] = + GETJSAMPLE(cinfo->colormap[ci][i]) << 8; + } + } + } + + /* Set the output buffer to the first row */ + dest->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, (JDIMENSION) 0, (JDIMENSION) 1, TRUE); + dest->pub.buffer_height = 1; + + dest->pub.put_pixel_rows = rle_put_pixel_rows; + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->total_extra_passes++; /* count file writing as separate pass */ + } +#endif +} + + +/* + * Write some pixel data. + * + * This routine just saves the data away in a virtual array. + */ + +METHODDEF(void) +rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + + if (cinfo->output_scanline < cinfo->output_height) { + dest->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + cinfo->output_scanline, (JDIMENSION) 1, TRUE); + } +} + +/* + * Finish up at the end of the file. + * + * Here is where we really output the RLE file. + */ + +METHODDEF(void) +finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + rle_hdr header; /* Output file information */ + rle_pixel **rle_row, *red, *green, *blue; + JSAMPROW output_row; + char cmapcomment[80]; + int row, col; + int ci; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* Initialize the header info */ + header = *rle_hdr_init(NULL); + header.rle_file = dest->pub.output_file; + header.xmin = 0; + header.xmax = cinfo->output_width - 1; + header.ymin = 0; + header.ymax = cinfo->output_height - 1; + header.alpha = 0; + header.ncolors = cinfo->output_components; + for (ci = 0; ci < cinfo->output_components; ci++) { + RLE_SET_BIT(header, ci); + } + if (cinfo->quantize_colors) { + header.ncmap = cinfo->out_color_components; + header.cmaplen = CMAPBITS; + header.cmap = dest->colormap; + /* Add a comment to the output image with the true colormap length. */ + sprintf(cmapcomment, "color_map_length=%d", cinfo->actual_number_of_colors); + rle_putcom(cmapcomment, &header); + } + + /* Emit the RLE header and color map (if any) */ + rle_put_setup(&header); + + /* Now output the RLE data from our virtual array. + * We assume here that rle_pixel is represented the same as JSAMPLE. + */ + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_limit = cinfo->output_height; + progress->pub.pass_counter = 0; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + + if (cinfo->output_components == 1) { + for (row = cinfo->output_height-1; row >= 0; row--) { + rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + (JDIMENSION) row, (JDIMENSION) 1, FALSE); + rle_putrow(rle_row, (int) cinfo->output_width, &header); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } else { + for (row = cinfo->output_height-1; row >= 0; row--) { + rle_row = (rle_pixel **) dest->rle_row; + output_row = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + (JDIMENSION) row, (JDIMENSION) 1, FALSE); + red = rle_row[0]; + green = rle_row[1]; + blue = rle_row[2]; + for (col = cinfo->output_width; col > 0; col--) { + *red++ = GETJSAMPLE(*output_row++); + *green++ = GETJSAMPLE(*output_row++); + *blue++ = GETJSAMPLE(*output_row++); + } + rle_putrow(rle_row, (int) cinfo->output_width, &header); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) + progress->completed_extra_passes++; +#endif + + /* Emit file trailer */ + rle_puteof(&header); + fflush(dest->pub.output_file); + if (ferror(dest->pub.output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for RLE format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_rle (j_decompress_ptr cinfo) +{ + rle_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (rle_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(rle_dest_struct)); + dest->pub.start_output = start_output_rle; + dest->pub.finish_output = finish_output_rle; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Allocate a work array for output to the RLE library. */ + dest->rle_row = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width, (JDIMENSION) cinfo->output_components); + + /* Allocate a virtual array to hold the image. */ + dest->image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) (cinfo->output_width * cinfo->output_components), + cinfo->output_height, (JDIMENSION) 1); + + return (djpeg_dest_ptr) dest; +} + +#endif /* RLE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrtarga.c glmark2-2021.02/android/jni/src/libjpeg-turbo/wrtarga.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libjpeg-turbo/wrtarga.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libjpeg-turbo/wrtarga.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,247 @@ +/* + * wrtarga.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in Targa format. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * Based on code contributed by Lee Daniel Crocker. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef TARGA_SUPPORTED + + +/* + * To support 12-bit JPEG data, we'd have to scale output down to 8 bits. + * This is not yet implemented. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + char *iobuffer; /* physical I/O buffer */ + JDIMENSION buffer_width; /* width of one row */ +} tga_dest_struct; + +typedef tga_dest_struct *tga_dest_ptr; + + +LOCAL(void) +write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) +/* Create and write a Targa header */ +{ + char targaheader[18]; + + /* Set unused fields of header to 0 */ + MEMZERO(targaheader, sizeof(targaheader)); + + if (num_colors > 0) { + targaheader[1] = 1; /* color map type 1 */ + targaheader[5] = (char) (num_colors & 0xFF); + targaheader[6] = (char) (num_colors >> 8); + targaheader[7] = 24; /* 24 bits per cmap entry */ + } + + targaheader[12] = (char) (cinfo->output_width & 0xFF); + targaheader[13] = (char) (cinfo->output_width >> 8); + targaheader[14] = (char) (cinfo->output_height & 0xFF); + targaheader[15] = (char) (cinfo->output_height >> 8); + targaheader[17] = 0x20; /* Top-down, non-interlaced */ + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + targaheader[2] = 3; /* image type = uncompressed grayscale */ + targaheader[16] = 8; /* bits per pixel */ + } else { /* must be RGB */ + if (num_colors > 0) { + targaheader[2] = 1; /* image type = colormapped RGB */ + targaheader[16] = 8; + } else { + targaheader[2] = 2; /* image type = uncompressed RGB */ + targaheader[16] = 24; + } + } + + if (JFWRITE(dinfo->output_file, targaheader, 18) != (size_t) 18) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* used for unquantized full-color output */ +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + outptr[0] = (char) GETJSAMPLE(inptr[2]); /* RGB to BGR order */ + outptr[1] = (char) GETJSAMPLE(inptr[1]); + outptr[2] = (char) GETJSAMPLE(inptr[0]); + inptr += 3, outptr += 3; + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + +METHODDEF(void) +put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* used for grayscale OR quantized color output */ +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = (char) GETJSAMPLE(*inptr++); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Write some demapped pixel data when color quantization is in effect. + * For Targa, this is only applied to grayscale data. + */ + +METHODDEF(void) +put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JSAMPROW color_map0 = cinfo->colormap[0]; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = (char) GETJSAMPLE(color_map0[GETJSAMPLE(*inptr++)]); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + int num_colors, i; + FILE *outfile; + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + /* Targa doesn't have a mapped grayscale format, so we will */ + /* demap quantized gray output. Never emit a colormap. */ + write_header(cinfo, dinfo, 0); + if (cinfo->quantize_colors) + dest->pub.put_pixel_rows = put_demapped_gray; + else + dest->pub.put_pixel_rows = put_gray_rows; + } else if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* We only support 8-bit colormap indexes, so only 256 colors */ + num_colors = cinfo->actual_number_of_colors; + if (num_colors > 256) + ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors); + write_header(cinfo, dinfo, num_colors); + /* Write the colormap. Note Targa uses BGR byte order */ + outfile = dest->pub.output_file; + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile); + putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile); + putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile); + } + dest->pub.put_pixel_rows = put_gray_rows; + } else { + write_header(cinfo, dinfo, 0); + dest->pub.put_pixel_rows = put_pixel_rows; + } + } else { + ERREXIT(cinfo, JERR_TGA_COLORSPACE); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* Make sure we wrote the output file OK */ + fflush(dinfo->output_file); + if (ferror(dinfo->output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for Targa format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_targa (j_decompress_ptr cinfo) +{ + tga_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (tga_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(tga_dest_struct)); + dest->pub.start_output = start_output_tga; + dest->pub.finish_output = finish_output_tga; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Create I/O buffer. */ + dest->buffer_width = cinfo->output_width * cinfo->output_components; + dest->iobuffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (size_t) (dest->buffer_width * sizeof(char))); + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* TARGA_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/log.cc glmark2-2021.02/android/jni/src/libmatrix/log.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/log.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/log.cc 2021-04-25 01:47:22.000000000 +0800 @@ -10,7 +10,6 @@ // Alexandros Frantzis // Jesse Barker // -#include #include #include #include @@ -22,6 +21,13 @@ #include #endif +#ifdef _WIN32 +// On windows 'isatty' is found in +#include +#else +#include +#endif + using std::string; const string Log::continuation_prefix("\x10"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/mat.h glmark2-2021.02/android/jni/src/libmatrix/mat.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/mat.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/mat.h 2021-04-25 01:47:22.000000000 +0800 @@ -109,9 +109,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat2& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) @@ -411,9 +408,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat3& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) @@ -793,9 +787,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat4& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/program.cc glmark2-2021.02/android/jni/src/libmatrix/program.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/program.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/program.cc 2021-04-25 01:47:22.000000000 +0800 @@ -190,7 +190,7 @@ } shader.attach(handle_); - shaders_.push_back(shader); + shaders_.push_back(std::move(shader)); return; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/program.h glmark2-2021.02/android/jni/src/libmatrix/program.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/program.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/program.h 2021-04-25 01:47:22.000000000 +0800 @@ -35,6 +35,13 @@ message_(shader.message_), ready_(shader.ready_), valid_(shader.valid_) {} + Shader(Shader&& shader) noexcept: + handle_(std::exchange(shader.handle_, 0)), + type_(std::exchange(shader.type_, 0)), + source_(std::move(shader.source_)), + message_(std::move(shader.message_)), + ready_(std::exchange(shader.ready_, false)), + valid_(std::exchange(shader.valid_, false)) {} Shader(unsigned int type, const std::string& source); ~Shader(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/shader-source.cc glmark2-2021.02/android/jni/src/libmatrix/shader-source.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/shader-source.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/shader-source.cc 2021-04-25 01:47:22.000000000 +0800 @@ -18,6 +18,36 @@ #include "vec.h" #include "util.h" +namespace +{ + +bool is_valid_precision_value(ShaderSource::PrecisionValue precision_value) +{ + switch(precision_value) { + case ShaderSource::PrecisionValueLow: + case ShaderSource::PrecisionValueMedium: + case ShaderSource::PrecisionValueHigh: + case ShaderSource::PrecisionValueDefault: + return true; + default: + return false; + } +} + +bool is_valid_shader_type(ShaderSource::ShaderType shader_type) +{ + switch(shader_type) { + case ShaderSource::ShaderTypeVertex: + case ShaderSource::ShaderTypeFragment: + case ShaderSource::ShaderTypeUnknown: + return true; + default: + return false; + } +} + +} + /** * Holds default precision values for all shader types * (even the unknown type, which is hardwired to default precision values) @@ -34,7 +64,7 @@ bool ShaderSource::load_file(const std::string& filename, std::string& str) { - std::auto_ptr is_ptr(Util::get_resource(filename)); + std::unique_ptr is_ptr(Util::get_resource(filename)); std::istream& inputFile(*is_ptr); if (!inputFile) @@ -421,7 +451,9 @@ ss << "#endif" << std::endl; } } - else if (val >= 0 && val < ShaderSource::PrecisionValueDefault) { + else if (is_valid_precision_value(val) && + val != ShaderSource::PrecisionValueDefault) + { ss << "precision " << precision_map[val] << " "; ss << type_str << ";" << std::endl; } @@ -512,7 +544,7 @@ ShaderSource::default_precision(const ShaderSource::Precision& precision, ShaderSource::ShaderType type) { - if (type < 0 || type > ShaderSource::ShaderTypeUnknown) + if (!is_valid_shader_type(type)) type = ShaderSource::ShaderTypeUnknown; if (type == ShaderSource::ShaderTypeUnknown) { @@ -537,7 +569,7 @@ const ShaderSource::Precision& ShaderSource::default_precision(ShaderSource::ShaderType type) { - if (type < 0 || type > ShaderSource::ShaderTypeUnknown) + if (!is_valid_shader_type(type)) type = ShaderSource::ShaderTypeUnknown; return default_precision_[type]; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/util.cc glmark2-2021.02/android/jni/src/libmatrix/util.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/libmatrix/util.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/libmatrix/util.cc 2021-04-25 01:47:22.000000000 +0800 @@ -220,10 +220,10 @@ uint64_t Util::get_timestamp_us() { - struct timeval tv; - gettimeofday(&tv, NULL); - uint64_t now = static_cast(tv.tv_sec) * 1000000 + - static_cast(tv.tv_usec); + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t now = static_cast(ts.tv_sec) * 1000000 + + static_cast(ts.tv_nsec)/1000.0; return now; } @@ -244,7 +244,7 @@ std::istream * Util::get_resource(const std::string &path) { - std::ifstream *ifs = new std::ifstream(path.c_str()); + std::ifstream *ifs = new std::ifstream(path.c_str(), std::ios::binary); return static_cast(ifs); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/main.cpp glmark2-2021.02/android/jni/src/main.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/main.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/main.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -42,12 +42,18 @@ #include "native-state-mir.h" #elif GLMARK2_USE_WAYLAND #include "native-state-wayland.h" +#elif GLMARK2_USE_DISPMANX +#include "native-state-dispmanx.h" +#elif GLMARK2_USE_WIN32 +#include "native-state-win32.h" #endif #if GLMARK2_USE_EGL #include "gl-state-egl.h" #elif GLMARK2_USE_GLX #include "gl-state-glx.h" +#elif GLMARK2_USE_WGL +#include "gl-state-wgl.h" #endif using std::vector; @@ -138,7 +144,6 @@ int main(int argc, char *argv[]) { - if (!Options::parse_args(argc, argv)) return 1; @@ -168,12 +173,18 @@ NativeStateMir native_state; #elif GLMARK2_USE_WAYLAND NativeStateWayland native_state; +#elif GLMARK2_USE_DISPMANX + NativeStateDispmanx native_state; +#elif GLMARK2_USE_WIN32 + NativeStateWin32 native_state; #endif #if GLMARK2_USE_EGL GLStateEGL gl_state; #elif GLMARK2_USE_GLX GLStateGLX gl_state; +#elif GLMARK2_USE_WGL + GLStateWGL gl_state; #endif CanvasGeneric canvas(native_state, gl_state, Options::size.first, Options::size.second); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/main-loop.cpp glmark2-2021.02/android/jni/src/main-loop.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/main-loop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/main-loop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -80,9 +80,9 @@ /* If we have found a valid scene, set it up */ if (bench_iter_ != benchmarks_.end()) { + before_scene_setup(); if (!Options::reuse_context) canvas_.reset(); - before_scene_setup(); scene_ = &(*bench_iter_)->setup_scene(); if (!scene_->running()) { if (!scene_->supported(false)) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/mesh.cpp glmark2-2021.02/android/jni/src/mesh.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/mesh.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/mesh.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -396,8 +396,9 @@ /* Update the current range from the vertex data */ float *dest(array + nfloats * ri->first); for (size_t n = ri->first; n <= ri->second; n++) { - for (size_t i = 0; i < nfloats; i++) - *dest++ = vertices_[n][offset + i]; + float *src(vertices_[n].data() + offset); + std::copy(src, src + nfloats, dest); + dest += nfloats; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/meson.build glmark2-2021.02/android/jni/src/meson.build --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,263 @@ +common_sources = [ + 'benchmark-collection.cpp', + 'benchmark.cpp', + 'canvas-generic.cpp', + 'gl-headers.cpp', + 'gl-visual-config.cpp', + 'image-reader.cpp', + 'libmatrix/log.cc', + 'libmatrix/mat.cc', + 'libmatrix/program.cc', + 'libmatrix/shader-source.cc', + 'libmatrix/util.cc', + 'main-loop.cpp', + 'mesh.cpp', + 'model.cpp', + 'options.cpp', + 'scene-buffer.cpp', + 'scene-build.cpp', + 'scene-bump.cpp', + 'scene-clear.cpp', + 'scene-conditionals.cpp', + 'scene.cpp', + 'scene-default-options.cpp', + 'scene-desktop.cpp', + 'scene-effect-2d.cpp', + 'scene-function.cpp', + 'scene-grid.cpp', + 'scene-ideas/a.cc', + 'scene-ideas.cpp', + 'scene-ideas/d.cc', + 'scene-ideas/e.cc', + 'scene-ideas/i.cc', + 'scene-ideas/lamp.cc', + 'scene-ideas/logo.cc', + 'scene-ideas/m.cc', + 'scene-ideas/n.cc', + 'scene-ideas/o.cc', + 'scene-ideas/s.cc', + 'scene-ideas/splines.cc', + 'scene-ideas/table.cc', + 'scene-ideas/t.cc', + 'scene-jellyfish.cpp', + 'scene-loop.cpp', + 'scene-pulsar.cpp', + 'scene-refract.cpp', + 'scene-shading.cpp', + 'scene-shadow.cpp', + 'scene-terrain/base-renderer.cpp', + 'scene-terrain/blur-renderer.cpp', + 'scene-terrain/copy-renderer.cpp', + 'scene-terrain.cpp', + 'scene-terrain/luminance-renderer.cpp', + 'scene-terrain/normal-from-height-renderer.cpp', + 'scene-terrain/overlay-renderer.cpp', + 'scene-terrain/renderer-chain.cpp', + 'scene-terrain/simplex-noise-renderer.cpp', + 'scene-terrain/terrain-renderer.cpp', + 'scene-terrain/texture-renderer.cpp', + 'scene-texture.cpp', + 'shared-library.cpp', + 'text-renderer.cpp', + 'texture.cpp' +] + +libmatrix_headers_dep = declare_dependency( + include_directories: include_directories('libmatrix'), + compile_args: ['-DUSE_EXCEPTIONS'], +) + +common_deps = [ + m_dep, + dl_dep, + libjpeg_dep, + libpng_dep, + libmatrix_headers_dep, +] + +if need_drm + native_drm_lib = static_library( + 'native-drm', + 'native-state-drm.cpp', + dependencies: [libdrm_dep, libudev_dep, gbm_dep, libmatrix_headers_dep], + ) + native_drm_dep = declare_dependency( + link_with: native_drm_lib, + dependencies: libdrm_dep, + compile_args: ['-DGLMARK2_USE_DRM', '-D__GBM__'], + ) +else + native_drm_dep = declare_dependency() +endif + +if need_wayland + wayland_scanner = find_program(wayland_scanner_dep.get_pkgconfig_variable('wayland_scanner')) + wayland_protocols_dir = wayland_protocols_dep.get_pkgconfig_variable('pkgdatadir') + + xdg_shell_xml_path = wayland_protocols_dir + '/stable/xdg-shell/xdg-shell.xml' + xdg_shell_client_header = custom_target( + 'xdg-shell client-header', + command: [ wayland_scanner, 'client-header', '@INPUT@', '@OUTPUT@' ], + input: xdg_shell_xml_path, + output: 'xdg-shell-client-protocol.h', + ) + xdg_shell_private_code = custom_target( + 'xdg-shell private-code', + command: [ wayland_scanner, 'private-code', '@INPUT@', '@OUTPUT@' ], + input: xdg_shell_xml_path, + output: 'xdg-shell-protocol.c', + ) + + native_wayland_lib = static_library( + 'native-wayland', + 'native-state-wayland.cpp', + xdg_shell_client_header, + xdg_shell_private_code, + dependencies: [libmatrix_headers_dep, wayland_client_dep, wayland_cursor_dep, wayland_egl_dep], + ) + + native_wayland_dep = declare_dependency( + link_with: native_wayland_lib, + dependencies: [wayland_client_dep, wayland_cursor_dep, wayland_egl_dep], + compile_args: ['-DGLMARK2_USE_WAYLAND', '-DWL_EGL_PLATFORM'], + ) +else + native_wayland_dep = declare_dependency() +endif + +if need_x11 + native_x11_lib = static_library( + 'native-x11', + 'native-state-x11.cpp', + dependencies: [x11_dep, libmatrix_headers_dep], + ) + + native_x11_dep = declare_dependency( + link_with: native_x11_lib, + compile_args: ['-DGLMARK2_USE_X11'], + ) +else + native_x11_dep = declare_dependency() +endif + +if need_gl + gl_gl_lib = static_library( + 'gl-gl', + 'glad/src/gl.c', + include_directories: include_directories('glad/include'), + ) + gl_gl_dep = declare_dependency( + link_with: gl_gl_lib, + include_directories: include_directories('glad/include'), + compile_args: ['-DGLMARK2_USE_GL'], + ) + + lib_common_gl = static_library( + 'common-gl', + common_sources, + dependencies: [gl_gl_dep, common_deps], + include_directories: [ + include_directories('scene-ideas'), + include_directories('scene-terrain'), + ] + ) + common_gl_dep = declare_dependency( + link_with: lib_common_gl, + include_directories: include_directories('libmatrix'), + ) +else + gl_gl_dep = declare_dependency() + common_gl_dep = declare_dependency() +endif + +if need_glesv2 + gl_glesv2_lib = static_library( + 'gl-glesv2', + 'glad/src/gles2.c', + include_directories: include_directories('glad/include'), + ) + gl_glesv2_dep = declare_dependency( + link_with: gl_glesv2_lib, + include_directories: include_directories('glad/include'), + compile_args: ['-DGLMARK2_USE_GLESv2'], + ) + + lib_common_glesv2 = static_library( + 'common-glesv2', + common_sources, + dependencies: [gl_glesv2_dep, common_deps], + include_directories: [ + include_directories('scene-ideas'), + include_directories('scene-terrain'), + ] + ) + common_glesv2_dep = declare_dependency( + link_with: lib_common_glesv2, + include_directories: include_directories('libmatrix'), + ) +else + gl_glesv2_dep = declare_dependency() + common_glesv2_dep = declare_dependency() +endif + +if need_egl + # gl-state-egl builds depend on both the GL type and the native type. To + # avoid creating all the required combinations explicitly, make this a + # source dependency which will be built properly as part of the final + # glmark2 executable. + wsi_egl_dep = declare_dependency( + sources: ['gl-state-egl.cpp', 'glad/src/egl.c'], + include_directories: [ + include_directories('glad/include'), + include_directories('libmatrix'), + ], + compile_args: ['-DGLMARK2_USE_EGL'], + ) +else + wsi_egl_dep = declare_dependency() +endif + +if need_glx + # Although there is a single valid flavor for glx (x11-gl) and we could + # build gl-state-glx here, make this a source dependency for convenience + # and parity with gl-state-egl. + wsi_glx_dep = declare_dependency( + sources: ['gl-state-glx.cpp', 'glad/src/glx.c'], + include_directories: [ + include_directories('glad/include'), + include_directories('libmatrix'), + ], + compile_args: ['-DGLMARK2_USE_GLX'], + ) +else + wsi_glx_dep = declare_dependency() +endif + +# Flavor info : [ binary, native, gl, wsi ] +flavor_info = { + 'drm-gl' : ['glmark2-drm', native_drm_dep, gl_gl_dep, wsi_egl_dep], + 'drm-glesv2' : ['glmark2-es2-drm', native_drm_dep, gl_glesv2_dep, wsi_egl_dep], + 'wayland-gl' : ['glmark2-wayland', native_wayland_dep, gl_gl_dep, wsi_egl_dep], + 'wayland-glesv2' : ['glmark2-es2-wayland', native_wayland_dep, gl_glesv2_dep, wsi_egl_dep], + 'x11-gl' : ['glmark2', native_x11_dep, gl_gl_dep, wsi_glx_dep], + 'x11-glesv2' : ['glmark2-es2', native_x11_dep, gl_glesv2_dep, wsi_egl_dep], +} + +foreach flavor : flavors + info = flavor_info[flavor] + binary = info[0] + native_dep = info[1] + gl_dep = info[2] + wsi_dep = info[3] + if gl_dep == gl_gl_dep + common_dep = common_gl_dep + elif gl_dep == gl_glesv2_dep + common_dep = common_glesv2_dep + endif + executable( + binary, + ['main.cpp', 'canvas-generic.cpp'], + dependencies: [common_dep, native_dep, gl_dep, wsi_dep], + install: true, + ) +endforeach diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/model.cpp glmark2-2021.02/android/jni/src/model.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/model.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/model.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -361,7 +361,7 @@ Log::debug("Loading model from 3ds file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& input_file(*input_file_ptr); if (!input_file) { @@ -385,7 +385,7 @@ return false; } - //Read the lenght of the chunk + //Read the length of the chunk read_or_fail(input_file, &chunk_length, 4); switch (chunk_id) @@ -393,7 +393,7 @@ //----------------- MAIN3DS ----------------- // Description: Main chunk, contains all the other chunks // Chunk ID: 4d4d - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x4d4d: break; @@ -401,7 +401,7 @@ //----------------- EDIT3DS ----------------- // Description: 3D Editor chunk, objects layout info // Chunk ID: 3d3d (hex) - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x3d3d: break; @@ -409,7 +409,7 @@ //--------------- EDIT_OBJECT --------------- // Description: Object block, info for each object // Chunk ID: 4000 (hex) - // Chunk Lenght: len(object name) + sub chunks + // Chunk Length: len(object name) + sub chunks //------------------------------------------- case 0x4000: { @@ -429,7 +429,7 @@ //--------------- OBJ_TRIMESH --------------- // Description: Triangular mesh, contains chunks for 3d mesh info // Chunk ID: 4100 (hex) - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x4100: break; @@ -437,7 +437,7 @@ //--------------- TRI_VERTEXL --------------- // Description: Vertices list // Chunk ID: 4110 (hex) - // Chunk Lenght: 1 x unsigned short (number of vertices) + // Chunk Length: 1 x unsigned short (number of vertices) // + 3 x float (vertex coordinates) x (number of vertices) // + sub chunks //------------------------------------------- @@ -461,7 +461,7 @@ //--------------- TRI_FACEL1 ---------------- // Description: Polygons (faces) list // Chunk ID: 4120 (hex) - // Chunk Lenght: 1 x unsigned short (number of polygons) + // Chunk Length: 1 x unsigned short (number of polygons) // + 3 x unsigned short (polygon points) x (number of polygons) // + sub chunks //------------------------------------------- @@ -484,7 +484,7 @@ //------------- TRI_MAPPINGCOORS ------------ // Description: Vertices list // Chunk ID: 4140 (hex) - // Chunk Lenght: 1 x unsigned short (number of mapping points) + // Chunk Length: 1 x unsigned short (number of mapping points) // + 2 x float (mapping coordinates) x (number of mapping points) // + sub chunks //------------------------------------------- @@ -505,7 +505,7 @@ //----------- Skip unknow chunks ------------ //We need to skip all the chunks that currently we don't use - //We use the chunk lenght information to set the file pointer + //We use the chunk length information to set the file pointer //to the same level next chunk //------------------------------------------- default: @@ -681,7 +681,7 @@ { Log::debug("Loading model from obj file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& inputFile(*input_file_ptr); if (!inputFile) { @@ -814,7 +814,7 @@ return ModelPrivate::modelMap; } vector pathVec; - string dataDir(GLMARK_DATA_PATH"/models"); + string dataDir(Options::data_path + "/models"); Util::list_files(dataDir, pathVec); #ifdef GLMARK_EXTRAS_PATH string extrasDir(GLMARK_EXTRAS_PATH"/models"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-dispmanx.cpp glmark2-2021.02/android/jni/src/native-state-dispmanx.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-dispmanx.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-dispmanx.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,132 @@ +/* + * Copyright © 2013 Rafal Mielniczuk + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Rafal Mielniczuk + */ + +#include "native-state-dispmanx.h" +#include "log.h" + +#include +#include + +NativeStateDispmanx::NativeStateDispmanx() +{ + memset(&properties_, 0, sizeof(properties_)); + memset(&egl_dispmanx_window, 0, sizeof(egl_dispmanx_window)); +} + +NativeStateDispmanx::~NativeStateDispmanx() +{ +} + +bool +NativeStateDispmanx::init_display() +{ + bcm_host_init(); + + return true; +} + +void* +NativeStateDispmanx::display() +{ + return EGL_DEFAULT_DISPLAY; +} + +bool +NativeStateDispmanx::create_window(WindowProperties const& properties) +{ + int dispmanx_output = 0; /* LCD */ + + if (!properties.fullscreen) { + Log::error("Error: Dispmanx only supports full screen rendering.\n"); + return false; + } + + unsigned screen_width, screen_height; + if (graphics_get_display_size(dispmanx_output, + &screen_width, &screen_height) < 0) { + Log::error("Error: Couldn't get screen width/height.\n"); + return false; + } + + properties_.fullscreen = properties.fullscreen; + properties_.visual_id = properties.visual_id; + properties_.width = screen_width; + properties_.height = screen_height; + + dispmanx_display = vc_dispmanx_display_open(dispmanx_output); + + VC_RECT_T dst_rect; + VC_RECT_T src_rect; + dst_rect.x = 0; + dst_rect.y = 0; + dst_rect.width = screen_width; + dst_rect.height = screen_height; + + src_rect.x = 0; + src_rect.y = 0; + src_rect.width = screen_width << 16; + src_rect.height = screen_height << 16; + + dispmanx_update = vc_dispmanx_update_start(0); + + dispmanx_element = vc_dispmanx_element_add(dispmanx_update, + dispmanx_display, + 0 /*layer*/, + &dst_rect, + 0 /*src*/, + &src_rect, + DISPMANX_PROTECTION_NONE, + 0 /*alpha*/, + 0 /*clamp*/, + DISPMANX_NO_ROTATE); + + egl_dispmanx_window.element = dispmanx_element; + egl_dispmanx_window.width = dst_rect.width; + egl_dispmanx_window.height = dst_rect.height; + vc_dispmanx_update_submit_sync(dispmanx_update); + + return true; +} + +void* +NativeStateDispmanx::window(WindowProperties &properties) +{ + properties = properties_; + return &egl_dispmanx_window; +} + +void +NativeStateDispmanx::visible(bool /*v*/) +{ +} + +bool +NativeStateDispmanx::should_quit() +{ + return false; +} + +void +NativeStateDispmanx::flip() +{ +} + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-dispmanx.h glmark2-2021.02/android/jni/src/native-state-dispmanx.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-dispmanx.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-dispmanx.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +/* + * Copyright © 2013 Rafal Mielniczuk + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Rafal Mielniczuk + */ + +#ifndef GLMARK2_NATIVE_STATE_DISPMANX_H_ +#define GLMARK2_NATIVE_STATE_DISPMANX_H_ + +#include +#include "bcm_host.h" +#include "GLES/gl.h" +#include "EGL/egl.h" +#include "EGL/eglext.h" + +#include "native-state.h" + +class NativeStateDispmanx : public NativeState +{ +public: + NativeStateDispmanx(); + ~NativeStateDispmanx(); + + bool init_display(); + void* display(); + bool create_window(WindowProperties const& properties); + void* window(WindowProperties& properties); + void visible(bool v); + bool should_quit(); + void flip(); + +private: + DISPMANX_DISPLAY_HANDLE_T dispmanx_display; + DISPMANX_UPDATE_HANDLE_T dispmanx_update; + DISPMANX_ELEMENT_HANDLE_T dispmanx_element; + EGL_DISPMANX_WINDOW_T egl_dispmanx_window; + WindowProperties properties_; +}; + +#endif /* GLMARK2_NATIVE_STATE_DISPMANX_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-drm.cpp glmark2-2021.02/android/jni/src/native-state-drm.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-drm.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-drm.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -25,6 +25,11 @@ #include "native-state-drm.h" #include "log.h" +#include +#include +#include +#include + /****************** * Public methods * ******************/ @@ -106,7 +111,8 @@ FD_ZERO(&fds); FD_SET(fd_, &fds); drmEventContext evCtx; - evCtx.version = DRM_EVENT_CONTEXT_VERSION; + memset(&evCtx, 0, sizeof(evCtx)); + evCtx.version = 2; evCtx.page_flip_handler = page_flip_handler; while (waiting) { @@ -134,6 +140,269 @@ * Private methods * *******************/ +/* Simple helpers */ + +inline static bool valid_fd(int fd) +{ + return fd >= 0; +} + +inline static bool valid_drm_node_path(std::string const& provided_node_path) +{ + return !provided_node_path.empty(); +} + +inline static bool invalid_drm_node_path(std::string const& provided_node_path) +{ + return !(valid_drm_node_path(provided_node_path)); +} + +/* Udev methods */ +// Udev detection functions +#define UDEV_TEST_FUNC_SIGNATURE(udev_identifier, device_identifier, syspath_identifier) \ + struct udev * __restrict const udev_identifier, \ + struct udev_device * __restrict const device_identifier, \ + char const * __restrict syspath_identifier + +/* Omitting the parameter names is kind of ugly but is the only way + * to force G++ to forget about the unused parameters. + * Having big warnings during the compilation isn't very nice. + * + * These functions will be used as function pointers and should have + * the same signature to avoid weird stack related issues. + * + * Another way to deal with that issue will be to mark unused parameters + * with __attribute__((unused)) + */ +static bool udev_drm_test_virtual( + UDEV_TEST_FUNC_SIGNATURE(,,tested_node_syspath)) +{ + return strstr(tested_node_syspath, "virtual") != NULL; +} + +static bool udev_drm_test_not_virtual( + UDEV_TEST_FUNC_SIGNATURE(udev, current_device, tested_node_syspath)) +{ + return !udev_drm_test_virtual(udev, + current_device, + tested_node_syspath); +} + +static bool +udev_drm_test_primary_gpu(UDEV_TEST_FUNC_SIGNATURE(, current_device,)) +{ + bool is_main_gpu = false; + + auto const drm_node_parent = udev_device_get_parent(current_device); + + /* While tempting, using udev_device_unref will generate issues + * when unreferencing the child in udev_get_node_that_pass_in_enum + * + * udev_device_unref WILL unreference the parent, so avoid doing + * that here. + * + * ( See udev sources : src/libudev/libudev-device.c ) + */ + if (drm_node_parent != NULL) { + is_main_gpu = + (udev_device_get_sysattr_value(drm_node_parent, "boot_vga") + != NULL); + } + + return is_main_gpu; +} + +/* Test if the drm-device is actually modeset capable. + * Render-only devices cannot drive an actual display, + * so the GETRESOURCES ioctl will fail in that case. + */ +static bool udev_drm_test_modeset(std::string const& dev_path) +{ + struct drm_mode_card_res res {}; + int fd, ret; + + fd = open(dev_path.c_str(), O_RDWR); + if (!valid_fd(fd)) + return false; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_GETRESOURCES, &res); + drmClose(fd); + + return !ret; +} + +static std::string +udev_get_node_that_pass_in_enum( + struct udev * __restrict const udev, + struct udev_enumerate * __restrict const dev_enum, + bool (* check_function)(UDEV_TEST_FUNC_SIGNATURE(,,))) +{ + std::string result; + + auto current_element = udev_enumerate_get_list_entry(dev_enum); + + while (current_element && result.empty()) { + char const * __restrict current_element_sys_path = + udev_list_entry_get_name(current_element); + + if (current_element_sys_path) { + struct udev_device * current_device = + udev_device_new_from_syspath(udev, + current_element_sys_path); + auto check_passed = check_function( + udev, current_device, current_element_sys_path); + + if (check_passed) { + const char * device_node_path = + udev_device_get_devnode(current_device); + + if (device_node_path && + udev_drm_test_modeset(device_node_path)) { + result = device_node_path; + } + + } + + udev_device_unref(current_device); + } + + current_element = udev_list_entry_get_next(current_element); + } + + return result; +} + +/* Inspired by KWin detection mechanism */ +/* And yet KWin got it wrong too, it seems. + * 1 - Looking for the primary GPU by checking the flag 'boot_vga' + * won't get you far with some embedded chipsets, like Rockchip. + * 2 - Looking for a GPU plugged in PCI will fail on various embedded + * devices ! + * 3 - Looking for a render node is not guaranteed to work on some + * poorly maintained DRM drivers, which plague some embedded + * devices too... + * + * So, we won't play too smart here. + * - We first check for a primary GPU plugged in PCI with the 'boot_vga' + * attribute, to take care of Desktop users using multiple GPU. + * - Then, we just check for a DRM node that is not virtual + * - At least, we use the first virtual node we get, if we didn't find + * anything yet. + * This should take care of almost every potential use case. + * + * The remaining ones will be dealt with an additional option to + * specify the DRM dev node manually. + */ +static std::string udev_main_gpu_drm_node_path() +{ + Log::debug("Using Udev to detect the right DRM node to use\n"); + auto udev = udev_new(); + auto dev_enumeration = udev_enumerate_new(udev); + + udev_enumerate_add_match_subsystem(dev_enumeration, "drm"); + udev_enumerate_add_match_sysname(dev_enumeration, "card[0-9]*"); + udev_enumerate_scan_devices(dev_enumeration); + + Log::debug("Looking for the main GPU DRM node...\n"); + std::string node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_primary_gpu); + + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found!\n"); + Log::debug("Looking for a concrete GPU DRM node...\n"); + node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_not_virtual); + } + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found!?\n"); + Log::debug("Looking for a virtual GPU DRM node...\n"); + node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_virtual); + } + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found.\n"); + Log::debug("Cannot find a single DRM node using UDEV...\n"); + } + + if (valid_drm_node_path(node_path)) { + Log::debug("Success!\n"); + } + + udev_enumerate_unref(dev_enumeration); + udev_unref(udev); + + return node_path; +} + +static int open_using_udev_scan() +{ + auto dev_path = udev_main_gpu_drm_node_path(); + + int fd = -1; + if (valid_drm_node_path(dev_path)) { + Log::debug("Trying to use the DRM node %s\n", dev_path.c_str()); + fd = open(dev_path.c_str(), O_RDWR); + } + else { + Log::error("Can't determine the main graphic card " + "DRM device node\n"); + } + + if (!valid_fd(fd)) { + // %m is GLIBC specific... Maybe use strerror here... + Log::error("Tried to use '%s' but failed.\nReason : %m\n", + dev_path.c_str()); + } + else + Log::debug("Success!\n"); + + return fd; +} + +/* -- End of Udev helpers -- */ + +/* + * This method is there to take care of cases that would not be handled + * by open_using_udev_scan. This should not happen. + * + * If your driver defines a /dev/dri/cardX node and open_using_udev_scan + * were not able to detect it, you should probably file an issue. + * + */ +static int open_using_module_checking() +{ + static const char* drm_modules[] = { + "i915", + "imx-drm", + "nouveau", + "radeon", + "vmgfx", + "omapdrm", + "exynos", + "pl111", + "vc4", + "msm", + "meson", + "rockchip", + "sun4i-drm", + "stm", + }; + + int fd = -1; + unsigned int num_modules(sizeof(drm_modules)/sizeof(drm_modules[0])); + for (unsigned int m = 0; m < num_modules; m++) { + fd = drmOpen(drm_modules[m], 0); + if (fd < 0) { + Log::debug("Failed to open DRM module '%s'\n", drm_modules[m]); + continue; + } + Log::debug("Opened DRM module '%s'\n", drm_modules[m]); + break; + } + + return fd; +} + void NativeStateDRM::fb_destroy_callback(gbm_bo* bo, void* data) { @@ -197,34 +466,23 @@ bool NativeStateDRM::init() { - // TODO: Replace this with something that explicitly probes for the loaded - // driver (udev?). - static const char* drm_modules[] = { - "i915", - "nouveau", - "radeon", - "vmgfx", - "omapdrm", - "exynos" - }; + // TODO: The user should be able to define *exactly* which device + // node to open and the program should try to open only + // this node, in order to take care of unknown use cases. + int fd = open_using_udev_scan(); - unsigned int num_modules(sizeof(drm_modules)/sizeof(drm_modules[0])); - for (unsigned int m = 0; m < num_modules; m++) { - fd_ = drmOpen(drm_modules[m], 0); - if (fd_ < 0) { - Log::debug("Failed to open DRM module '%s'\n", drm_modules[m]); - continue; - } - Log::debug("Opened DRM module '%s'\n", drm_modules[m]); - break; + if (!valid_fd(fd)) { + fd = open_using_module_checking(); } - if (fd_ < 0) { + if (!valid_fd(fd)) { Log::error("Failed to find a suitable DRM device\n"); return false; } - resources_ = drmModeGetResources(fd_); + fd_ = fd; + + resources_ = drmModeGetResources(fd); if (!resources_) { Log::error("drmModeGetResources failed\n"); return false; @@ -232,7 +490,7 @@ // Find a connected connector for (int c = 0; c < resources_->count_connectors; c++) { - connector_ = drmModeGetConnector(fd_, resources_->connectors[c]); + connector_ = drmModeGetConnector(fd, resources_->connectors[c]); if (DRM_MODE_CONNECTED == connector_->connection) { break; } @@ -263,14 +521,40 @@ // Find a suitable encoder for (int e = 0; e < resources_->count_encoders; e++) { + bool found = false; encoder_ = drmModeGetEncoder(fd_, resources_->encoders[e]); - if (encoder_ && encoder_->encoder_id == connector_->encoder_id) { - break; + for (int ce = 0; ce < connector_->count_encoders; ce++) { + if (encoder_ && encoder_->encoder_id == connector_->encoders[ce]) { + found = true; + break; + } } + if (found) + break; drmModeFreeEncoder(encoder_); encoder_ = 0; } + // If encoder is not connected to the connector try to find + // a suitable one + if (!encoder_) { + for (int e = 0; e < connector_->count_encoders; e++) { + encoder_ = drmModeGetEncoder(fd, connector_->encoders[e]); + for (int c = 0; c < resources_->count_crtcs; c++) { + if (encoder_->possible_crtcs & (1 << c)) { + encoder_->crtc_id = resources_->crtcs[c]; + break; + } + } + if (encoder_->crtc_id) { + break; + } + + drmModeFreeEncoder(encoder_); + encoder_ = 0; + } + } + if (!encoder_) { Log::error("Failed to find a suitable encoder\n"); return false; @@ -282,8 +566,13 @@ crtc_ = drmModeGetCrtc(fd_, encoder_->crtc_id); if (!crtc_) { - Log::error("Failed to get current CRTC\n"); - return false; + // if there is no current CRTC, make sure to attach a suitable one + for (int c = 0; c < resources_->count_crtcs; c++) { + if (encoder_->possible_crtcs & (1 << c)) { + encoder_->crtc_id = resources_->crtcs[c]; + break; + } + } } signal(SIGINT, &NativeStateDRM::quit_handler); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-drm.h glmark2-2021.02/android/jni/src/native-state-drm.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-drm.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-drm.h 2021-04-25 01:47:22.000000000 +0800 @@ -41,6 +41,7 @@ resources_(0), connector_(0), encoder_(0), + crtc_(0), mode_(0), dev_(0), surface_(0), diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state.h glmark2-2021.02/android/jni/src/native-state.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,12 +22,14 @@ #ifndef GLMARK2_NATIVE_STATE_H_ #define GLMARK2_NATIVE_STATE_H_ +#include + class NativeState { public: struct WindowProperties { - WindowProperties(int w, int h, bool f, int v) + WindowProperties(int w, int h, bool f, intptr_t v) : width(w), height(h), fullscreen(f), visual_id(v) {} WindowProperties() : width(0), height(0), fullscreen(false), visual_id(0) {} @@ -35,7 +37,7 @@ int width; int height; bool fullscreen; - int visual_id; + intptr_t visual_id; }; virtual ~NativeState() {} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-mir.cpp glmark2-2021.02/android/jni/src/native-state-mir.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-mir.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-mir.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -157,7 +157,7 @@ bool NativeStateMir::create_window(WindowProperties const& properties) { - static const char *win_name("glmark2 "GLMARK_VERSION); + static const char *win_name("glmark2 " GLMARK_VERSION); if (!mir_connection_is_valid(mir_connection_)) { Log::error("Cannot create a Mir surface without a valid connection " diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-wayland.cpp glmark2-2021.02/android/jni/src/native-state-wayland.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-wayland.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-wayland.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -21,19 +21,29 @@ */ #include "native-state-wayland.h" +#include "log.h" +#include #include #include +#include const struct wl_registry_listener NativeStateWayland::registry_listener_ = { NativeStateWayland::registry_handle_global, NativeStateWayland::registry_handle_global_remove }; -const struct wl_shell_surface_listener NativeStateWayland::shell_surface_listener_ = { - NativeStateWayland::shell_surface_handle_ping, - NativeStateWayland::shell_surface_handle_configure, - NativeStateWayland::shell_surface_handle_popup_done +const struct xdg_toplevel_listener NativeStateWayland::xdg_toplevel_listener_ = { + NativeStateWayland::xdg_toplevel_handle_configure, + NativeStateWayland::xdg_toplevel_handle_close +}; + +const struct xdg_surface_listener NativeStateWayland::xdg_surface_listener_ = { + NativeStateWayland::xdg_surface_handle_configure +}; + +const struct xdg_wm_base_listener NativeStateWayland::xdg_wm_base_listener_ = { + NativeStateWayland::xdg_wm_base_handle_ping }; const struct wl_output_listener NativeStateWayland::output_listener_ = { @@ -43,29 +53,58 @@ NativeStateWayland::output_handle_scale }; +const struct wl_seat_listener NativeStateWayland::seat_listener_ = { + NativeStateWayland::seat_handle_capabilities, +}; + +const struct wl_pointer_listener NativeStateWayland::pointer_listener_ = { + NativeStateWayland::pointer_handle_enter, + NativeStateWayland::pointer_handle_leave, + NativeStateWayland::pointer_handle_motion, + NativeStateWayland::pointer_handle_button, + NativeStateWayland::pointer_handle_axis, +}; + +const struct wl_keyboard_listener NativeStateWayland::keyboard_listener_ = { + NativeStateWayland::keyboard_handle_keymap, + NativeStateWayland::keyboard_handle_enter, + NativeStateWayland::keyboard_handle_leave, + NativeStateWayland::keyboard_handle_key, + NativeStateWayland::keyboard_handle_modifiers, +}; + volatile bool NativeStateWayland::should_quit_ = false; -NativeStateWayland::NativeStateWayland() : display_(0), window_(0) +NativeStateWayland::NativeStateWayland() : cursor_(0), display_(0), window_(0) { } NativeStateWayland::~NativeStateWayland() { if (window_) { - if (window_->shell_surface) - wl_shell_surface_destroy(window_->shell_surface); - if (window_->opaque_reqion) - wl_region_destroy(window_->opaque_reqion); - if (window_->surface) - wl_surface_destroy(window_->surface); + if (window_->xdg_toplevel) + xdg_toplevel_destroy(window_->xdg_toplevel); + if (window_->xdg_surface) + xdg_surface_destroy(window_->xdg_surface); if (window_->native) wl_egl_window_destroy(window_->native); + if (window_->surface) + wl_surface_destroy(window_->surface); delete window_; } + if (cursor_) { + if (cursor_->cursor_surface) + wl_surface_destroy(cursor_->cursor_surface); + if (cursor_->cursor_theme) + wl_cursor_theme_destroy(cursor_->cursor_theme); + + delete cursor_; + } + if (display_) { - if (display_->shell) - wl_shell_destroy(display_->shell); + if (display_->xdg_wm_base) + xdg_wm_base_destroy(display_->xdg_wm_base); for (OutputsVector::iterator it = display_->outputs.begin(); it != display_->outputs.end(); ++it) { @@ -88,30 +127,66 @@ void NativeStateWayland::registry_handle_global(void *data, struct wl_registry *registry, uint32_t id, const char *interface, - uint32_t /*version*/) + uint32_t version) { NativeStateWayland *that = static_cast(data); if (strcmp(interface, "wl_compositor") == 0) { that->display_->compositor = static_cast( wl_registry_bind(registry, - id, &wl_compositor_interface, 1)); - } else if (strcmp(interface, "wl_shell") == 0) { - that->display_->shell = - static_cast( + id, &wl_compositor_interface, std::min(version, 4U))); + } else if (strcmp(interface, "xdg_wm_base") == 0) { + that->display_->xdg_wm_base = + static_cast( wl_registry_bind(registry, - id, &wl_shell_interface, 1)); + id, &xdg_wm_base_interface, std::min(version, 2U))); + xdg_wm_base_add_listener(that->display_->xdg_wm_base, &xdg_wm_base_listener_, that); } else if (strcmp(interface, "wl_output") == 0) { - struct my_output *my_output = new struct my_output; + struct my_output *my_output = new struct my_output(); memset(my_output, 0, sizeof(*my_output)); + my_output->scale = 1; my_output->output = static_cast( wl_registry_bind(registry, - id, &wl_output_interface, 2)); + id, &wl_output_interface, std::min(version, 2U))); that->display_->outputs.push_back(my_output); wl_output_add_listener(my_output->output, &output_listener_, my_output); wl_display_roundtrip(that->display_->display); + } else if (strcmp(interface, "wl_seat") == 0) { + that->display_->seat = + static_cast( + wl_registry_bind(registry, id, &wl_seat_interface, 1)); + wl_seat_add_listener(that->display_->seat, &seat_listener_, that); + } else if (strcmp(interface, "wl_shm") == 0) { + that->display_->shm = + static_cast( + wl_registry_bind(registry, id, &wl_shm_interface, 1)); + + struct my_cursor *my_cursor = new struct my_cursor(); + my_cursor->cursor_surface = + wl_compositor_create_surface(that->display_->compositor); + my_cursor->cursor_theme = wl_cursor_theme_load(NULL, 32, that->display_->shm); + + if (!my_cursor->cursor_theme) { + Log::error("unable to load default theme\n"); + wl_surface_destroy(my_cursor->cursor_surface); + delete my_cursor; + return; + } + + my_cursor->default_cursor = + wl_cursor_theme_get_cursor(my_cursor->cursor_theme, "left_ptr"); + + if (!my_cursor->default_cursor) { + wl_surface_destroy(my_cursor->cursor_surface); + // assume above cursor_theme was set + wl_cursor_theme_destroy(my_cursor->cursor_theme); + delete my_cursor; + return; + } + + that->cursor_ = my_cursor; } } @@ -152,32 +227,84 @@ } void -NativeStateWayland::output_handle_scale(void * /*data*/, struct wl_output * /*wl_output*/, - int32_t /*factor*/) +NativeStateWayland::output_handle_scale(void *data, struct wl_output * /*wl_output*/, + int32_t factor) { + struct my_output *my_output = static_cast(data); + my_output->scale = factor; } void -NativeStateWayland::shell_surface_handle_ping(void * /*data*/, struct wl_shell_surface *shell_surface, - uint32_t serial) +NativeStateWayland::xdg_wm_base_handle_ping(void * /*data*/, struct xdg_wm_base *xdg_wm_base, + uint32_t serial) { - wl_shell_surface_pong(shell_surface, serial); + xdg_wm_base_pong(xdg_wm_base, serial); } void -NativeStateWayland::shell_surface_handle_popup_done(void * /*data*/, - struct wl_shell_surface * /*shell_surface*/) +NativeStateWayland::xdg_toplevel_handle_configure(void *data, struct xdg_toplevel * /*xdg_toplevel*/, + int32_t width, int32_t height, + struct wl_array *states) { + NativeStateWayland *that = static_cast(data); + uint32_t *state; + bool want_fullscreen = false; + uint32_t scale = 1; + + that->window_->waiting_for_configure = false; + + if (!that->display_->outputs.empty()) scale = that->display_->outputs.at(0)->scale; + + for (state = (uint32_t *) states->data; + state < (uint32_t *) ((char *) states->data + states->size); + state++) { + if (*state == XDG_TOPLEVEL_STATE_FULLSCREEN) + want_fullscreen = true; + } + + if (want_fullscreen) { + width *= scale; + height *= scale; + } + + if (want_fullscreen == that->window_->properties.fullscreen && + (width == 0 || width == that->window_->properties.width) && + (height == 0 || height == that->window_->properties.fullscreen)) { + return; + } + + that->window_->properties.fullscreen = want_fullscreen; + that->window_->properties.width = width; + that->window_->properties.height = height; + + if (!that->window_->native) + return; + + wl_egl_window_resize(that->window_->native, width, height, 0, 0); + + struct wl_region *opaque_reqion = wl_compositor_create_region(that->display_->compositor); + wl_region_add(opaque_reqion, 0, 0, + that->window_->properties.width, + that->window_->properties.height); + wl_surface_set_opaque_region(that->window_->surface, opaque_reqion); + wl_region_destroy(opaque_reqion); } void -NativeStateWayland::shell_surface_handle_configure(void *data, struct wl_shell_surface * /*shell_surface*/, - uint32_t /*edges*/, int32_t width, int32_t height) +NativeStateWayland::xdg_toplevel_handle_close(void *data, + struct xdg_toplevel * /*xdg_toplevel */) { NativeStateWayland *that = static_cast(data); - that->window_->properties.width = width; - that->window_->properties.height = height; - wl_egl_window_resize(that->window_->native, width, height, 0, 0); + + that->should_quit_ = true; +} + +void +NativeStateWayland::xdg_surface_handle_configure(void * /*data*/, + struct xdg_surface *xdg_surface, + uint32_t serial) +{ + xdg_surface_ack_configure(xdg_surface, serial); } bool @@ -191,7 +318,7 @@ sigaction(SIGINT, &sa, NULL); sigaction(SIGTERM, &sa, NULL); - display_ = new struct my_display; + display_ = new struct my_display(); if (!display_) { return false; @@ -223,42 +350,51 @@ { struct my_output *output = 0; if (!display_->outputs.empty()) output = display_->outputs.at(0); - window_ = new struct my_window; + window_ = new struct my_window(); window_->properties = properties; - window_->surface = wl_compositor_create_surface(display_->compositor); - if (window_->properties.fullscreen && output) { - window_->native = wl_egl_window_create(window_->surface, - output->width, output->height); - window_->properties.width = output->width; - window_->properties.height = output->height; - } else { - window_->native = wl_egl_window_create(window_->surface, - properties.width, properties.height); - } - window_->opaque_reqion = wl_compositor_create_region(display_->compositor); - wl_region_add(window_->opaque_reqion, 0, 0, - window_->properties.width, - window_->properties.height); - wl_surface_set_opaque_region(window_->surface, window_->opaque_reqion); - - window_->shell_surface = wl_shell_get_shell_surface(display_->shell, - window_->surface); + window_->surface = wl_compositor_create_surface(display_->compositor); + window_->xdg_surface = xdg_wm_base_get_xdg_surface(display_->xdg_wm_base, + window_->surface); + xdg_surface_add_listener(window_->xdg_surface, &xdg_surface_listener_, this); + window_->xdg_toplevel = xdg_surface_get_toplevel(window_->xdg_surface); + xdg_toplevel_add_listener(window_->xdg_toplevel, &xdg_toplevel_listener_, this); + + xdg_toplevel_set_app_id(window_->xdg_toplevel, "com.github.glmark2.glmark2"); + xdg_toplevel_set_title(window_->xdg_toplevel, "glmark2"); + if (window_->properties.fullscreen && output) + xdg_toplevel_set_fullscreen(window_->xdg_toplevel, output->output); + wl_surface_commit(window_->surface); + window_->waiting_for_configure = true; + + /* xdg-shell requires us to wait for the compositor to tell us what its + * desired size for us is */ + while (window_->waiting_for_configure) + wl_display_roundtrip(display_->display); + + if (output && window_->properties.fullscreen) { + if (window_->properties.width <= 0 || + window_->properties.height <= 0) { + window_->properties.width = output->width * output->scale; + window_->properties.height = output->height * output->scale; + } - if (window_->shell_surface) { - wl_shell_surface_add_listener(window_->shell_surface, - &shell_surface_listener_, this); + if (wl_proxy_get_version((struct wl_proxy *) output) >= + WL_OUTPUT_SCALE_SINCE_VERSION) { + wl_surface_set_buffer_scale(window_->surface, output->scale); + } } - wl_shell_surface_set_title(window_->shell_surface, "glmark2"); + window_->native = wl_egl_window_create(window_->surface, + window_->properties.width, + window_->properties.height); - if (window_->properties.fullscreen) { - wl_shell_surface_set_fullscreen(window_->shell_surface, - WL_SHELL_SURFACE_FULLSCREEN_METHOD_DRIVER, - output->refresh, output->output); - } else { - wl_shell_surface_set_toplevel(window_->shell_surface); - } + struct wl_region *opaque_reqion = wl_compositor_create_region(display_->compositor); + wl_region_add(opaque_reqion, 0, 0, + window_->properties.width, + window_->properties.height); + wl_surface_set_opaque_region(window_->surface, opaque_reqion); + wl_region_destroy(opaque_reqion); return true; } @@ -298,3 +434,135 @@ should_quit_ = true; } +void +NativeStateWayland::seat_handle_capabilities(void *data, struct wl_seat *seat, uint32_t caps) +{ + NativeStateWayland *that = static_cast(data); + + if ((caps & WL_SEAT_CAPABILITY_POINTER) && !that->display_->pointer) { + that->display_->pointer = wl_seat_get_pointer(seat); + wl_pointer_add_listener(that->display_->pointer, &pointer_listener_, that); + } else if (!(caps & WL_SEAT_CAPABILITY_POINTER) && that->display_->pointer) { + wl_pointer_destroy(that->display_->pointer); + that->display_->pointer = NULL; + } + + if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !that->display_->keyboard) { + that->display_->keyboard = wl_seat_get_keyboard(seat); + wl_keyboard_add_listener(that->display_->keyboard, &keyboard_listener_, that); + } else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && that->display_->keyboard) { + wl_keyboard_destroy(that->display_->keyboard); + that->display_->keyboard = NULL; + } +} + + +void +NativeStateWayland::pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy) +{ + NativeStateWayland *that = static_cast(data); + + struct wl_buffer *buffer; + struct my_cursor *my_cursor = that->cursor_; + struct wl_cursor *cursor; + struct wl_cursor_image *image; + + if (!my_cursor) + return; + + cursor = my_cursor->default_cursor; + + if (that->window_->properties.fullscreen) { + wl_pointer_set_cursor(pointer, serial, NULL, 0, 0); + } else { + if (cursor) { + image = my_cursor->default_cursor->images[0]; + buffer = wl_cursor_image_get_buffer(image); + if (!buffer) + return; + + wl_pointer_set_cursor(pointer, serial, + my_cursor->cursor_surface, + image->hotspot_x, + image->hotspot_y); + wl_surface_attach(my_cursor->cursor_surface, buffer, 0, 0); + wl_surface_damage(my_cursor->cursor_surface, 0, 0, + image->width, image->height); + wl_surface_commit(my_cursor->cursor_surface); + } + } +} + +void +NativeStateWayland::pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface) +{ +} + +void +NativeStateWayland::pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy) +{ +} + +void +NativeStateWayland::pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, uint32_t button, + uint32_t state) +{ + /* just show that window_ can be used */ + NativeStateWayland *that = static_cast(data); + + if (!that->window_->xdg_toplevel) + return; + + if (state == WL_POINTER_BUTTON_STATE_PRESSED && button == BTN_LEFT) + xdg_toplevel_move(that->window_->xdg_toplevel, + that->display_->seat, serial); +} + +void +NativeStateWayland::pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value) +{ +} + +void +NativeStateWayland::keyboard_handle_keymap(void *data, struct wl_keyboard *keyboard, + uint32_t format, int fd, uint32_t size) +{ + close(fd); +} + +void +NativeStateWayland::keyboard_handle_enter(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface, + struct wl_array *keys) +{ +} + +void +NativeStateWayland::keyboard_handle_leave(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface) +{ +} + +void +NativeStateWayland::keyboard_handle_key(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t time, uint32_t key, + uint32_t state) +{ + if (state == WL_KEYBOARD_KEY_STATE_PRESSED && + (key == KEY_ESC || key == KEY_Q)) + should_quit_ = true; +} + +void +NativeStateWayland::keyboard_handle_modifiers(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, + uint32_t group) +{ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-wayland.h glmark2-2021.02/android/jni/src/native-state-wayland.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-wayland.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-wayland.h 2021-04-25 01:47:47.000000000 +0800 @@ -26,6 +26,8 @@ #include #include #include +#include +#include "xdg-shell-client-protocol.h" #include "native-state.h" @@ -47,8 +49,13 @@ static void quit_handler(int signum); static const struct wl_registry_listener registry_listener_; - static const struct wl_shell_surface_listener shell_surface_listener_; + static const struct xdg_wm_base_listener xdg_wm_base_listener_; + static const struct xdg_surface_listener xdg_surface_listener_; + static const struct xdg_toplevel_listener xdg_toplevel_listener_; static const struct wl_output_listener output_listener_; + static const struct wl_seat_listener seat_listener_; + static const struct wl_pointer_listener pointer_listener_; + static const struct wl_keyboard_listener keyboard_listener_; static void registry_handle_global(void *data, struct wl_registry *registry, @@ -76,39 +83,87 @@ output_handle_scale(void *data, struct wl_output *wl_output, int32_t factor); static void - shell_surface_handle_ping(void *data, struct wl_shell_surface *shell_surface, - uint32_t serial); + xdg_wm_base_handle_ping(void *data, struct xdg_wm_base *xdg_wm_base, + uint32_t serial); static void - shell_surface_handle_configure(void *data, - struct wl_shell_surface *shell_surface, - uint32_t edges, - int32_t width, int32_t height); - static void - shell_surface_handle_popup_done(void *data, - struct wl_shell_surface *shell_surface); - + xdg_toplevel_handle_configure(void *data, + struct xdg_toplevel *xdg_toplevel, + int32_t width, int32_t height, + struct wl_array *states); + static void + xdg_toplevel_handle_close(void *data, struct xdg_toplevel *xdg_toplevel); + static void + xdg_surface_handle_configure(void *data, struct xdg_surface *xdg_surface, + uint32_t serial); + + static void seat_handle_capabilities(void *data, + struct wl_seat *seat, uint32_t caps); + + static void pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy); + + static void pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface); + + static void pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy); + + static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, + uint32_t button, uint32_t state); + + static void pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value); + + static void keyboard_handle_keymap(void *data, struct wl_keyboard *keyboard, + uint32_t format, int fd, uint32_t size); + static void keyboard_handle_enter(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface, + struct wl_array *keys); + static void keyboard_handle_leave(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface); + static void keyboard_handle_key(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t time, uint32_t key, + uint32_t state); + static void keyboard_handle_modifiers(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, + uint32_t group); struct my_output { wl_output *output; int32_t width, height; int32_t refresh; + int32_t scale; }; typedef std::vector OutputsVector; + struct my_cursor { + wl_cursor_theme *cursor_theme; + wl_cursor *default_cursor; + wl_surface *cursor_surface; + } *cursor_; + struct my_display { wl_display *display; wl_registry *registry; wl_compositor *compositor; - wl_shell *shell; + wl_shm *shm; + wl_seat *seat; + wl_pointer *pointer; + wl_keyboard *keyboard; + struct xdg_wm_base *xdg_wm_base; OutputsVector outputs; } *display_; struct my_window { WindowProperties properties; + bool waiting_for_configure; struct wl_surface *surface; - struct wl_region *opaque_reqion; struct wl_egl_window *native; - struct wl_shell_surface *shell_surface; + struct xdg_surface *xdg_surface; + struct xdg_toplevel *xdg_toplevel; } *window_; static volatile bool should_quit_; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-win32.cpp glmark2-2021.02/android/jni/src/native-state-win32.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-win32.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-win32.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,248 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#include "native-state-win32.h" + +static const char *win_name("glmark2 " GLMARK_VERSION); +static const char *child_win_name("glmark2 " GLMARK_VERSION " child"); + +/****************** + * Public methods * + ******************/ + +NativeStateWin32::NativeStateWin32() : parent_window_(0), child_window_(0), native_display_(0), should_quit_(false) {} + +NativeStateWin32::~NativeStateWin32() { cleanup(); } + +bool +NativeStateWin32::init_display() +{ + cleanup(); + + static const LONG initial_width = 128; + static const LONG initial_height = 128; + + // Work around compile error from not defining "UNICODE" while Chromium does + const LPSTR idcArrow = MAKEINTRESOURCEA(32512); + + WNDCLASSEXA parentWindowClass = {}; + parentWindowClass.cbSize = sizeof(WNDCLASSEXA); + parentWindowClass.style = 0; + parentWindowClass.lpfnWndProc = wnd_proc; + parentWindowClass.cbClsExtra = 0; + parentWindowClass.cbWndExtra = 0; + parentWindowClass.hInstance = GetModuleHandle(nullptr); + parentWindowClass.hIcon = nullptr; + parentWindowClass.hCursor = LoadCursorA(nullptr, idcArrow); + parentWindowClass.hbrBackground = 0; + parentWindowClass.lpszMenuName = nullptr; + parentWindowClass.lpszClassName = win_name; + if (!RegisterClassExA(&parentWindowClass)) + { + return false; + } + + WNDCLASSEXA childWindowClass = {}; + childWindowClass.cbSize = sizeof(WNDCLASSEXA); + childWindowClass.style = CS_OWNDC; + childWindowClass.lpfnWndProc = wnd_proc; + childWindowClass.cbClsExtra = 0; + childWindowClass.cbWndExtra = 0; + childWindowClass.hInstance = GetModuleHandle(nullptr); + childWindowClass.hIcon = nullptr; + childWindowClass.hCursor = LoadCursorA(nullptr, idcArrow); + childWindowClass.hbrBackground = 0; + childWindowClass.lpszMenuName = nullptr; + childWindowClass.lpszClassName = child_win_name; + if (!RegisterClassExA(&childWindowClass)) + { + return false; + } + + DWORD parentStyle = WS_CAPTION | WS_THICKFRAME | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_SYSMENU; + DWORD parentExtendedStyle = WS_EX_APPWINDOW; + + RECT sizeRect = { 0, 0, initial_width, initial_height }; + AdjustWindowRectEx(&sizeRect, parentStyle, FALSE, parentExtendedStyle); + + parent_window_ = CreateWindowExA(parentExtendedStyle, win_name, win_name, + parentStyle, CW_USEDEFAULT, CW_USEDEFAULT, + sizeRect.right - sizeRect.left, sizeRect.bottom - sizeRect.top, + nullptr, nullptr, GetModuleHandle(nullptr), this); + + child_window_ = CreateWindowExA(0, child_win_name, win_name, WS_CHILD, 0, 0, + static_cast(initial_width), static_cast(initial_height), + parent_window_, nullptr, GetModuleHandle(nullptr), this); + + native_display_ = GetDC(child_window_); + if (!native_display_) + { + cleanup(); + } + + return native_display_; +} + +void* +NativeStateWin32::display() +{ + return native_display_; +} + +bool +NativeStateWin32::create_window(WindowProperties const& properties) +{ + properties_ = properties; + + RECT windowRect; + if (!GetWindowRect(parent_window_, &windowRect)) + { + return false; + } + + RECT clientRect; + if (!GetClientRect(parent_window_, &clientRect)) + { + return false; + } + + LONG diffX = (windowRect.right - windowRect.left) - clientRect.right; + LONG diffY = (windowRect.bottom - windowRect.top) - clientRect.bottom; + if (!MoveWindow(parent_window_, windowRect.left, windowRect.top, properties.width + diffX, properties.height + diffY, + TRUE)) + { + return false; + } + + if (!MoveWindow(child_window_, 0, 0, properties.width, properties.height, FALSE)) + { + return false; + } + + return true; +} + +void* +NativeStateWin32::window(WindowProperties& properties) +{ + properties = properties_; + return child_window_; +} + +void +NativeStateWin32::visible(bool visible) +{ + int flag = (visible ? SW_SHOW : SW_HIDE); + + if (parent_window_) + { + ShowWindow(parent_window_, flag); + } + + if (child_window_) + { + ShowWindow(child_window_, flag); + } +} + +bool +NativeStateWin32::should_quit() +{ + pump_messages(); + return should_quit_; +} + +void +NativeStateWin32::flip() +{ + pump_messages(); +} + +/******************* + * Private methods * + *******************/ + +void +NativeStateWin32::cleanup() +{ + if (native_display_) + { + ReleaseDC(child_window_, native_display_); + native_display_ = 0; + } + + if (child_window_) + { + DestroyWindow(child_window_); + child_window_ = 0; + } + + if (parent_window_) + { + DestroyWindow(parent_window_); + parent_window_ = 0; + } + + UnregisterClassA(win_name, nullptr); + UnregisterClassA(child_win_name, nullptr); +} + +void +NativeStateWin32::pump_messages() +{ + MSG msg; + while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } +} + +LRESULT CALLBACK +NativeStateWin32::wnd_proc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + switch (message) + { + case WM_NCCREATE: + { + LPCREATESTRUCT pCreateStruct = reinterpret_cast(lParam); + SetWindowLongPtr(hWnd, GWLP_USERDATA, + reinterpret_cast(pCreateStruct->lpCreateParams)); + return DefWindowProcA(hWnd, message, wParam, lParam); + } + } + + NativeStateWin32 *window = reinterpret_cast(GetWindowLongPtr(hWnd, GWLP_USERDATA)); + if (window) + { + switch (message) + { + case WM_CLOSE: + case WM_DESTROY: + window->should_quit_ = true; + break; + default: + break; + } + } + + return DefWindowProcA(hWnd, message, wParam, lParam); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-win32.h glmark2-2021.02/android/jni/src/native-state-win32.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-win32.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-win32.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#ifndef GLMARK2_NATIVE_STATE_WIN32_H_ +#define GLMARK2_NATIVE_STATE_WIN32_H_ + +#include "native-state.h" + +#include + +class NativeStateWin32 : public NativeState +{ +public: + NativeStateWin32(); + ~NativeStateWin32(); + + bool init_display(); + void* display(); + bool create_window(WindowProperties const& properties); + void* window(WindowProperties& properties); + void visible(bool v); + bool should_quit(); + void flip(); + +private: + void cleanup(); + void pump_messages(); + + static LRESULT CALLBACK wnd_proc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); + + HWND parent_window_; + HWND child_window_; + HDC native_display_; + WindowProperties properties_; + bool should_quit_; +}; + +#endif /* GLMARK2_NATIVE_STATE_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-x11.cpp glmark2-2021.02/android/jni/src/native-state-x11.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/native-state-x11.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/native-state-x11.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -60,7 +60,7 @@ bool NativeStateX11::create_window(WindowProperties const& properties) { - static const char *win_name("glmark2 "GLMARK_VERSION); + static const char *win_name("glmark2 " GLMARK_VERSION); if (!xdpy_) { Log::error("Error: X11 Display has not been initialized!\n"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/options.cpp glmark2-2021.02/android/jni/src/options.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/options.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/options.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -31,6 +31,7 @@ std::vector Options::benchmarks; std::vector Options::benchmark_files; bool Options::validate = false; +std::string Options::data_path = std::string(GLMARK_DATA_PATH); Options::FrameEnd Options::frame_end = Options::FrameEndDefault; std::pair Options::size(800, 600); bool Options::list_scenes = false; @@ -48,6 +49,7 @@ {"benchmark", 1, 0, 0}, {"benchmark-file", 1, 0, 0}, {"validate", 0, 0, 0}, + {"data-path", 1, 0, 0}, {"frame-end", 1, 0, 0}, {"off-screen", 0, 0, 0}, {"visual-config", 1, 0, 0}, @@ -116,13 +118,15 @@ printf("A benchmark for Open GL (ES) 2.0\n" "\n" "Options:\n" - " -b, --benchmark BENCH A benchmark to run: 'scene(:opt1=val1)*'\n" + " -b, --benchmark BENCH A benchmark or options to run: '(scene)?(:opt1=val1)*'\n" " (the option can be used multiple times)\n" " -f, --benchmark-file F Load benchmarks to run from a file containing a\n" " list of benchmark descriptions (one per line)\n" " (the option can be used multiple times)\n" " --validate Run a quick output validation test instead of \n" " running the benchmarks\n" + " --data-path PATH Path to glmark2 models, shaders and textures\n" + " Default: " GLMARK_DATA_PATH "\n" " --frame-end METHOD How to end a frame [default,none,swap,finish,readpixels]\n" " --off-screen Render to an off-screen surface\n" " --visual-config C The visual configuration to use for the rendering\n" @@ -171,6 +175,8 @@ Options::benchmark_files.push_back(optarg); else if (!strcmp(optname, "validate")) Options::validate = true; + else if (!strcmp(optname, "data-path")) + Options::data_path = std::string(optarg); else if (!strcmp(optname, "frame-end")) Options::frame_end = frame_end_from_str(optarg); else if (!strcmp(optname, "off-screen")) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/options.h glmark2-2021.02/android/jni/src/options.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/options.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/options.h 2021-04-25 01:47:22.000000000 +0800 @@ -43,6 +43,7 @@ static std::vector benchmarks; static std::vector benchmark_files; static bool validate; + static std::string data_path; static FrameEnd frame_end; static std::pair size; static bool list_scenes; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-buffer.cpp glmark2-2021.02/android/jni/src/scene-buffer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-buffer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-buffer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "util.h" @@ -244,9 +245,9 @@ { /* Set up shaders */ static const std::string vtx_shader_filename( - GLMARK_DATA_PATH"/shaders/buffer-wireframe.vert"); + Options::data_path + "/shaders/buffer-wireframe.vert"); static const std::string frg_shader_filename( - GLMARK_DATA_PATH"/shaders/buffer-wireframe.frag"); + Options::data_path + "/shaders/buffer-wireframe.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-build.cpp glmark2-2021.02/android/jni/src/scene-build.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-build.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-build.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -25,6 +25,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "model.h" @@ -89,8 +90,8 @@ return false; /* Set up shaders */ - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/light-basic.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); static const LibMatrix::vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-bump.cpp glmark2-2021.02/android/jni/src/scene-bump.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-bump.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-bump.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "model.h" @@ -61,8 +62,8 @@ bool SceneBump::setup_model_plain(const std::string &type) { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-poly.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-poly.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-poly.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-poly.frag"); static const std::string low_poly_filename("asteroid-low"); static const std::string high_poly_filename("asteroid-high"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); @@ -115,8 +116,8 @@ bool SceneBump::setup_model_normals() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-normals.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-normals.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; @@ -170,8 +171,8 @@ bool SceneBump::setup_model_normals_tangent() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals-tangent.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals-tangent.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-normals-tangent.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-normals-tangent.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; @@ -228,8 +229,8 @@ bool SceneBump::setup_model_height() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-height.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-height.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-height.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-height.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-clear.cpp glmark2-2021.02/android/jni/src/scene-clear.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-clear.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-clear.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2013 Linaro Limited +// Copyright © 2013 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-collection.h glmark2-2021.02/android/jni/src/scene-collection.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-collection.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-collection.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2013 Linaro Limited +// Copyright © 2013 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-conditionals.cpp glmark2-2021.02/android/jni/src/scene-conditionals.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-conditionals.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-conditionals.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,6 +21,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -29,7 +30,7 @@ #include -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/conditionals"); +static const std::string shader_file_base("/shaders/conditionals"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -56,14 +57,14 @@ static std::string get_vertex_shader_source(int steps, bool conditionals) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; for (int i = 0; i < steps; i++) { if (conditionals) - source_main.append_file(step_conditional_file); + source_main.append_file(Options::data_path + step_conditional_file); else - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); @@ -74,14 +75,14 @@ static std::string get_fragment_shader_source(int steps, bool conditionals) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; for (int i = 0; i < steps; i++) { if (conditionals) - source_main.append_file(step_conditional_file); + source_main.append_file(Options::data_path + step_conditional_file); else - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene.cpp glmark2-2021.02/android/jni/src/scene.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -43,10 +43,12 @@ Scene::Scene(Canvas &pCanvas, const string &name) : canvas_(pCanvas), name_(name), startTime_(0), lastUpdateTime_(0), currentFrame_(0), - running_(0), duration_(0) + running_(0), duration_(0), nframes_(0) { options_["duration"] = Scene::Option("duration", "10.0", "The duration of each benchmark in seconds"); + options_["nframes"] = Scene::Option("nframes", "", + "The number of frames to render"); options_["vertex-precision"] = Scene::Option("vertex-precision", "default,default,default,default", "The precision values for the vertex shader (\"int,float,sampler2d,samplercube\")"); @@ -96,6 +98,7 @@ Scene::setup() { duration_ = Util::fromString(options_["duration"].value); + nframes_ = Util::fromString(options_["nframes"].value); ShaderSource::default_precision( ShaderSource::Precision(options_["vertex-precision"].value), @@ -132,6 +135,9 @@ if (elapsed_time >= duration_) running_ = false; + + if (nframes_ > 0 && currentFrame_ >= nframes_) + running_ = false; } void diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-desktop.cpp glmark2-2021.02/android/jni/src/scene-desktop.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-desktop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-desktop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -20,11 +20,13 @@ * Alexandros Frantzis (glmark2) * Jesse Barker (glmark2) */ +#include #include #include #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -43,8 +45,8 @@ create_blur_shaders(ShaderSource& vtx_source, ShaderSource& frg_source, unsigned int radius, float sigma, BlurDirection direction) { - vtx_source.append_file(GLMARK_DATA_PATH"/shaders/desktop.vert"); - frg_source.append_file(GLMARK_DATA_PATH"/shaders/desktop-blur.frag"); + vtx_source.append_file(Options::data_path + "/shaders/desktop.vert"); + frg_source.append_file(Options::data_path + "/shaders/desktop-blur.frag"); /* Don't let the gaussian curve become too narrow */ if (sigma < 1.0) @@ -122,6 +124,9 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + GLint prev_fbo; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prev_fbo); + /* Create a FBO */ glGenFramebuffers(1, &fbo_); glBindFramebuffer(GL_FRAMEBUFFER, fbo_); @@ -144,12 +149,12 @@ } } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, prev_fbo); /* Load the shader program when this class if first used */ if (RenderObject::use_count == 0) { - ShaderSource vtx_source(GLMARK_DATA_PATH"/shaders/desktop.vert"); - ShaderSource frg_source(GLMARK_DATA_PATH"/shaders/desktop.frag"); + ShaderSource vtx_source(Options::data_path + "/shaders/desktop.vert"); + ShaderSource frg_source(Options::data_path + "/shaders/desktop.frag"); Scene::load_shaders_from_strings(main_program, vtx_source.str(), frg_source.str()); } @@ -196,6 +201,8 @@ /* Recreate the backing texture with correct size */ if (size_.x() != size.x() || size_.y() != size.y()) { size_ = size; + if (fbo_ == 0) + return; /* If we're resizing the texture, we need to tell the framebuffer*/ glBindTexture(GL_TEXTURE_2D, texture_); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, size_.x(), size_.y(), 0, @@ -377,9 +384,11 @@ */ class RenderScreen : public RenderObject { + Canvas &canvas_; public: - RenderScreen(Canvas &canvas) { fbo_ = canvas.fbo(); } - virtual void init() {} + RenderScreen(Canvas &canvas) : canvas_(canvas) {} + virtual void init() { fbo_ = canvas_.fbo(); } + virtual void size(const LibMatrix::vec2& size) { size_ = size; clear(); } virtual void release() {} }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-effect-2d.cpp glmark2-2021.02/android/jni/src/scene-effect-2d.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-effect-2d.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-effect-2d.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,6 +26,7 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -96,7 +97,7 @@ create_convolution_fragment_shader(Canvas &canvas, std::vector &array, unsigned int width, unsigned int height) { - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/effect-2d-convolution.frag"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/effect-2d-convolution.frag"); ShaderSource source(frg_shader_filename); if (width * height != array.size()) { @@ -303,7 +304,7 @@ Texture::find_textures(); - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/effect-2d.vert"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/effect-2d.vert"); std::vector kernel; unsigned int kernel_width = 0; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-function.cpp glmark2-2021.02/android/jni/src/scene-function.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-function.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-function.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,13 +23,14 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" #include "shader-source.h" #include "util.h" -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/function"); +static const std::string shader_file_base("/shaders/function"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -61,7 +62,7 @@ static std::string get_vertex_shader_source(int steps, bool function, std::string &complexity) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; std::string step_file; @@ -72,13 +73,13 @@ for (int i = 0; i < steps; i++) { if (function) - source_main.append_file(call_file); + source_main.append_file(Options::data_path + call_file); else - source_main.append_file(step_file); + source_main.append_file(Options::data_path + step_file); } if (function) - source.replace_with_file("$PROCESS$", step_file); + source.replace_with_file("$PROCESS$", Options::data_path + step_file); else source.replace("$PROCESS$", ""); @@ -90,7 +91,7 @@ static std::string get_fragment_shader_source(int steps, bool function, std::string &complexity) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; std::string step_file; @@ -101,13 +102,13 @@ for (int i = 0; i < steps; i++) { if (function) - source_main.append_file(call_file); + source_main.append_file(Options::data_path + call_file); else - source_main.append_file(step_file); + source_main.append_file(Options::data_path + step_file); } if (function) - source.replace_with_file("$PROCESS$", step_file); + source.replace_with_file("$PROCESS$", Options::data_path + step_file); else source.replace("$PROCESS$", ""); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene.h glmark2-2021.02/android/jni/src/scene.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene.h 2021-04-25 01:47:22.000000000 +0800 @@ -234,6 +234,7 @@ unsigned currentFrame_; bool running_; double duration_; // Duration of run in seconds + unsigned nframes_; }; /* diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/a.cc glmark2-2021.02/android/jni/src/scene-ideas/a.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/a.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/a.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'a' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/characters.h glmark2-2021.02/android/jni/src/scene-ideas/characters.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/characters.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/characters.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/d.cc glmark2-2021.02/android/jni/src/scene-ideas/d.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/d.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/d.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'd' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/e.cc glmark2-2021.02/android/jni/src/scene-ideas/e.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/e.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/e.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'e' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/i.cc glmark2-2021.02/android/jni/src/scene-ideas/i.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/i.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/i.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'i' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/lamp.cc glmark2-2021.02/android/jni/src/scene-ideas/lamp.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/lamp.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/lamp.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the lamp * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -23,6 +23,7 @@ * Jesse Barker */ #include "lamp.h" +#include "options.h" #include "shader-source.h" #include "log.h" #include "scene.h" @@ -170,8 +171,8 @@ // Initialize shader sources from input files and create programs from them // The program for handling lighting... - string lit_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-lit.vert"); - string lit_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-lit.frag"); + string lit_vtx_filename(Options::data_path + "/shaders/ideas-lamp-lit.vert"); + string lit_frg_filename(Options::data_path + "/shaders/ideas-lamp-lit.frag"); ShaderSource lit_vtx_source(lit_vtx_filename); ShaderSource lit_frg_source(lit_frg_filename); if (!Scene::load_shaders_from_strings(litProgram_, lit_vtx_source.str(), @@ -182,8 +183,8 @@ } // The simple program with no lighting... - string unlit_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-unlit.vert"); - string unlit_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-unlit.frag"); + string unlit_vtx_filename(Options::data_path + "/shaders/ideas-lamp-unlit.vert"); + string unlit_frg_filename(Options::data_path + "/shaders/ideas-lamp-unlit.frag"); ShaderSource unlit_vtx_source(unlit_vtx_filename); ShaderSource unlit_frg_source(unlit_frg_filename); if (!Scene::load_shaders_from_strings(unlitProgram_, unlit_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/lamp.h glmark2-2021.02/android/jni/src/scene-ideas/lamp.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/lamp.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/lamp.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/logo.cc glmark2-2021.02/android/jni/src/scene-ideas/logo.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/logo.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/logo.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the old Silicon Graphics logo * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -23,6 +23,7 @@ * Jesse Barker */ #include "logo.h" +#include "options.h" #include "scene.h" #include "shader-source.h" #include "log.h" @@ -423,8 +424,8 @@ // Initialize shader sources from input files and create programs from them // The program for handling the main object with lighting... - string logo_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo.vert"); - string logo_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo.frag"); + string logo_vtx_filename(Options::data_path + "/shaders/ideas-logo.vert"); + string logo_frg_filename(Options::data_path + "/shaders/ideas-logo.frag"); ShaderSource logo_vtx_source(logo_vtx_filename); ShaderSource logo_frg_source(logo_frg_filename); if (!Scene::load_shaders_from_strings(normalProgram_, logo_vtx_source.str(), @@ -437,8 +438,8 @@ normalNormalIndex_ = normalProgram_[normalAttribName_].location(); // The program for handling the flat object... - string logo_flat_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-flat.vert"); - string logo_flat_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-flat.frag"); + string logo_flat_vtx_filename(Options::data_path + "/shaders/ideas-logo-flat.vert"); + string logo_flat_frg_filename(Options::data_path + "/shaders/ideas-logo-flat.frag"); ShaderSource logo_flat_vtx_source(logo_flat_vtx_filename); ShaderSource logo_flat_frg_source(logo_flat_frg_filename); if (!Scene::load_shaders_from_strings(flatProgram_, logo_flat_vtx_source.str(), @@ -450,8 +451,8 @@ flatVertexIndex_ = flatProgram_[vertexAttribName_].location(); // The program for handling the shadow object with texturing... - string logo_shadow_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-shadow.vert"); - string logo_shadow_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-shadow.frag"); + string logo_shadow_vtx_filename(Options::data_path + "/shaders/ideas-logo-shadow.vert"); + string logo_shadow_frg_filename(Options::data_path + "/shaders/ideas-logo-shadow.frag"); ShaderSource logo_shadow_vtx_source(logo_shadow_vtx_filename); ShaderSource logo_shadow_frg_source(logo_shadow_frg_filename); if (!Scene::load_shaders_from_strings(shadowProgram_, logo_shadow_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/logo.h glmark2-2021.02/android/jni/src/scene-ideas/logo.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/logo.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/logo.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/m.cc glmark2-2021.02/android/jni/src/scene-ideas/m.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/m.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/m.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'm' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/n.cc glmark2-2021.02/android/jni/src/scene-ideas/n.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/n.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/n.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'o' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/o.cc glmark2-2021.02/android/jni/src/scene-ideas/o.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/o.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/o.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'o' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/s.cc glmark2-2021.02/android/jni/src/scene-ideas/s.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/s.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/s.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 's' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/splines.cc glmark2-2021.02/android/jni/src/scene-ideas/splines.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/splines.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/splines.cc 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/splines.h glmark2-2021.02/android/jni/src/scene-ideas/splines.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/splines.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/splines.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/table.cc glmark2-2021.02/android/jni/src/scene-ideas/table.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/table.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/table.cc 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -20,6 +20,7 @@ * Authors: * Jesse Barker */ +#include "options.h" #include "table.h" #include "scene.h" #include "shader-source.h" @@ -101,8 +102,8 @@ // Initialize shader sources from input files and create programs from them // Program to render the table with lighting and a time-based fade... - string table_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-table.vert"); - string table_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-table.frag"); + string table_vtx_filename(Options::data_path + "/shaders/ideas-table.vert"); + string table_frg_filename(Options::data_path + "/shaders/ideas-table.frag"); ShaderSource table_vtx_source(table_vtx_filename); ShaderSource table_frg_source(table_frg_filename); if (!Scene::load_shaders_from_strings(tableProgram_, table_vtx_source.str(), @@ -111,11 +112,11 @@ Log::error("No valid program for table rendering.\n"); return; } - textVertexIndex_ = tableProgram_[vertexAttribName_].location(); + tableVertexIndex_ = tableProgram_[vertexAttribName_].location(); // Program to render the paper with lighting and a time-based fade... - string paper_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-paper.vert"); - string paper_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-paper.frag"); + string paper_vtx_filename(Options::data_path + "/shaders/ideas-paper.vert"); + string paper_frg_filename(Options::data_path + "/shaders/ideas-paper.frag"); ShaderSource paper_vtx_source(paper_vtx_filename); ShaderSource paper_frg_source(paper_frg_filename); if (!Scene::load_shaders_from_strings(paperProgram_, paper_vtx_source.str(), @@ -127,8 +128,8 @@ paperVertexIndex_ = paperProgram_[vertexAttribName_].location(); // Program to handle the text (time-based color fade)... - string text_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-text.vert"); - string text_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-text.frag"); + string text_vtx_filename(Options::data_path + "/shaders/ideas-text.vert"); + string text_frg_filename(Options::data_path + "/shaders/ideas-text.frag"); ShaderSource text_vtx_source(text_vtx_filename); ShaderSource text_frg_source(text_frg_filename); if (!Scene::load_shaders_from_strings(textProgram_, text_vtx_source.str(), @@ -140,8 +141,8 @@ textVertexIndex_ = textProgram_[vertexAttribName_].location(); // Program for the drawUnder functionality (just paint it black)... - string under_table_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-under-table.vert"); - string under_table_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-under-table.frag"); + string under_table_vtx_filename(Options::data_path + "/shaders/ideas-under-table.vert"); + string under_table_frg_filename(Options::data_path + "/shaders/ideas-under-table.frag"); ShaderSource under_table_vtx_source(under_table_vtx_filename); ShaderSource under_table_frg_source(under_table_frg_filename); if (!Scene::load_shaders_from_strings(underProgram_, under_table_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/table.h glmark2-2021.02/android/jni/src/scene-ideas/table.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/table.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/table.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/t.cc glmark2-2021.02/android/jni/src/scene-ideas/t.cc --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas/t.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas/t.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 't' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas.cpp glmark2-2021.02/android/jni/src/scene-ideas.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-ideas.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-ideas.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -48,7 +48,7 @@ timeOffset_(START_TIME_) { startTime_.tv_sec = 0; - startTime_.tv_usec = 0; + startTime_.tv_nsec = 0; } ~SceneIdeasPrivate() { @@ -69,7 +69,7 @@ float currentSpeed_; float currentTime_; float timeOffset_; - struct timeval startTime_; + struct timespec startTime_; static const float CYCLE_TIME_; static const float TIME_; static const float START_TIME_; @@ -163,17 +163,17 @@ SceneIdeasPrivate::reset_time() { timeOffset_ = START_TIME_; - gettimeofday(&startTime_, NULL); + clock_gettime(CLOCK_MONOTONIC, &startTime_); } void SceneIdeasPrivate::update_time() { // Compute new time - struct timeval current = {0, 0}; - gettimeofday(¤t, NULL); + struct timespec current = {0, 0}; + clock_gettime(CLOCK_MONOTONIC, ¤t); float timediff = (current.tv_sec - startTime_.tv_sec) + - static_cast(current.tv_usec - startTime_.tv_usec) / 1000000.0; + static_cast(current.tv_nsec - startTime_.tv_nsec) / 1000000000.0; float sceneTime = timediff * currentSpeed_ + timeOffset_; // Keep the current time in [START_TIME_..CYCLE_TIME_) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-jellyfish.cpp glmark2-2021.02/android/jni/src/scene-jellyfish.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-jellyfish.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-jellyfish.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -24,6 +24,7 @@ #include #include #include +#include "options.h" #include "scene.h" #include "scene-jellyfish.h" #include "log.h" @@ -115,8 +116,8 @@ GradientRenderer::init() { // Program set up - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/gradient.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/gradient.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/gradient.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/gradient.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); if (!Scene::load_shaders_from_strings(program_, vtx_source.str(), @@ -272,7 +273,7 @@ { Log::debug("Loading model from file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& inputFile(*input_file_ptr); if (!inputFile) { @@ -375,7 +376,7 @@ bool JellyfishPrivate::initialize() { - static const string modelFilename(GLMARK_DATA_PATH"/models/jellyfish.jobj"); + static const string modelFilename(Options::data_path + "/models/jellyfish.jobj"); if (!load_obj(modelFilename)) { return false; @@ -410,8 +411,8 @@ // Set up program first so we can store attribute and uniform locations // away for the using std::string; - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/jellyfish.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/jellyfish.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/jellyfish.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/jellyfish.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-jellyfish.h glmark2-2021.02/android/jni/src/scene-jellyfish.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-jellyfish.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-jellyfish.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-loop.cpp glmark2-2021.02/android/jni/src/scene-loop.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-loop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-loop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,13 +23,14 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" #include "shader-source.h" #include "util.h" -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/loop"); +static const std::string shader_file_base("/shaders/loop"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -62,11 +63,11 @@ static std::string get_fragment_shader_source(int steps, bool loop, bool uniform) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; if (loop) { - source_main.append_file(step_loop_file); + source_main.append_file(Options::data_path + step_loop_file); if (uniform) { source_main.replace("$NLOOPS$", "FragmentLoops"); } @@ -76,7 +77,7 @@ } else { for (int i = 0; i < steps; i++) - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); @@ -87,11 +88,11 @@ static std::string get_vertex_shader_source(int steps, bool loop, bool uniform) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; if (loop) { - source_main.append_file(step_loop_file); + source_main.append_file(Options::data_path + step_loop_file); if (uniform) { source_main.replace("$NLOOPS$", "VertexLoops"); } @@ -101,7 +102,7 @@ } else { for (int i = 0; i < steps; i++) - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-pulsar.cpp glmark2-2021.02/android/jni/src/scene-pulsar.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-pulsar.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-pulsar.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,6 +26,7 @@ #include #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -112,19 +113,19 @@ std::string frg_shader_filename; static const vec4 lightPosition(-20.0f, 20.0f,-20.0f, 1.0f); if (options_["light"].value == "true") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/pulsar-light.vert"; + vtx_shader_filename = Options::data_path + "/shaders/pulsar-light.vert"; } else { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/pulsar.vert"; + vtx_shader_filename = Options::data_path + "/shaders/pulsar.vert"; } if (options_["texture"].value == "true") { - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic-tex.frag"; + frg_shader_filename = Options::data_path + "/shaders/light-basic-tex.frag"; Texture::find_textures(); if (!Texture::load("crate-base", &texture_, GL_NEAREST, GL_NEAREST, 0)) return false; } else { - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.frag"; + frg_shader_filename = Options::data_path + "/shaders/light-basic.frag"; } ShaderSource vtx_source(vtx_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-refract.cpp glmark2-2021.02/android/jni/src/scene-refract.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-refract.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-refract.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -21,6 +21,7 @@ // #include "scene-refract.h" #include "model.h" +#include "options.h" #include "texture.h" #include "util.h" #include "log.h" @@ -176,10 +177,10 @@ // bool -DistanceRenderTarget::setup(unsigned int width, unsigned int height) +DistanceRenderTarget::setup(unsigned int canvas_fbo, unsigned int width, unsigned int height) { - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/depth.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/depth.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/depth.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/depth.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -192,6 +193,7 @@ canvas_height_ = height; width_ = canvas_width_ * 2; height_ = canvas_height_ * 2; + canvas_fbo_ = canvas_fbo; // If the texture will be too large for the implemnetation, we need to // clamp the dimensions but maintain the aspect ratio. @@ -235,7 +237,7 @@ Log::error("DistanceRenderTarget::setup: glCheckFramebufferStatus failed (0x%x)\n", status); return false; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); return true; } @@ -272,7 +274,7 @@ void DistanceRenderTarget::disable() { - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); glViewport(0, 0, canvas_width_, canvas_height_); glCullFace(GL_BACK); } @@ -281,8 +283,8 @@ RefractPrivate::setup(map& options) { // Program object setup - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-refract.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-refract.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/light-refract.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/light-refract.frag"); static const vec4 lightColor(0.4, 0.4, 0.4, 1.0); ShaderSource vtx_source(vtx_shader_filename); @@ -382,7 +384,7 @@ 0.0, 0.0, 0.0, 0.0, 1.0, 0.0); - if (!depthTarget_.setup(canvas_.width(), canvas_.height())) { + if (!depthTarget_.setup(canvas_.fbo(), canvas_.width(), canvas_.height())) { Log::error("Failed to set up the render target for the depth pass\n"); return false; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-refract.h glmark2-2021.02/android/jni/src/scene-refract.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-refract.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-refract.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -46,6 +46,7 @@ unsigned int height_; unsigned int tex_[2]; unsigned int fbo_; + unsigned int canvas_fbo_; public: DistanceRenderTarget() : canvas_width_(0), @@ -57,7 +58,7 @@ tex_[DEPTH] = tex_[COLOR] = 0; } ~DistanceRenderTarget() {} - bool setup(unsigned int width, unsigned int height); + bool setup(unsigned int canvas_fbo, unsigned int width, unsigned int height); void teardown(); void enable(const LibMatrix::mat4& mvp); void disable(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-shading.cpp glmark2-2021.02/android/jni/src/scene-shading.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-shading.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-shading.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,6 +24,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -154,24 +155,24 @@ ShaderSource vtx_source; ShaderSource frg_source; if (shading == "gouraud") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-basic.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-basic.frag"; frg_source.append_file(frg_shader_filename); vtx_source.append_file(vtx_shader_filename); vtx_source.add_const("LightSourcePosition", lightPosition); vtx_source.add_const("MaterialDiffuse", materialDiffuse); } else if (shading == "blinn-phong-inf") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-advanced.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-advanced.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-advanced.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-advanced.frag"; frg_source.append_file(frg_shader_filename); frg_source.add_const("LightSourcePosition", lightPosition); frg_source.add_const("LightSourceHalfVector", halfVector); vtx_source.append_file(vtx_shader_filename); } else if (shading == "phong") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-phong.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-phong.frag"; unsigned int num_lights = Util::fromString(options_["num-lights"].value); string fragsource = get_fragment_shader_source(frg_shader_filename, num_lights); frg_source.append(fragsource); @@ -179,8 +180,8 @@ vtx_source.append_file(vtx_shader_filename); } else if (shading == "cel") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-cel.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-phong.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-cel.frag"; vtx_source.append_file(vtx_shader_filename); frg_source.append_file(frg_shader_filename); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-shadow.cpp glmark2-2021.02/android/jni/src/scene-shadow.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-shadow.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-shadow.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -21,6 +21,7 @@ // #include "scene.h" #include "model.h" +#include "options.h" #include "util.h" #include "log.h" #include "shader-source.h" @@ -53,6 +54,7 @@ unsigned int height_; unsigned int tex_; unsigned int fbo_; + unsigned int canvas_fbo_; public: DepthRenderTarget() : canvas_width_(0), @@ -62,7 +64,7 @@ tex_(0), fbo_(0) {} ~DepthRenderTarget() {} - bool setup(unsigned int width, unsigned int height); + bool setup(unsigned int canvas_fbo, unsigned int width, unsigned int height); void teardown(); void enable(const mat4& mvp); void disable(); @@ -71,10 +73,10 @@ }; bool -DepthRenderTarget::setup(unsigned int width, unsigned int height) +DepthRenderTarget::setup(unsigned int canvas_fbo, unsigned int width, unsigned int height) { - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/depth.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/depth.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/depth.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/depth.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -85,6 +87,7 @@ canvas_width_ = width; canvas_height_ = height; + canvas_fbo_ = canvas_fbo; width_ = canvas_width_ * 2; height_ = canvas_height_ * 2; @@ -120,7 +123,7 @@ Log::error("DepthRenderTarget::setup: glCheckFramebufferStatus failed (0x%x)\n", status); return false; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); return true; } @@ -155,7 +158,7 @@ void DepthRenderTarget::disable() { - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); glViewport(0, 0, canvas_width_, canvas_height_); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } @@ -201,8 +204,8 @@ // Program set up static const vec4 materialDiffuse(0.3f, 0.3f, 0.3f, 1.0f); - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/shadow.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/shadow.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/shadow.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/shadow.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -314,8 +317,8 @@ ShadowPrivate::setup(map& options) { // Program object setup - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/light-basic.frag"); static const vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); ShaderSource vtx_source(vtx_shader_filename); @@ -372,7 +375,7 @@ float aspect(static_cast(canvas_.width())/static_cast(canvas_.height())); projection_.perspective(fovy, aspect, 2.0, 50.0); - if (!depthTarget_.setup(canvas_.width(), canvas_.height())) { + if (!depthTarget_.setup(canvas_.fbo(), canvas_.width(), canvas_.height())) { Log::error("Failed to set up the render target for the depth pass\n"); return false; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/base-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/base-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/base-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/base-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,26 +21,33 @@ */ #include "renderer.h" -BaseRenderer::BaseRenderer(const LibMatrix::vec2 &size) : +BaseRenderer::BaseRenderer() : texture_(0), input_texture_(0), fbo_(0), depth_renderbuffer_(0), - min_filter_(GL_LINEAR), mag_filter_(GL_LINEAR), + owns_fbo_(false), min_filter_(GL_LINEAR), mag_filter_(GL_LINEAR), wrap_s_(GL_CLAMP_TO_EDGE), wrap_t_(GL_CLAMP_TO_EDGE) { - setup(size, true, true); } BaseRenderer::~BaseRenderer() { glDeleteTextures(1, &texture_); glDeleteRenderbuffers(1, &depth_renderbuffer_); - glDeleteFramebuffers(1, &fbo_); + if (owns_fbo_) + glDeleteFramebuffers(1, &fbo_); +} + +void +BaseRenderer::setup_onscreen(Canvas& canvas) +{ + size_ = LibMatrix::vec2(canvas.width(), canvas.height()); + recreate(&canvas, false); } void -BaseRenderer::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +BaseRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { size_ = size; - recreate(onscreen, has_depth); + recreate(nullptr, has_depth); } void @@ -59,7 +66,7 @@ { glBindFramebuffer(GL_FRAMEBUFFER, fbo_); glViewport(0, 0, size_.x(), size_.y()); - if (!fbo_ || depth_renderbuffer_) { + if (!owns_fbo_ || depth_renderbuffer_) { glEnable(GL_DEPTH_TEST); glDepthMask(GL_TRUE); } @@ -79,21 +86,26 @@ } void -BaseRenderer::recreate(bool onscreen, bool has_depth) +BaseRenderer::recreate(Canvas* canvas, bool has_depth) { if (texture_) { glDeleteTextures(1, &texture_); texture_ = 0; } - if (fbo_) { + if (owns_fbo_ && fbo_) { glDeleteRenderbuffers(1, &depth_renderbuffer_); depth_renderbuffer_ = 0; glDeleteFramebuffers(1, &fbo_); fbo_ = 0; } - if (!onscreen) { + + if (canvas) { + fbo_ = canvas->fbo(); + owns_fbo_ = false; + } else { create_texture(); create_fbo(has_depth); + owns_fbo_ = true; } } @@ -131,6 +143,9 @@ size_.x(), size_.y()); } + GLint prev_fbo; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prev_fbo); + /* Create the FBO and attach the texture and the renderebuffer */ glGenFramebuffers(1, &fbo_); glBindFramebuffer(GL_FRAMEBUFFER, fbo_); @@ -140,5 +155,6 @@ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_renderbuffer_); } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + + glBindFramebuffer(GL_FRAMEBUFFER, prev_fbo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/blur-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/blur-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/blur-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/blur-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,18 +19,21 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" +#include + void create_blur_shaders(ShaderSource& vtx_source, ShaderSource& frg_source, unsigned int radius, float sigma, BlurRenderer::BlurDirection direction, float tilt_shift); -BlurRenderer::BlurRenderer(const LibMatrix::vec2 &size, int radius, float sigma, +BlurRenderer::BlurRenderer(int radius, float sigma, BlurDirection dir, const LibMatrix::vec2 &step, float tilt_shift) : - TextureRenderer(size, *blur_program(true, radius, sigma, dir, step, tilt_shift)) + TextureRenderer(*blur_program(true, radius, sigma, dir, step, tilt_shift)) { blur_program_ = blur_program(false, radius, sigma, dir, step, tilt_shift); } @@ -74,15 +77,15 @@ unsigned int radius, float sigma, BlurRenderer::BlurDirection direction, float tilt_shift) { - vtx_source.append_file(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - frg_source.append_file(GLMARK_DATA_PATH"/shaders/terrain-blur.frag"); + vtx_source.append_file(Options::data_path + "/shaders/terrain-texture.vert"); + frg_source.append_file(Options::data_path + "/shaders/terrain-blur.frag"); /* Don't let the gaussian curve become too narrow */ if (sigma < 1.0) sigma = 1.0; unsigned int side = 2 * radius + 1; - float values[radius]; + std::vector values(radius + 1); float sum = 0.0; for (unsigned int i = 0; i < radius + 1; i++) { diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/copy-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/copy-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/copy-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/copy-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,12 +19,13 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -CopyRenderer::CopyRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *copy_program(true)) +CopyRenderer::CopyRenderer() : + TextureRenderer(*copy_program(true)) { copy_program_ = copy_program(false); } @@ -38,8 +39,8 @@ if (!copy_program) { copy_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-overlay.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-overlay.frag"); Scene::load_shaders_from_strings(*copy_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/luminance-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/luminance-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/luminance-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/luminance-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,12 +19,13 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -LuminanceRenderer::LuminanceRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *luminance_program(true)) +LuminanceRenderer::LuminanceRenderer() : + TextureRenderer(*luminance_program(true)) { luminance_program_ = luminance_program(false); } @@ -38,8 +39,8 @@ if (!luminance_program) { luminance_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-luminance.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-luminance.frag"); Scene::load_shaders_from_strings(*luminance_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/normal-from-height-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/normal-from-height-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/normal-from-height-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/normal-from-height-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,19 +19,40 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -NormalFromHeightRenderer::NormalFromHeightRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *normal_from_height_program(size, true)) +NormalFromHeightRenderer::NormalFromHeightRenderer() : + TextureRenderer(*normal_from_height_program(true)) { - normal_from_height_program_ = normal_from_height_program(size, false); + normal_from_height_program_ = normal_from_height_program(false); +} + +void +NormalFromHeightRenderer::setup_onscreen(Canvas& canvas) +{ + TextureRenderer::setup_onscreen(canvas); + + normal_from_height_program_->start(); + (*normal_from_height_program_)["resolution"] = + LibMatrix::vec2(canvas.width(), canvas.height()); + normal_from_height_program_->stop(); +} + +void +NormalFromHeightRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) +{ + TextureRenderer::setup_offscreen(size, has_depth); + + normal_from_height_program_->start(); + (*normal_from_height_program_)["resolution"] = size; + normal_from_height_program_->stop(); } Program * -NormalFromHeightRenderer::normal_from_height_program(const LibMatrix::vec2 &size, - bool create_new) +NormalFromHeightRenderer::normal_from_height_program(bool create_new) { static Program *normal_from_height_program(0); if (create_new) @@ -39,15 +60,14 @@ if (!normal_from_height_program) { normal_from_height_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-normalmap.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-normalmap.frag"); Scene::load_shaders_from_strings(*normal_from_height_program, vtx_shader.str(), frg_shader.str()); normal_from_height_program->start(); (*normal_from_height_program)["heightMap"] = 0; - (*normal_from_height_program)["resolution"] = size; (*normal_from_height_program)["height"] = 0.05f; (*normal_from_height_program)["uvOffset"] = LibMatrix::vec2(0.0f, 0.0f); (*normal_from_height_program)["uvScale"] = LibMatrix::vec2(1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/overlay-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/overlay-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/overlay-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/overlay-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,6 +19,7 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" @@ -33,10 +34,14 @@ void -OverlayRenderer::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +OverlayRenderer::setup_onscreen(Canvas &canvas) +{ +} + +void +OverlayRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { static_cast(size); - static_cast(onscreen); static_cast(has_depth); } @@ -107,8 +112,8 @@ void OverlayRenderer::create_program() { - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-overlay.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-overlay.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_shader.str(), frg_shader.str())) return; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/renderer-chain.cpp glmark2-2021.02/android/jni/src/scene-terrain/renderer-chain.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/renderer-chain.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/renderer-chain.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -22,10 +22,15 @@ #include "renderer.h" void -RendererChain::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +RendererChain::setup_onscreen(Canvas &canvas) +{ + static_cast(canvas); +} + +void +RendererChain::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { static_cast(size); - static_cast(onscreen); static_cast(has_depth); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/renderer.h glmark2-2021.02/android/jni/src/scene-terrain/renderer.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/renderer.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/renderer.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,6 +22,7 @@ #include #include +#include "canvas.h" #include "mesh.h" #include "vec.h" #include "program.h" @@ -36,7 +37,8 @@ /** * Sets up the renderer's target. */ - virtual void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) = 0; + virtual void setup_onscreen(Canvas& canvas) = 0; + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) = 0; /** * Sets up the renderer's target texture (if any). */ @@ -81,7 +83,8 @@ virtual ~RendererChain() {} /* IRenderer methods */ - void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + void setup_onscreen(Canvas& canvas); + void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); void input_texture(GLuint t); @@ -108,11 +111,12 @@ class BaseRenderer : public IRenderer { public: - BaseRenderer(const LibMatrix::vec2 &size); + BaseRenderer(); virtual ~BaseRenderer(); /* IRenderer methods */ - virtual void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); virtual void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); virtual void input_texture(GLuint t) { input_texture_ = t; } @@ -123,7 +127,7 @@ virtual void render() = 0; protected: - void recreate(bool onscreen, bool has_depth); + void recreate(Canvas* canvas, bool has_depth); void create_texture(); void update_texture_parameters(); void create_fbo(bool has_depth); @@ -133,6 +137,7 @@ GLuint input_texture_; GLuint fbo_; GLuint depth_renderbuffer_; + bool owns_fbo_; GLint min_filter_; GLint mag_filter_; GLint wrap_s_; @@ -146,7 +151,7 @@ class TextureRenderer : public BaseRenderer { public: - TextureRenderer(const LibMatrix::vec2 &size, Program &program); + TextureRenderer(Program &program); virtual ~TextureRenderer() { } /* IRenderer/BaseRenderer methods */ @@ -170,7 +175,7 @@ class CopyRenderer : public TextureRenderer { public: - CopyRenderer(const LibMatrix::vec2 &size); + CopyRenderer(); virtual ~CopyRenderer() { delete copy_program_; } @@ -186,7 +191,7 @@ class SimplexNoiseRenderer : public TextureRenderer { public: - SimplexNoiseRenderer(const LibMatrix::vec2 &size); + SimplexNoiseRenderer(); virtual ~SimplexNoiseRenderer() { delete noise_program_; } LibMatrix::vec2 uv_scale() { return uv_scale_; } @@ -204,12 +209,14 @@ class NormalFromHeightRenderer : public TextureRenderer { public: - NormalFromHeightRenderer(const LibMatrix::vec2 &size); + NormalFromHeightRenderer(); virtual ~NormalFromHeightRenderer() { delete normal_from_height_program_; } + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); + private: - static Program *normal_from_height_program(const LibMatrix::vec2 &size, - bool create_new); + static Program *normal_from_height_program(bool create_new); Program *normal_from_height_program_; }; @@ -220,7 +227,7 @@ class LuminanceRenderer : public TextureRenderer { public: - LuminanceRenderer(const LibMatrix::vec2 &size); + LuminanceRenderer(); virtual ~LuminanceRenderer() { delete luminance_program_; } private: @@ -242,8 +249,8 @@ BlurDirectionBoth }; - BlurRenderer(const LibMatrix::vec2 &size, int radius, float sigma, - BlurDirection dir, const LibMatrix::vec2 &step, float tilt_shift); + BlurRenderer(int radius, float sigma, BlurDirection dir, + const LibMatrix::vec2 &step, float tilt_shift); virtual ~BlurRenderer() { delete blur_program_; } private: @@ -265,7 +272,8 @@ virtual ~OverlayRenderer() { } /* IRenderable Methods */ - void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); void input_texture(GLuint t) { input_texture_ = t; } @@ -293,7 +301,7 @@ class TerrainRenderer : public BaseRenderer { public: - TerrainRenderer(const LibMatrix::vec2 &size, const LibMatrix::vec2 &repeat_overlay); + TerrainRenderer(const LibMatrix::vec2 &repeat_overlay); virtual ~TerrainRenderer(); /* IRenderable Methods */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/simplex-noise-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/simplex-noise-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/simplex-noise-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/simplex-noise-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,14 +19,15 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" LibMatrix::vec2 SimplexNoiseRenderer::uv_scale_(1.5f, 1.5); -SimplexNoiseRenderer::SimplexNoiseRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *noise_program(true)) +SimplexNoiseRenderer::SimplexNoiseRenderer() : + TextureRenderer(*noise_program(true)) { noise_program_ = noise_program(false); } @@ -40,8 +41,8 @@ if (!noise_program) { noise_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-noise.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-noise.frag"); Scene::load_shaders_from_strings(*noise_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/terrain-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/terrain-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/terrain-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/terrain-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,14 +19,14 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "texture.h" #include "shader-source.h" -TerrainRenderer::TerrainRenderer(const LibMatrix::vec2 &size, - const LibMatrix::vec2 &repeat_overlay) : - BaseRenderer(size), height_map_tex_(0), normal_map_tex_(0), +TerrainRenderer::TerrainRenderer(const LibMatrix::vec2 &repeat_overlay) : + BaseRenderer(), height_map_tex_(0), normal_map_tex_(0), specular_map_tex_(0), repeat_overlay_(repeat_overlay) { create_mesh(); @@ -85,8 +85,8 @@ void TerrainRenderer::init_program() { - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_shader.str(), frg_shader.str())) return; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/texture-renderer.cpp glmark2-2021.02/android/jni/src/scene-terrain/texture-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain/texture-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain/texture-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,8 +21,8 @@ */ #include "renderer.h" -TextureRenderer::TextureRenderer(const LibMatrix::vec2 &size, Program &program) : - BaseRenderer(size), program_(program) +TextureRenderer::TextureRenderer(Program &program) : + BaseRenderer(), program_(program) { /* Create the mesh (quad) used for rendering */ create_mesh(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain.cpp glmark2-2021.02/android/jni/src/scene-terrain.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-terrain.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-terrain.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -64,63 +64,67 @@ const vec2 bloom_res(256.0f, 256.0f); const vec2 grass_res(512.0f, 512.0f); - height_map_renderer = new SimplexNoiseRenderer(map_res); - height_map_renderer->setup(height_map_renderer->size(), false, false); + height_map_renderer = new SimplexNoiseRenderer(); + height_map_renderer->setup_offscreen(map_res, false); - normal_map_renderer = new NormalFromHeightRenderer(map_res); - normal_map_renderer->setup(normal_map_renderer->size(), false, false); + normal_map_renderer = new NormalFromHeightRenderer(); + normal_map_renderer->setup_offscreen(map_res, false); - specular_map_renderer = new LuminanceRenderer(grass_res); + specular_map_renderer = new LuminanceRenderer(); + specular_map_renderer->setup_offscreen(grass_res, false); specular_map_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_REPEAT, GL_REPEAT); - specular_map_renderer->setup(specular_map_renderer->size(), false, false); - terrain_renderer = new TerrainRenderer(screen_res, repeat_overlay); - terrain_renderer->setup(terrain_renderer->size(), - !use_bloom && !use_tilt_shift, - true); + terrain_renderer = new TerrainRenderer(repeat_overlay); + if (!use_bloom && !use_tilt_shift) + terrain_renderer->setup_onscreen(canvas); + else + terrain_renderer->setup_offscreen(screen_res, true); terrain_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); /* Bloom */ if (use_bloom) { - bloom_h_renderer = new BlurRenderer(bloom_res, 2, 4.0, + bloom_h_renderer = new BlurRenderer(2, 4.0, BlurRenderer::BlurDirectionHorizontal, vec2(1.0, 1.0) / screen_res, 0.0); + bloom_h_renderer->setup_offscreen(bloom_res, false); bloom_h_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - bloom_h_renderer->setup(bloom_h_renderer->size(), false, false); - bloom_v_renderer = new BlurRenderer(bloom_res, 2, 4.0, + bloom_v_renderer = new BlurRenderer(2, 4.0, BlurRenderer::BlurDirectionVertical, vec2(1.0, 1.0) / bloom_res, 0.0); + bloom_v_renderer->setup_offscreen(bloom_res, false); bloom_v_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - bloom_v_renderer->setup(bloom_v_renderer->size(), false, false); overlay_renderer = new OverlayRenderer(*terrain_renderer, 0.6); } /* Tilt-shift */ if (use_tilt_shift) { - tilt_h_renderer = new BlurRenderer(screen_res, 4, 2.7, + tilt_h_renderer = new BlurRenderer(4, 2.7, BlurRenderer::BlurDirectionHorizontal, vec2(1.0, 1.0) / screen_res, 0.5); + tilt_h_renderer->setup_offscreen(screen_res, false); tilt_h_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - tilt_h_renderer->setup(tilt_h_renderer->size(), false, false); - tilt_v_renderer = new BlurRenderer(screen_res, 4, 2.7, + tilt_v_renderer = new BlurRenderer(4, 2.7, BlurRenderer::BlurDirectionVertical, vec2(1.0, 1.0) / screen_res, 0.5); + tilt_v_renderer->setup_onscreen(canvas); } /* Copy renderer */ - if (use_bloom && !use_tilt_shift) - copy_renderer = new CopyRenderer(screen_res); + if (use_bloom && !use_tilt_shift) { + copy_renderer = new CopyRenderer(); + copy_renderer->setup_onscreen(canvas); + } /* Height normal chain */ height_normal_chain = new RendererChain(); @@ -310,7 +314,7 @@ /* Create the specular map */ priv_->specular_map_renderer->render(); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_.fbo()); glViewport(0, 0, canvas_.width(), canvas_.height()); currentFrame_ = 0; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-texture.cpp glmark2-2021.02/android/jni/src/scene-texture.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/scene-texture.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/scene-texture.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,6 +24,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -109,10 +110,10 @@ if (!Scene::setup()) return false; - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const std::string vtx_shader_texgen_filename(GLMARK_DATA_PATH"/shaders/light-basic-texgen.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic-tex.frag"); - static const std::string frg_shader_bilinear_filename(GLMARK_DATA_PATH"/shaders/light-basic-tex-bilinear.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const std::string vtx_shader_texgen_filename(Options::data_path + "/shaders/light-basic-texgen.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/light-basic-tex.frag"); + static const std::string frg_shader_bilinear_filename(Options::data_path + "/shaders/light-basic-tex-bilinear.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); static const LibMatrix::vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/shared-library.cpp glmark2-2021.02/android/jni/src/shared-library.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/shared-library.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/shared-library.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,90 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#include "shared-library.h" + +#include + +#if defined(WIN32) +#include +#else +#include +#endif + +/****************** + * Public methods * + ******************/ +SharedLibrary::SharedLibrary() : handle_(nullptr) {} + +SharedLibrary::~SharedLibrary() +{ + close(); +} + +bool SharedLibrary::open(const char *libName) +{ +#if defined(WIN32) + handle_ = LoadLibraryA(libName); +#else + handle_ = dlopen(libName, RTLD_NOW | RTLD_NODELETE); +#endif + return (handle_ != nullptr); +} + +bool SharedLibrary::open_from_alternatives(std::initializer_list alt_names) +{ + for (const char *name : alt_names) { + if (open(name)) + return true; + } + + return false; +} + +void SharedLibrary::close() +{ + if (handle_) + { +#if defined(WIN32) + FreeLibrary(reinterpret_cast(handle_)); +#else + dlclose(handle_); +#endif + } +} + +void *SharedLibrary::handle() const +{ + return handle_; +} + +void *SharedLibrary::load(const char *symbol) const +{ +#if defined(WIN32) + return reinterpret_cast(GetProcAddress(reinterpret_cast(handle_), symbol)); +#else + return dlsym(handle_, symbol); +#endif +} + +/******************* + * Private methods * + *******************/ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/shared-library.h glmark2-2021.02/android/jni/src/shared-library.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/shared-library.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/shared-library.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,44 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#ifndef GLMARK2_SHARED_LIBRARY_H_ +#define GLMARK2_SHARED_LIBRARY_H_ + +#include + +class SharedLibrary +{ +public: + SharedLibrary(); + ~SharedLibrary(); + + bool open(const char *name); + bool open_from_alternatives(std::initializer_list alt_names); + void close(); + + void *handle() const; + void *load(const char *symbol) const; + +private: + void *handle_; +}; + +#endif /* GLMARK2_SHARED_LIBRARY_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/text-renderer.cpp glmark2-2021.02/android/jni/src/text-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/text-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/text-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,6 +21,7 @@ */ #include "text-renderer.h" #include "gl-headers.h" +#include "options.h" #include "scene.h" #include "shader-source.h" #include "vec.h" @@ -49,8 +50,8 @@ size(0.03); glGenBuffers(2, vbo_); - ShaderSource vtx_source(GLMARK_DATA_PATH"/shaders/text-renderer.vert"); - ShaderSource frg_source(GLMARK_DATA_PATH"/shaders/text-renderer.frag"); + ShaderSource vtx_source(Options::data_path + "/shaders/text-renderer.vert"); + ShaderSource frg_source(Options::data_path + "/shaders/text-renderer.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_source.str(), frg_source.str())) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/texture.cpp glmark2-2021.02/android/jni/src/texture.cpp --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/texture.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/jni/src/texture.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,9 +23,11 @@ */ #include "texture.h" #include "log.h" +#include "options.h" #include "util.h" #include "image-reader.h" +#include #include #include @@ -149,7 +151,7 @@ return TexturePrivate::textureMap; } vector pathVec; - string dataDir(GLMARK_DATA_PATH"/textures"); + string dataDir(Options::data_path + "/textures"); Util::list_files(dataDir, pathVec); // Now that we have a list of all of the image files available to us, // let's go through and pull out the names and what format they're in diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/wscript_build glmark2-2021.02/android/jni/src/wscript_build --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/wscript_build 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/android/jni/src/wscript_build 2021-04-25 01:47:22.000000000 +0800 @@ -1,70 +1,251 @@ +# Detect a Windows build. +is_win = 'WIN32' in ' '.join(bld.env.keys()) + +# Detect MSVC +is_msvc = bld.env.CXX_NAME == 'msvc' + all_sources = bld.path.ant_glob('*.cpp scene-ideas/*.cc scene-terrain/*.cpp') common_sources = [f for f in all_sources if f.name.find('canvas-') == -1 and f.name.find('android') == -1 and f.name.find('native-state-') == -1 and - f.name.find('gl-state-') == -1] -common_uselibs = ['libpng'] + f.name.find('gl-state-') == -1 and + f.name.find('main.cpp') == -1] + common_defines = ['USE_EXCEPTIONS'] libmatrix_sources = [f for f in bld.path.ant_glob('libmatrix/*.cc') if not f.name.endswith('test.cc')] + +common_flavor_sources = ['main.cpp', 'canvas-generic.cpp'] + +libpng_local_sources = [f for f in bld.path.ant_glob('libpng/*.c')] +zlib_local_sources = [f for f in bld.path.ant_glob('zlib/*.c')] + +# Some c sources in libjpeg-turbo are included like headers. So we cannot use globs. +libjpeg_local_sources = [ + 'jcapimin', 'jcapistd', 'jccoefct', 'jccolor', 'jcdctmgr', 'jchuff', 'jcinit', + 'jcmainct', 'jcmarker', 'jcmaster', 'jcomapi', 'jcparam', 'jcphuff', 'jcprepct', + 'jcsample', 'jdapimin', 'jdapistd', 'jdatadst', 'jdatasrc', 'jdcoefct', 'jdcolor', + 'jddctmgr', 'jdhuff', 'jdinput', 'jdmainct', 'jdmarker', 'jdmaster', 'jdmerge', + 'jdphuff', 'jdpostct', 'jdsample', 'jerror', 'jfdctflt', 'jfdctfst', 'jfdctint', + 'jidctflt', 'jidctfst', 'jidctint', 'jidctred', 'jmemmgr', 'jmemnobs', 'jquant1', + 'jquant2', 'jsimd_none', 'jutils', +] + +libjpeg_turbo_local_sources = ['libjpeg-turbo/%s.c' % f for f in libjpeg_local_sources] + +platform_includes = [] +platform_libs = [] +platform_uselibs = [] + +if is_win: + platform_uselibs += ['libpng-local', 'zlib-local', 'libjpeg-turbo-local'] + platform_libs = ['user32'] + + # On windows include some overrides for posix headers. + if is_msvc: + platform_includes += ['include'] +else: + platform_uselibs += ['libpng'] + platform_libs = ['m', 'jpeg', 'dl'] + +if 'WAYLAND_SCANNER_wayland_scanner' in bld.env.keys(): + def wayland_scanner_cmd(arg, src): + return '%s %s < %s > ${TGT}' % (bld.env['WAYLAND_SCANNER_wayland_scanner'], arg, src) + + def wayland_proto_src_path(proto, ver): + wp_dir = bld.env['WAYLAND_PROTOCOLS_pkgdatadir'] + if ver == 'stable': + return '%s/stable/%s/%s.xml' % (wp_dir, proto, proto) + else: + return '%s/unstable/%s/%s-unstable-%s.xml' % (wp_dir, proto, proto, ver) + + def wayland_client_protocol(proto, ver, dst_base): + src_path = wayland_proto_src_path(proto, ver) + out = '%s-client-protocol.h' % dst_base + return bld(rule = wayland_scanner_cmd('client-header', src_path), + target = out, + name = out, + ext_out = ['.h']) + + def wayland_protocol_code(proto, ver, dst_base): + src_path = wayland_proto_src_path(proto, ver) + out = '%s-protocol.c' % dst_base + return bld(rule = wayland_scanner_cmd('private-code', src_path), + target = out, + name = out) + + wayland_client_protocol('xdg-shell', 'stable', 'xdg-shell') + wayland_protocol_code('xdg-shell', 'stable', 'xdg-shell') + flavor_sources = { - 'x11-gl' : ['canvas-generic.cpp', 'native-state-x11.cpp', 'gl-state-glx.cpp'], - 'x11-glesv2' : ['canvas-generic.cpp', 'native-state-x11.cpp', 'gl-state-egl.cpp'], - 'drm-gl' : ['canvas-generic.cpp', 'native-state-drm.cpp', 'gl-state-egl.cpp'], - 'drm-glesv2' : ['canvas-generic.cpp', 'native-state-drm.cpp', 'gl-state-egl.cpp'], - 'mir-gl' : ['canvas-generic.cpp', 'native-state-mir.cpp', 'gl-state-egl.cpp'], - 'mir-glesv2' : ['canvas-generic.cpp', 'native-state-mir.cpp', 'gl-state-egl.cpp'], - 'wayland-gl' : ['canvas-generic.cpp', 'native-state-wayland.cpp', 'gl-state-egl.cpp'], - 'wayland-glesv2' : ['canvas-generic.cpp', 'native-state-wayland.cpp', 'gl-state-egl.cpp'] + 'dispmanx-glesv2' : common_flavor_sources + ['native-state-dispmanx.cpp', 'gl-state-egl.cpp'], + 'drm-gl' : common_flavor_sources + ['native-state-drm.cpp', 'gl-state-egl.cpp'], + 'drm-glesv2' : common_flavor_sources + ['native-state-drm.cpp', 'gl-state-egl.cpp'], + 'mir-gl' : common_flavor_sources + ['native-state-mir.cpp', 'gl-state-egl.cpp'], + 'mir-glesv2' : common_flavor_sources + ['native-state-mir.cpp', 'gl-state-egl.cpp'], + 'wayland-gl' : common_flavor_sources + ['native-state-wayland.cpp', 'gl-state-egl.cpp'], + 'wayland-glesv2' : common_flavor_sources + ['native-state-wayland.cpp', 'gl-state-egl.cpp'], + 'win32-gl': common_flavor_sources + ['native-state-win32.cpp', 'gl-state-wgl.cpp'], + 'win32-glesv2': common_flavor_sources + ['native-state-win32.cpp', 'gl-state-egl.cpp'], + 'x11-gl' : common_flavor_sources + ['native-state-x11.cpp', 'gl-state-glx.cpp'], + 'x11-glesv2' : common_flavor_sources + ['native-state-x11.cpp', 'gl-state-egl.cpp'], } flavor_uselibs = { - 'x11-gl' : ['x11', 'gl', 'matrix-gl'], - 'x11-glesv2' : ['x11', 'egl', 'glesv2', 'matrix-glesv2'], - 'drm-gl' : ['drm', 'gbm', 'egl', 'gl', 'matrix-gl'], - 'drm-glesv2' : ['drm', 'gbm', 'egl', 'glesv2', 'matrix-glesv2'], - 'mir-gl' : ['mirclient', 'egl', 'gl', 'matrix-gl'], - 'mir-glesv2' : ['mirclient', 'egl', 'glesv2', 'matrix-glesv2'], - 'wayland-gl' : ['wayland-client', 'wayland-egl', 'egl', 'gl', 'matrix-gl'], - 'wayland-glesv2' : ['wayland-client', 'wayland-egl', 'egl', 'glesv2', 'matrix-glesv2'] + 'dispmanx-glesv2' : ['glad-egl-dispmanx', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2', 'dispmanx'], + 'drm-gl' : ['drm', 'gbm', 'udev', 'glad-egl-drm', 'glad-gl', 'matrix-gl', 'common-gl'], + 'drm-glesv2' : ['drm', 'gbm', 'udev', 'glad-egl-drm', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'mir-gl' : ['mirclient', 'glad-egl-mir', 'glad-gl', 'matrix-gl', 'common-gl'], + 'mir-glesv2' : ['mirclient', 'glad-egl-mir', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'wayland-gl' : ['wayland-client', 'wayland-egl', 'wayland-cursor', 'glad-egl-wayland', 'glad-gl', 'matrix-gl', 'common-gl'], + 'wayland-glesv2' : ['wayland-client', 'wayland-egl', 'wayland-cursor', 'glad-egl-wayland', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'win32-gl': ['glad-gl', 'glad-wgl', 'matrix-gl', 'common-gl'], + 'win32-glesv2': ['glad-egl-win32', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'x11-gl' : ['x11', 'glad-gl', 'glad-glx', 'matrix-gl', 'common-gl'], + 'x11-glesv2' : ['x11', 'glad-egl-x11', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], } + flavor_defines = { - 'x11-gl' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GL', 'GLMARK2_USE_GLX'], - 'x11-glesv2' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], - 'drm-gl' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL', '__GBM__'], - 'drm-glesv2' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL', '__GBM__'], + 'dispmanx-glesv2' : ['GLMARK2_USE_DISPMANX', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'drm-gl' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], + 'drm-glesv2' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], 'mir-gl' : ['GLMARK2_USE_MIR', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], 'mir-glesv2' : ['GLMARK2_USE_MIR', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], 'wayland-gl' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], - 'wayland-glesv2' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'] + 'wayland-glesv2' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'win32-gl': ['GLMARK2_USE_WIN32', 'GLMARK2_USE_WGL', 'GLMARK2_USE_GL'], + 'win32-glesv2': ['GLMARK2_USE_WIN32', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'x11-gl' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GL', 'GLMARK2_USE_GLX'], + 'x11-glesv2' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], +} +flavor_libs = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : [], + 'wayland-glesv2' : [], + 'win32-gl': ['opengl32', 'gdi32'], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +flavor_depends_on = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : ['xdg-shell-client-protocol.h', 'xdg-shell-protocol.c'], + 'wayland-glesv2' : ['xdg-shell-client-protocol.h', 'xdg-shell-protocol.c'], + 'win32-gl': [], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +flavor_sources_gen = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : [bld.path.find_or_declare('xdg-shell-protocol.c')], + 'wayland-glesv2' : [bld.path.find_or_declare('xdg-shell-protocol.c')], + 'win32-gl': [], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +egl_platform_defines = { + 'dispmanx' : ['MESA_EGL_NO_X11_HEADERS'], + 'drm' : ['__GBM__'], + 'mir' : ['MESA_EGL_NO_X11_HEADERS'], + 'wayland' : ['WL_EGL_PLATFORM'], + 'win32' : [], + 'x11' : [], } -includes = ['.', 'scene-ideas', 'scene-terrain'] +includes = ['.', 'scene-ideas', 'scene-terrain'] + platform_includes all_uselibs = set() for name in bld.env.keys(): if name.startswith('FLAVOR_') and bld.env[name]: flavor = name.replace('FLAVOR_', '').lower().replace('_', '-') + egl_platform = flavor.split('-')[0] target = bld.env[name] - bld( + node = bld( features = ['cxx', 'cprogram'], - source = common_sources + flavor_sources[flavor], + source = flavor_sources[flavor], target = target, - use = common_uselibs + flavor_uselibs[flavor], - lib = ['m', 'jpeg', 'dl'], - includes = includes, - defines = common_defines + flavor_defines[flavor] + use = platform_uselibs + flavor_uselibs[flavor], + lib = platform_libs + flavor_libs[flavor], + includes = ['.'] + platform_includes, + defines = common_defines + flavor_defines[flavor] + + egl_platform_defines[egl_platform], + depends_on = flavor_depends_on[flavor] ) - all_uselibs |= set(flavor_uselibs[flavor]) + if flavor_sources_gen[flavor]: + node.source.extend(flavor_sources_gen[flavor]) + all_uselibs |= set(flavor_uselibs[flavor] + platform_uselibs) + +# Build glad-egl for all used EGL platforms +for egl_target in (v for v in all_uselibs if v.startswith('glad-egl')): + egl_platform = egl_target.split('-')[2] + bld( + features = ['c'], + source = ['glad/src/egl.c'], + target = egl_target, + includes = ['glad/include'], + export_includes = 'glad/include', + defines = egl_platform_defines[egl_platform] + ) + +if 'glad-glx' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/glx.c'], + target = 'glad-glx', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-gl' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/gl.c'], + target = 'glad-gl', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-wgl' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/wgl.c'], + target = 'glad-wgl', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-glesv2' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/gles2.c'], + target = 'glad-glesv2', + includes = ['glad/include'], + export_includes = 'glad/include' + ) if 'matrix-gl' in all_uselibs: bld( features = ['cxx', 'cxxstlib'], source = libmatrix_sources, target = 'matrix-gl', + use = ['glad-gl'], lib = ['m'], - includes = ['.'], + includes = ['.'] + platform_includes, export_includes = 'libmatrix', defines = ['GLMARK2_USE_GL', 'USE_EXCEPTIONS'] ) @@ -74,8 +255,56 @@ features = ['cxx', 'cxxstlib'], source = libmatrix_sources, target = 'matrix-glesv2', + use = ['glad-glesv2'], lib = ['m'], - includes = ['.'], + includes = ['.'] + platform_includes, export_includes = 'libmatrix', defines = ['GLMARK2_USE_GLESv2', 'USE_EXCEPTIONS'] ) + +if 'libpng-local' in all_uselibs: + bld( + features = ['c'], + source = libpng_local_sources, + target = 'libpng-local', + use = ['zlib-local'], + export_includes = 'libpng' + ) + +if 'zlib-local' in all_uselibs: + bld( + features = ['c'], + source = zlib_local_sources, + target = 'zlib-local', + export_includes = 'zlib' + ) + +if 'libjpeg-turbo-local' in all_uselibs: + bld( + features = ['c'], + source = libjpeg_turbo_local_sources, + target = 'libjpeg-turbo-local', + export_includes = 'libjpeg-turbo' + ) + +if 'common-gl' in all_uselibs: + bld( + features = ['cxx', 'cxxstlib'], + source = common_sources, + target = 'common-gl', + use = platform_uselibs + ['glad-gl', 'matrix-gl'], + lib = ['m', 'jpeg', 'dl'], + includes = includes, + defines = ['GLMARK2_USE_GL', 'USE_EXCEPTIONS'] + ) + +if 'common-glesv2' in all_uselibs: + bld( + features = ['cxx', 'cxxstlib'], + source = common_sources, + target = 'common-glesv2', + use = platform_uselibs + ['glad-glesv2', 'matrix-glesv2'], + lib = ['m', 'jpeg', 'dl'], + includes = includes, + defines = ['GLMARK2_USE_GLESv2', 'USE_EXCEPTIONS'] + ) diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/adler32.c glmark2-2021.02/android/jni/src/zlib/adler32.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/adler32.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/adler32.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,186 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/compress.c glmark2-2021.02/android/jni/src/zlib/compress.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/compress.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/compress.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,86 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong left; + + left = *destLen; + *destLen = 0; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/crc32.c glmark2-2021.02/android/jni/src/zlib/crc32.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/crc32.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/crc32.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,442 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + */ + +#ifdef MAKECRCH +# include +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +/* Definitions for doing the crc four data bytes at a time. */ +#if !defined(NOBYFOUR) && defined(Z_U4) +# define BYFOUR +#endif +#ifdef BYFOUR + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, z_size_t)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, z_size_t)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); + + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local z_crc_t FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const z_crc_t FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + z_crc_t c; + int n, k; + z_crc_t poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (z_crc_t)n; + for (k = 0; k < 8; k++) + c = c & 1 ? poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = ZSWAP32(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = ZSWAP32(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const z_crc_t FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const z_crc_t FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + z_crc_t endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + return crc32_z(crc, buf, len); +} + +#ifdef BYFOUR + +/* + This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit + integer pointer type. This violates the strict aliasing rule, where a + compiler can assume, for optimization purposes, that two pointers to + fundamentally different types won't ever point to the same memory. This can + manifest as a problem only if one of the pointers is written to. This code + only reads from those pointers. So long as this code remains isolated in + this compilation unit, there won't be a problem. For this reason, this code + should not be copied and pasted into a compilation unit in which other code + writes to the buffer that is passed to these routines. + */ + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = (z_crc_t)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *buf4++; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = ZSWAP32((z_crc_t)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(ZSWAP32(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +local uLong crc32_combine_(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/crc32.h glmark2-2021.02/android/jni/src/zlib/crc32.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/crc32.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/crc32.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/deflate.c glmark2-2021.02/android/jni/src/zlib/deflate.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/deflate.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/deflate.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,2163 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local int deflateStateCheck OF((z_streamp strm)); +local void slide_hash OF((deflate_state *s)); +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV +# pragma message("Assembler code may have bugs -- use at your own risk") + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef ZLIB_DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +local void slide_hash(s) + deflate_state *s; +{ + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = (uInt)memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +local int deflateStateCheck (strm) + z_streamp strm; +{ + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + z_streamp strm; + Bytef *dictionary; + uInt *dictLength; +{ + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->high_water) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_out == 0) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + old_flush = s->last_flush; + s->last_flush = flush; + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Write the header */ + if (s->status == INIT_STATE) { + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + left -= copy; + } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + } + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#endif + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + + status = strm->state->status; + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (deflateStateCheck(source) || dest == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local unsigned read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = (int)s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#endif /* FASTEST */ + +#ifdef ZLIB_DEBUG + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* ZLIB_DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + unsigned n; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunites to have a single copy from next_in to next_out. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. + */ + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); + + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. + */ + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. + */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ + + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; + + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); + +#ifdef ZLIB_DEBUG + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; +#endif + + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; + } + + /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. + */ + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; + } + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. + */ + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. */ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + } + s->block_start = s->strstart; + s->insert += MIN(used, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart - 1; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. + */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/deflate.h glmark2-2021.02/android/jni/src/zlib/deflate.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/deflate.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/deflate.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,349 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef ZLIB_DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; +#else + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzclose.c glmark2-2021.02/android/jni/src/zlib/gzclose.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzclose.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/gzclose.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,25 @@ +/* gzclose.c -- zlib gzclose() function + * Copyright (C) 2004, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* gzclose() is in a separate file so that it is linked in only if it is used. + That way the other gzclose functions can be used instead to avoid linking in + unneeded compression or decompression routines. */ +int ZEXPORT gzclose(file) + gzFile file; +{ +#ifndef NO_GZCOMPRESS + gz_statep state; + + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); +#else + return gzclose_r(file); +#endif +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzguts.h glmark2-2021.02/android/jni/src/zlib/gzguts.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzguts.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/gzguts.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,218 @@ +/* gzguts.h -- zlib internal header definitions for gz* operations + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef _LARGEFILE64_SOURCE +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# endif +# ifdef _FILE_OFFSET_BITS +# undef _FILE_OFFSET_BITS +# endif +#endif + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include +#include "zlib.h" +#ifdef STDC +# include +# include +# include +#endif + +#ifndef _POSIX_SOURCE +# define _POSIX_SOURCE +#endif +#include + +#ifdef _WIN32 +# include +#endif + +#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) +# include +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# define WIDECHAR +#endif + +#ifdef WINAPI_FAMILY +# define open _open +# define read _read +# define write _write +# define close _close +#endif + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS +/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 +/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) +# define vsnprintf _vsnprintf +# endif +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +# ifdef VMS +# define NO_vsnprintf +# endif +# ifdef __OS400__ +# define NO_vsnprintf +# endif +# ifdef __MVS__ +# define NO_vsnprintf +# endif +#endif + +/* unlike snprintf (which is required in C99), _snprintf does not guarantee + null termination of the result -- however this is only used in gzlib.c where + the result is assured to fit in the space provided */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +/* gz* functions always use library allocation functions */ +#ifndef STDC + extern voidp malloc OF((uInt size)); + extern void free OF((voidpf ptr)); +#endif + +/* get errno and strerror definition */ +#if defined UNDER_CE +# include +# define zstrerror() gz_strwinerror((DWORD)GetLastError()) +#else +# ifndef NO_STRERROR +# include +# define zstrerror() strerror(errno) +# else +# define zstrerror() "stdio error (consult errno)" +# endif +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); +#endif + +/* default memLevel */ +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer (double-sized when writing) */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzlib.c glmark2-2021.02/android/jni/src/zlib/gzlib.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzlib.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/gzlib.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,637 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +/* Local functions */ +local void gz_reset OF((gz_statep)); +local gzFile gz_open OF((const void *, int, const char *)); + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. */ +char ZLIB_INTERNAL *gz_strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +#endif /* UNDER_CE */ + +/* Reset gzip file state */ +local void gz_reset(state) + gz_statep state; +{ + state->x.have = 0; /* no output data available */ + if (state->mode == GZ_READ) { /* for reading ... */ + state->eof = 0; /* not at end of file */ + state->past = 0; /* have not read past end yet */ + state->how = LOOK; /* look for gzip header */ + } + state->seek = 0; /* no seek request pending */ + gz_error(state, Z_OK, NULL); /* clear error */ + state->x.pos = 0; /* no uncompressed data yet */ + state->strm.avail_in = 0; /* no input data yet */ +} + +/* Open a gzip file either by name or file descriptor. */ +local gzFile gz_open(path, fd, mode) + const void *path; + int fd; + const char *mode; +{ + gz_statep state; + z_size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef WIDECHAR + if (fd == -2) { + len = wcstombs(NULL, path, 0); + if (len == (z_size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef WIDECHAR + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef WIDECHAR + fd == -2 ? _wopen(path, oflag, 0666) : +#endif + open((const char *)path, oflag, 0666)); + if (state->fd == -1) { + free(state->path); + free(state); + return NULL; + } + if (state->mode == GZ_APPEND) { + LSEEK(state->fd, 0, SEEK_END); /* so gzoffset() is correct */ + state->mode = GZ_WRITE; /* simplify later checks */ + } + + /* save the current position for rewinding (only if reading) */ + if (state->mode == GZ_READ) { + state->start = LSEEK(state->fd, 0, SEEK_CUR); + if (state->start == -1) state->start = 0; + } + + /* initialize stream */ + gz_reset(state); + + /* return stream */ + return (gzFile)state; +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen64(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzdopen(fd, mode) + int fd; + const char *mode; +{ + char *path; /* identifier for error messages */ + gzFile gz; + + if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL) + return NULL; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(path, 7 + 3 * sizeof(int), "", fd); +#else + sprintf(path, "", fd); /* for debugging */ +#endif + gz = gz_open(path, fd, mode); + free(path); + return gz; +} + +/* -- see zlib.h -- */ +#ifdef WIDECHAR +gzFile ZEXPORT gzopen_w(path, mode) + const wchar_t *path; + const char *mode; +{ + return gz_open(path, -2, mode); +} +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzbuffer(file, size) + gzFile file; + unsigned size; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* make sure we haven't already allocated memory */ + if (state->size != 0) + return -1; + + /* check and set requested size */ + if ((size << 1) < size) + return -1; /* need to be able to double it */ + if (size < 2) + size = 2; /* need two bytes to check magic header */ + state->want = size; + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzrewind(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* back up and start over */ + if (LSEEK(state->fd, state->start, SEEK_SET) == -1) + return -1; + gz_reset(state); + return 0; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzseek64(file, offset, whence) + gzFile file; + z_off64_t offset; + int whence; +{ + unsigned n; + z_off64_t ret; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* check that there's no error */ + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* can only seek from start or relative to current position */ + if (whence != SEEK_SET && whence != SEEK_CUR) + return -1; + + /* normalize offset to a SEEK_CUR specification */ + if (whence == SEEK_SET) + offset -= state->x.pos; + else if (state->seek) + offset += state->skip; + state->seek = 0; + + /* if within raw area while reading, just go there */ + if (state->mode == GZ_READ && state->how == COPY && + state->x.pos + offset >= 0) { + ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR); + if (ret == -1) + return -1; + state->x.have = 0; + state->eof = 0; + state->past = 0; + state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(file) + gzFile file; +{ + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(file) + gzFile file; +{ + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(file) + gzFile file; +{ + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(file, errnum) + gzFile file; + int *errnum; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? "" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. */ +void ZLIB_INTERNAL gz_error(state, err, msg) + gz_statep state; + int err; + const char *msg; +{ + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif +} + +#ifndef INT_MAX +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax() +{ + unsigned p, q; + + p = 1; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzread.c glmark2-2021.02/android/jni/src/zlib/gzread.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzread.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/gzread.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,654 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); +local int gz_avail OF((gz_statep)); +local int gz_look OF((gz_statep)); +local int gz_decomp OF((gz_statep)); +local int gz_fetch OF((gz_statep)); +local int gz_skip OF((gz_statep, z_off64_t)); +local z_size_t gz_read OF((gz_statep, voidp, z_size_t)); + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. */ +local int gz_load(state, buf, len, have) + gz_statep state; + unsigned char *buf; + unsigned len; + unsigned *have; +{ + int ret; + unsigned get, max = ((unsigned)-1 >> 2) + 1; + + *have = 0; + do { + get = len - *have; + if (get > max) + get = max; + ret = read(state->fd, buf + *have, get); + if (ret <= 0) + break; + *have += (unsigned)ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. */ +local int gz_avail(state) + gz_statep state; +{ + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. */ +local int gz_look(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. */ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + if (strm->avail_in) { + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + } + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. */ +local int gz_decomp(state) + gz_statep state; +{ + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? "compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(state, len) + gz_statep state; + z_off64_t len; +{ + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? + (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* Read len bytes into buf from file, or less than len up to the end of the + input. Return the number of bytes read. If zero is returned, either the + end of file was reached, or there was an error. state->err must be + consulted in that case to determine which. */ +local z_size_t gz_read(state, buf, len) + gz_statep state; + voidp buf; + z_size_t len; +{ + z_size_t got; + unsigned n; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return 0; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* set n to the maximum amount of len that fits in an unsigned int */ + n = -1; + if (n > len) + n = len; + + /* first just try copying data from the output buffer */ + if (state->x.have) { + if (state->x.have < n) + n = state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || n < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return 0; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, n, &n) == -1) + return 0; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + state->strm.avail_out = n; + state->strm.next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return 0; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer */ + return got; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); + return -1; + } + + /* read len or fewer bytes to buf */ + len = gz_read(state, buf, len); + + /* check for an error */ + if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* return the number of bytes read (this is assured to fit in an int) */ + return (int)len; +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfread(buf, size, nitems, file) + voidp buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* read len or fewer bytes to buf, return the number of full items read */ + return len ? gz_read(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(file) + gzFile file; +{ + int ret; + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gz_read() */ + ret = gz_read(state, buf, 1); + return ret < 1 ? -1 : buf[0]; +} + +int ZEXPORT gzgetc_(file) +gzFile file; +{ + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in current output buffer */ + n = state->x.have > left ? left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(file) + gzFile file; +{ + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if (state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzwrite.c glmark2-2021.02/android/jni/src/zlib/gzwrite.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/gzwrite.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/gzwrite.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,665 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_init OF((gz_statep)); +local int gz_comp OF((gz_statep, int)); +local int gz_zero OF((gz_statep, z_off64_t)); +local z_size_t gz_write OF((gz_statep, voidpc, z_size_t)); + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on a memory allocation failure, or 0 on + success. */ +local int gz_init(state) + gz_statep state; +{ + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer (double size for gzprintf) */ + state->in = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + strm->next_in = NULL; + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file or if gz_init() + fails to allocate memory, otherwise 0. flush is assumed to be a valid + deflate() flush value. If flush is Z_FINISH, then the deflate() state is + reset to start a new gzip stream. If gz->direct is true, then simply write + to the output file without compressing, and ignore flush. */ +local int gz_comp(state, flush) + gz_statep state; + int flush; +{ + int ret, writ; + unsigned have, put, max = ((unsigned)-1 >> 2) + 1; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + while (strm->avail_in) { + put = strm->avail_in > max ? max : strm->avail_in; + writ = write(state->fd, strm->next_in, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in -= (unsigned)writ; + strm->next_in += writ; + } + return 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + while (strm->next_out > state->x.next) { + put = strm->next_out - state->x.next > (int)max ? max : + (unsigned)(strm->next_out - state->x.next); + writ = write(state->fd, state->x.next, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + state->x.next += writ; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = state->out; + } + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + deflateReset(strm); + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on a write error or memory + allocation failure by gz_comp(), or 0 on success. */ +local int gz_zero(state, len) + gz_statep state; + z_off64_t len; +{ + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* Write len bytes from buf to file. Return the number of bytes written. If + the returned value is less than len, then there was an error. */ +local z_size_t gz_write(state, buf, len) + gz_statep state; + voidpc buf; + z_size_t len; +{ + z_size_t put = len; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (state->strm.avail_in == 0) + state->strm.next_in = state->in; + have = (unsigned)((state->strm.next_in + state->strm.avail_in) - + state->in); + copy = state->size - have; + if (copy > len) + copy = len; + memcpy(state->in + have, buf, copy); + state->strm.avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* directly compress user buffer to file */ + state->strm.next_in = (z_const Bytef *)buf; + do { + unsigned n = (unsigned)-1; + if (n > len) + n = len; + state->strm.avail_in = n; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + len -= n; + } while (len); + } + + /* input was all buffered or compressed */ + return put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return 0; + } + + /* write len bytes from buf (the return value will fit in an int) */ + return (int)gz_write(state, buf, len); +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfwrite(buf, size, nitems, file) + voidpc buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* write len bytes to buf, return the number of full items written */ + return len ? gz_write(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned have; + unsigned char buf[1]; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* try writing to input buffer for speed (state->size == 0 if buffer not + initialized) */ + if (state->size) { + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + if (have < state->size) { + state->in[have] = (unsigned char)c; + strm->avail_in++; + state->x.pos++; + return c & 0xff; + } + } + + /* no room in buffer or not initialized, use gz_write() */ + buf[0] = (unsigned char)c; + if (gz_write(state, buf, 1) != 1) + return -1; + return c & 0xff; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputs(file, str) + gzFile file; + const char *str; +{ + int ret; + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* write string */ + len = strlen(str); + ret = gz_write(state, str, len); + return ret == 0 && len != 0 ? -1 : ret; +} + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +#include + +/* -- see zlib.h -- */ +int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) +{ + int len; + unsigned left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->err; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(next, format, va); + for (len = 0; len < state->size; len++) + if (next[len] == 0) break; +# else + len = vsprintf(next, format, va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(next, state->size, format, va); + len = strlen(next); +# else + len = vsnprintf(next, state->size, format, va); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += (unsigned)len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return len; +} + +int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) +{ + va_list va; + int ret; + + va_start(va, format); + ret = gzvprintf(file, format, va); + va_end(va); + return ret; +} + +#else /* !STDC && !Z_HAVE_STDARG_H */ + +/* -- see zlib.h -- */ +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + unsigned len, left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that can really pass pointer in ints */ + if (sizeof(int) != sizeof(void *)) + return Z_STREAM_ERROR; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->error; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->error; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(strm->next_in + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, + a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < size; len++) + if (next[len] == 0) + break; +# else + len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, + a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, + a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(next); +# else + len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return (int)len; +} + +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzflush(file, flush) + gzFile file; + int flush; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* check flush parameter */ + if (flush < 0 || flush > Z_FINISH) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* compress remaining data with requested flush */ + (void)gz_comp(state, flush); + return state->err; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzsetparams(file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(file) + gzFile file; +{ + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/infback.c glmark2-2021.02/android/jni/src/zlib/infback.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/infback.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/infback.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,640 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. + */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = (uInt)windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). */ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, than inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused in the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. + */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffast.c glmark2-2021.02/android/jni/src/zlib/inffast.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffast.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inffast.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,323 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + *out++ = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffast.h glmark2-2021.02/android/jni/src/zlib/inffast.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffast.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inffast.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffixed.h glmark2-2021.02/android/jni/src/zlib/inffixed.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inffixed.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inffixed.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inflate.c glmark2-2021.02/android/jni/src/zlib/inflate.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inflate.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inflate.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1561 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. + * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local int inflateStateCheck OF((z_streamp strm)); +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, + unsigned len)); + +local int inflateStateCheck(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} + +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. + */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(strm, check) +z_streamp strm; +int check; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inflate.h glmark2-2021.02/android/jni/src/zlib/inflate.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inflate.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inflate.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,125 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inftrees.c glmark2-2021.02/android/jni/src/zlib/inftrees.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inftrees.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inftrees.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,304 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inftrees.h glmark2-2021.02/android/jni/src/zlib/inftrees.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/inftrees.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/inftrees.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/README glmark2-2021.02/android/jni/src/zlib/README --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/README 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/README 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,115 @@ +ZLIB DATA COMPRESSION LIBRARY + +zlib 1.2.11 is a general purpose data compression library. All the code is +thread safe. The data format used by the zlib library is described by RFCs +(Request for Comments) 1950 to 1952 in the files +http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and +rfc1952 (gzip format). + +All functions of the compression library are documented in the file zlib.h +(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example +of the library is given in the file test/example.c which also tests that +the library is working correctly. Another example is given in the file +test/minigzip.c. The compression library itself is composed of all source +files in the root directory. + +To compile all files and run the test program, follow the instructions given at +the top of Makefile.in. In short "./configure; make test", and if that goes +well, "make install" should work for most flavors of Unix. For Windows, use +one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use +make_vms.com. + +Questions about zlib should be sent to , or to Gilles Vollant + for the Windows DLL version. The zlib home page is +http://zlib.net/ . Before reporting a problem, please check this site to +verify that you have the latest version of zlib; otherwise get the latest +version and check whether the problem still exists or not. + +PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help. + +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available at +http://marknelson.us/1997/01/01/zlib-engine/ . + +The changes made in version 1.2.11 are documented in the file ChangeLog. + +Unsupported third party contributions are provided in directory contrib/ . + +zlib is available in Java using the java.util.zip package, documented at +http://java.sun.com/developer/technicalArticles/Programming/compression/ . + +A Perl interface to zlib written by Paul Marquess is available +at CPAN (Comprehensive Perl Archive Network) sites, including +http://search.cpan.org/~pmqs/IO-Compress-Zlib/ . + +A Python interface to zlib written by A.M. Kuchling is +available in Python 1.5 and later versions, see +http://docs.python.org/library/zlib.html . + +zlib is built into tcl: http://wiki.tcl.tk/4610 . + +An experimental package to read and write files in .zip format, written on top +of zlib by Gilles Vollant , is available in the +contrib/minizip directory of zlib. + + +Notes for some targets: + +- For Windows DLL versions, please see win32/DLL_FAQ.txt + +- For 64-bit Irix, deflate.c must be compiled without any optimization. With + -O, one libpng test fails. The test works in 32 bit mode (with the -n32 + compiler flag). The compiler bug has been reported to SGI. + +- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works + when compiled with cc. + +- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is + necessary to get gzprintf working correctly. This is done by configure. + +- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with + other compilers. Use "make test" to check your compiler. + +- gzdopen is not supported on RISCOS or BEOS. + +- For PalmOs, see http://palmzlib.sourceforge.net/ + + +Acknowledgments: + + The deflate format used by zlib was defined by Phil Katz. The deflate and + zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; they + are too numerous to cite here. + +Copyright notice: + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* receiving +lengthy legal documents to sign. The sources are provided for free but without +warranty of any kind. The library has been entirely written by Jean-loup +Gailly and Mark Adler; it does not include third-party code. + +If you redistribute modified sources, we would appreciate that you include in +the file ChangeLog history information documenting your changes. Please read +the FAQ for more information on the distribution of modified source versions. diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/trees.c glmark2-2021.02/android/jni/src/zlib/trees.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/trees.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/trees.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1203 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2017 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef ZLIB_DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local const static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local const static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. + */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef ZLIB_DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* !ZLIB_DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +#ifdef ZLIB_DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. + */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); + } + if (overflow == 0) return; + + Tracev((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; +#endif +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. + */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef ZLIB_DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(s) + deflate_state *s; +{ + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/trees.h glmark2-2021.02/android/jni/src/zlib/trees.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/trees.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/trees.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/uncompr.c glmark2-2021.02/android/jni/src/zlib/uncompr.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/uncompr.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/uncompr.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,93 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. +*/ +int ZEXPORT uncompress2 (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong *sourceLen; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} + +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return uncompress2(dest, destLen, source, &sourceLen); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zconf.h glmark2-2021.02/android/jni/src/zlib/zconf.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zconf.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/zconf.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,534 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO + typedef unsigned long z_size_t; +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zlib.h glmark2-2021.02/android/jni/src/zlib/zlib.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zlib.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/zlib.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1912 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.11" +#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more ouput + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if any input has been consumed in a previous + deflate() call, then the input available so far is compressed with the old + level and strategy using deflate(strm, Z_BLOCK). There are three approaches + for the compression levels 0, 1..3, and 4..9 respectively. The new level + and strategy will take effect at the next call of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will not automatically decode concatenated gzip streams. + inflate() will return Z_STREAM_END at the end of the gzip stream. The state + would need to be reset to continue decoding a subsequent gzip stream. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() is passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen)); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Three times that size in buffer space is allocated. A larger buffer + size of, for example, 64K or 128K bytes will noticeably increase the speed + of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. Previously provided + data is flushed before the parameter change. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, + gzFile file)); +/* + Read up to nitems items of size size from file to buf, otherwise operating + as gzread() does. This duplicates the interface of stdio's fread(), with + size_t request and return types. If the library defines size_t, then + z_size_t is identical to size_t. If not, then z_size_t is an unsigned + integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevetheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, reseting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, + z_size_t nitems, gzFile file)); +/* + gzfwrite() writes nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zutil.c glmark2-2021.02/android/jni/src/zlib/zutil.c --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zutil.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/zutil.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,325 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2017 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif + +z_const char * const z_errmsg[10] = { + (z_const char *)"need dictionary", /* Z_NEED_DICT 2 */ + (z_const char *)"stream end", /* Z_STREAM_END 1 */ + (z_const char *)"", /* Z_OK 0 */ + (z_const char *)"file error", /* Z_ERRNO (-1) */ + (z_const char *)"stream error", /* Z_STREAM_ERROR (-2) */ + (z_const char *)"data error", /* Z_DATA_ERROR (-3) */ + (z_const char *)"insufficient memory", /* Z_MEM_ERROR (-4) */ + (z_const char *)"buffer error", /* Z_BUF_ERROR (-5) */ + (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */ + (z_const char *)"" +}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch ((int)(sizeof(uInt))) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch ((int)(sizeof(uLong))) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch ((int)(sizeof(voidpf))) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch ((int)(sizeof(z_off_t))) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef ZLIB_DEBUG + flags += 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef ZLIB_DEBUG +#include +# ifndef verbose +# define verbose 0 +# endif +int ZLIB_INTERNAL z_verbose = verbose; + +void ZLIB_INTERNAL z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void ZLIB_INTERNAL zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf; + ulg bsize = (ulg)items*size; + + (void)opaque; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. + */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + int n; + + (void)opaque; + + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) +{ + (void)opaque; + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + (void)opaque; + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + (void)opaque; + return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void ZLIB_INTERNAL zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + (void)opaque; + free(ptr); +} + +#endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zutil.h glmark2-2021.02/android/jni/src/zlib/zutil.h --- glmark2-2014.03+git20150611.fa71af2d/android/jni/src/zlib/zutil.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/jni/src/zlib/zutil.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,271 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include "zlib.h" + +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) +# include +# endif +# include +# include +#endif + +#ifdef Z_SOLO + typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 1 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 2 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef __370__ +# if __TARGET_LIB__ < 0x20000000 +# define OS_CODE 4 +# elif __TARGET_LIB__ < 0x40000000 +# define OS_CODE 11 +# else +# define OS_CODE 8 +# endif +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 5 +#endif + +#ifdef OS2 +# define OS_CODE 6 +# if defined(M_I86) && !defined(Z_SOLO) +# include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 7 +# ifndef Z_SOLO +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +# endif +#endif + +#ifdef __acorn +# define OS_CODE 13 +#endif + +#if defined(WIN32) && !defined(__CYGWIN__) +# define OS_CODE 10 +#endif + +#ifdef _BEOS_ +# define OS_CODE 16 +#endif + +#ifdef __TOS_OS400__ +# define OS_CODE 18 +#endif + +#ifdef __APPLE__ +# define OS_CODE 19 +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 3 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(pyr) || defined(Z_SOLO) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef ZLIB_DEBUG +# include + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, + unsigned size)); + void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); +#endif + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +#endif /* ZUTIL_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/ic_launcher_background.xml glmark2-2021.02/android/res/drawable/ic_launcher_background.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/ic_launcher_background.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/drawable/ic_launcher_background.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,12 @@ + + + + + \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/ic_launcher_foreground.xml glmark2-2021.02/android/res/drawable/ic_launcher_foreground.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/ic_launcher_foreground.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/drawable/ic_launcher_foreground.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ + + + + + + + + + \ No newline at end of file Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_about.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_about.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_delete.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_delete.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_load.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_load.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_results.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_results.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_save.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_save.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable/menu_settings.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable/menu_settings.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_archive.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_archive.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_delete.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_delete.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_info_details.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_info_details.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_mark.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_mark.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_preferences.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_preferences.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-hdpi/ic_menu_save.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-hdpi/ic_menu_save.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_archive.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_archive.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_delete.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_delete.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_info_details.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_info_details.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_mark.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_mark.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_preferences.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_preferences.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-mdpi/ic_menu_save.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-mdpi/ic_menu_save.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_archive.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_archive.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_delete.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_delete.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_info_details.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_info_details.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_matk.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_matk.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_preferences.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_preferences.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/drawable-xhdpi/ic_menu_save.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/drawable-xhdpi/ic_menu_save.png differ diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/menu/main_options_menu.xml glmark2-2021.02/android/res/menu/main_options_menu.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/menu/main_options_menu.xml 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/android/res/menu/main_options_menu.xml 2021-04-25 01:47:22.000000000 +0800 @@ -2,25 +2,25 @@

+ android:icon="@drawable/ic_menu_save" /> + android:icon="@drawable/ic_menu_archive" /> + android:icon="@drawable/ic_menu_delete" /> + android:icon="@drawable/ic_menu_preferences" /> + android:icon="@drawable/ic_menu_mark" /> + android:icon="@drawable/ic_menu_info_details" /> diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap/ic_launcher_round.xml glmark2-2021.02/android/res/mipmap/ic_launcher_round.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap/ic_launcher_round.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/mipmap/ic_launcher_round.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,36 @@ + + + + + + + + + + + \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap/ic_launcher.xml glmark2-2021.02/android/res/mipmap/ic_launcher.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap/ic_launcher.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/mipmap/ic_launcher.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,16 @@ + + + + + + \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-anydpi-v26/ic_launcher_round.xml glmark2-2021.02/android/res/mipmap-anydpi-v26/ic_launcher_round.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-anydpi-v26/ic_launcher_round.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/mipmap-anydpi-v26/ic_launcher_round.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-anydpi-v26/ic_launcher.xml glmark2-2021.02/android/res/mipmap-anydpi-v26/ic_launcher.xml --- glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-anydpi-v26/ic_launcher.xml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/android/res/mipmap-anydpi-v26/ic_launcher.xml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-hdpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-hdpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-hdpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-hdpi/ic_launcher_round.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-ldpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-ldpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-ldpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-ldpi/ic_launcher_round.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-mdpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-mdpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-mdpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-mdpi/ic_launcher_round.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xhdpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xhdpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xhdpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xhdpi/ic_launcher_round.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xxhdpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xxhdpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xxhdpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xxhdpi/ic_launcher_round.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xxxhdpi/ic_launcher.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xxxhdpi/ic_launcher.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/android/res/mipmap-xxxhdpi/ic_launcher_round.png and /tmp/VEvcU31eDG/glmark2-2021.02/android/res/mipmap-xxxhdpi/ic_launcher_round.png differ diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/meson.build glmark2-2021.02/data/meson.build --- glmark2-2014.03+git20150611.fa71af2d/data/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/data/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,14 @@ +install_subdir( + 'shaders', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) + +install_subdir( + 'models', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) + +install_subdir( + 'textures', + install_dir : join_paths([get_option('datadir'), 'glmark2']) + ) diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/bump-height.frag glmark2-2021.02/data/shaders/bump-height.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/bump-height.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/bump-height.frag 2021-04-25 01:47:22.000000000 +0800 @@ -4,7 +4,12 @@ uniform sampler2D HeightMap; +#ifdef GL_FRAGMENT_PRECISION_HIGH +varying highp vec2 TextureCoord; +#else varying vec2 TextureCoord; +#endif + varying vec3 NormalEye; varying vec3 TangentEye; varying vec3 BitangentEye; diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/conditionals.frag glmark2-2021.02/data/shaders/conditionals.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/conditionals.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/conditionals.frag 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/function.frag glmark2-2021.02/data/shaders/function.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/function.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/function.frag 2021-04-25 01:47:22.000000000 +0800 @@ -8,7 +8,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/ideas-lamp-lit.frag glmark2-2021.02/data/shaders/ideas-lamp-lit.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/ideas-lamp-lit.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/ideas-lamp-lit.frag 2021-04-25 01:47:22.000000000 +0800 @@ -6,7 +6,6 @@ vec4 position; }; LightSourceParameters lightSource[3]; -uniform mat4 modelview; uniform vec4 light0Position; uniform vec4 light1Position; uniform vec4 light2Position; diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/light-refract.frag glmark2-2021.02/data/shaders/light-refract.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/light-refract.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/light-refract.frag 2021-04-25 01:47:22.000000000 +0800 @@ -23,7 +23,7 @@ vec3 mc_perspective = (MapCoord.xyz / MapCoord.w) + front_refraction; vec2 dcoord = mc_perspective.st * point_five + point_five; vec4 distance_value = texture2D(DistanceMap, dcoord); - vec3 back_position = vertex_position.xyz + front_refraction * distance_value.z; + vec3 back_position = vertex_position.xyz + front_refraction * distance_value.x; // Use the exit point to index the map of back-side normals, and use the // back-side position and normal to find the transmitted vector out of the // object. diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/loop.frag glmark2-2021.02/data/shaders/loop.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/loop.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/loop.frag 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,14 @@ void main(void) { - float d = fract(gl_FragCoord.x * gl_FragCoord.y * 0.0001); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be declared highp since the multiplication can overflow in + // mediump, particularly if mediump is implemented as fp16 + highp vec2 FragCoord = gl_FragCoord.xy; +#else + vec2 FragCoord = gl_FragCoord.xy; +#endif + float d = fract(FragCoord.x * FragCoord.y * 0.0001); $MAIN$ diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/shadow.frag glmark2-2021.02/data/shaders/shadow.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/shadow.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/shadow.frag 2021-04-25 01:47:22.000000000 +0800 @@ -8,7 +8,7 @@ vec4 sc_perspective = ShadowCoord / ShadowCoord.w; sc_perspective.z += 0.1505; vec4 shadow_value = texture2D(ShadowMap, sc_perspective.st); - float light_distance = shadow_value.z; + float light_distance = shadow_value.x; float shadow = 1.0; if (ShadowCoord.w > 0.0 && light_distance < sc_perspective.z) { shadow = 0.5; diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain.frag glmark2-2021.02/data/shaders/terrain.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/terrain.frag 2021-04-25 01:47:22.000000000 +0800 @@ -3,9 +3,6 @@ uniform vec3 uSpecularColor; uniform float uShininess; uniform float uOpacity; -uniform bool enableDiffuse1; -uniform bool enableDiffuse2; -uniform bool enableSpecular; uniform sampler2D tDiffuse1; uniform sampler2D tDiffuse2; uniform sampler2D tDetail; @@ -70,7 +67,12 @@ vec3 pointSpecular = vec3( 0.0 ); for ( int i = 0; i < MAX_POINT_LIGHTS; i ++ ) { vec4 lPosition = viewMatrix * vec4( pointLightPosition[ i ], 1.0 ); +#ifdef GL_FRAGMENT_PRECISION_HIGH + // should be highp for correct behaviour if mediump is implemented as fp16 + highp vec3 lVector = lPosition.xyz + vViewPosition.xyz; +#else vec3 lVector = lPosition.xyz + vViewPosition.xyz; +#endif float lDistance = 1.0; if ( pointLightDistance[ i ] > 0.0 ) lDistance = 1.0 - min( ( length( lVector ) / pointLightDistance[ i ] ), 1.0 ); diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain-noise.frag glmark2-2021.02/data/shaders/terrain-noise.frag --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain-noise.frag 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/terrain-noise.frag 2021-04-25 01:47:22.000000000 +0800 @@ -17,7 +17,13 @@ uniform MEDIUMP vec2 uvScale; varying vec2 vUv; +#ifdef GL_FRAGMENT_PRECISION_HIGH +// x should be passed as highp since the intermediate multiplications can +// overflow with mediump +vec4 permute(highp vec4 x) +#else vec4 permute(vec4 x) +#endif { return mod(((x * 34.0) + 1.0) * x, 289.0); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain.vert glmark2-2021.02/data/shaders/terrain.vert --- glmark2-2014.03+git20150611.fa71af2d/data/shaders/terrain.vert 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/data/shaders/terrain.vert 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,6 @@ uniform mat4 modelViewMatrix; uniform mat4 normalMatrix; uniform mat4 projectionMatrix; -uniform mat4 viewMatrix; // Vertex attributes attribute vec3 position; Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/data/textures/effect-2d.png and /tmp/VEvcU31eDG/glmark2-2021.02/data/textures/effect-2d.png differ diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/changelog glmark2-2021.02/debian/changelog --- glmark2-2014.03+git20150611.fa71af2d/debian/changelog 2021-01-04 05:21:54.000000000 +0800 +++ glmark2-2021.02/debian/changelog 2021-05-22 23:30:19.000000000 +0800 @@ -1,3 +1,11 @@ +glmark2 (2021.02-0ubuntu1) hirsute; urgency=medium + + * New upstream release 2021.02 + https://github.com/glmark2/glmark2/tree/2021.02 + * Build using python3 + + -- Soar Huang Sat, 22 May 2021 23:30:19 +0800 + glmark2 (2014.03+git20150611.fa71af2d-0ubuntu8) hirsute; urgency=medium * debian/rules: diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/control glmark2-2021.02/debian/control --- glmark2-2014.03+git20150611.fa71af2d/debian/control 2021-01-04 04:43:15.000000000 +0800 +++ glmark2-2021.02/debian/control 2021-05-22 23:30:19.000000000 +0800 @@ -4,7 +4,7 @@ Maintainer: Ubuntu Developers XSBC-Original-Maintainer: Alexandros Frantzis Build-Depends: debhelper (>= 9), - python2, + python3, pkg-config, libx11-dev, libjpeg-dev, @@ -15,6 +15,8 @@ libwayland-dev, libdrm-dev, libgbm-dev, + libudev-dev, + wayland-protocols, Standards-Version: 3.9.6 Homepage: https://launchpad.net/glmark2 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-drm.install glmark2-2021.02/debian/glmark2-drm.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-drm.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-drm.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2-drm diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-drm.manpages glmark2-2021.02/debian/glmark2-drm.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-drm.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-drm.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2-drm.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-drm.install glmark2-2021.02/debian/glmark2-es2-drm.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-drm.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2-drm.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2-es2-drm diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-drm.manpages glmark2-2021.02/debian/glmark2-es2-drm.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-drm.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2-drm.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2-es2-drm.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2.install glmark2-2021.02/debian/glmark2-es2.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2-es2 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2.manpages glmark2-2021.02/debian/glmark2-es2.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2-es2.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-wayland.install glmark2-2021.02/debian/glmark2-es2-wayland.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-wayland.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2-wayland.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2-es2-wayland diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-wayland.manpages glmark2-2021.02/debian/glmark2-es2-wayland.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-es2-wayland.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-es2-wayland.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2-es2-wayland.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2.install glmark2-2021.02/debian/glmark2.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2.manpages glmark2-2021.02/debian/glmark2.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-wayland.install glmark2-2021.02/debian/glmark2-wayland.install --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-wayland.install 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-wayland.install 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +usr/bin/glmark2-wayland diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-wayland.manpages glmark2-2021.02/debian/glmark2-wayland.manpages --- glmark2-2014.03+git20150611.fa71af2d/debian/glmark2-wayland.manpages 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/debian/glmark2-wayland.manpages 2021-05-22 23:30:19.000000000 +0800 @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/glmark2-wayland.1 diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/patches/c-library-check.patch glmark2-2021.02/debian/patches/c-library-check.patch --- glmark2-2014.03+git20150611.fa71af2d/debian/patches/c-library-check.patch 2019-01-11 10:14:51.000000000 +0800 +++ glmark2-2021.02/debian/patches/c-library-check.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,29 +0,0 @@ -Description: wscript: check C libraries using the C compiler -Author: Alexandre Courbot -Applied-Upstream: https://github.com/glmark2/glmark2/commit/88bbc8383fbf8c0ddf5cd133c086df5173975d82 -Last-Update: 2019-01-10 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/wscript -+++ b/wscript -@@ -82,17 +82,17 @@ - # Check required headers - req_headers = ['stdlib.h', 'string.h', 'unistd.h', 'stdint.h', 'stdio.h', 'jpeglib.h'] - for header in req_headers: -- ctx.check_cxx(header_name = header, auto_add_header_name = True, mandatory = True) -+ ctx.check_cc(header_name = header, auto_add_header_name = True, mandatory = True) - - # Check for required libs - req_libs = [('m', 'm'), ('jpeg', 'jpeg')] - for (lib, uselib) in req_libs: -- ctx.check_cxx(lib = lib, uselib_store = uselib) -+ ctx.check_cc(lib = lib, uselib_store = uselib) - - # Check required functions - req_funcs = [('memset', 'string.h', []) ,('sqrt', 'math.h', ['m'])] - for func, header, uselib in req_funcs: -- ctx.check_cxx(function_name = func, header_name = header, -+ ctx.check_cc(function_name = func, header_name = header, - uselib = uselib, mandatory = True) - - # Check for a supported version of libpng diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/patches/series glmark2-2021.02/debian/patches/series --- glmark2-2014.03+git20150611.fa71af2d/debian/patches/series 2019-01-11 10:14:42.000000000 +0800 +++ glmark2-2021.02/debian/patches/series 1970-01-01 08:00:00.000000000 +0800 @@ -1 +0,0 @@ -c-library-check.patch diff -Nru glmark2-2014.03+git20150611.fa71af2d/debian/rules glmark2-2021.02/debian/rules --- glmark2-2014.03+git20150611.fa71af2d/debian/rules 2021-01-04 04:42:54.000000000 +0800 +++ glmark2-2021.02/debian/rules 2021-05-22 23:30:19.000000000 +0800 @@ -19,7 +19,7 @@ dh $@ override_dh_auto_configure: - ./waf configure --with-flavors=$(FLAVORS) --prefix=/usr --no-werror + ./waf configure --with-flavors=$(FLAVORS) --prefix=/usr override_dh_auto_build: ./waf diff -Nru glmark2-2014.03+git20150611.fa71af2d/doc/glmark2.1.in glmark2-2021.02/doc/glmark2.1.in --- glmark2-2014.03+git20150611.fa71af2d/doc/glmark2.1.in 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/doc/glmark2.1.in 2021-04-25 01:47:22.000000000 +0800 @@ -1,4 +1,4 @@ -.TH @APPNAME@ "1" "March 2014" "@appname@ @appversion@" +.TH @APPNAME@ "1" "February 2021" "@appname@ @appversion@" .SH NAME @appname@ \- OpenGL (ES) 2.0 benchmark suite .SH SYNOPSIS @@ -21,6 +21,9 @@ Run a quick output validation test instead of running the benchmarks .TP +\fB\-\-data-path\fR PATH +Path to glmark2 models, shaders and textures +.TP \fB\-\-frame-end\fR METHOD How to end a frame [default,none,swap,finish,readpixels] .TP diff -Nru glmark2-2014.03+git20150611.fa71af2d/doc/meson.build glmark2-2021.02/doc/meson.build --- glmark2-2014.03+git20150611.fa71af2d/doc/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/doc/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,13 @@ +foreach flavor : flavors + binary = flavor_info[flavor][0] + config = configuration_data() + config.set('APPNAME', binary.to_upper()) + config.set('appname', binary) + config.set('appversion', full_version) + f = configure_file( + input: 'glmark2.1.in', + output: binary + '.1', + configuration: config + ) + install_man(f) +endforeach diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20100715 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20100715 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20100715 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20100715 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,12 @@ +glmark2 10.07 (20100715) +======================== + +* First release. +* Included benchmarks: + - Rendering using vertex arrays + - Rendering using VBOs + - Texturing using nearest filtering + - Texturing using linear filtering + - Texturing using trilinear mipmapped filtering + - Lighting per vertex using simple GLSL shaders + - Lighting per pixel using elaborate GLSL shaders \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20100728 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20100728 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20100728 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20100728 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4 @@ +glmark2 10.07.1 (20100728) +========================== + +* Define the precision of fragment shader variables for OpenGL ES 2.0. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110125 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110125 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110125 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110125 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,8 @@ +glmark2 11.01 (20110125) +========================== + +* Fix visual corruption in glmark2-es2 due to missing depth buffer. +* Fix linking issues with gcc 4.5. +* Use correct GL functions to manipulate shaders vs programs. +* Make result reporting more parser-friendly. +* Upgrade build system to waf 1.6.2. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110530 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110530 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110530 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110530 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,8 @@ +glmark2 11.05 (20110530) +========================== + +* Don't use the SDL_OPENGL flag for GLESv2 (LP: #761848). +* Ensure our screen updates are not synchronized with the vertical + retrace (LP: 761855). +* Use the correct GL headers depending on the flavor (desktop vs ES2). +* Query the correct GL object for shader linking status. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110624 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110624 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110624 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110624 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,16 @@ +glmark2 2011.06 (20110624) +========================== + +* Improve benchmark versatility by allowing runtime-configurable, + per-scene options. +* Add command line option to list avalaible scenes and their + supported options (-l,--list-scenes). +* Allow specifying the scenes to run and their options from the + command line (-b,--benchmark). +* Add basic output validation functionality (--validate). +* Add command line option to call glFinish() instead of swapping + the front and back buffers (--no-swap-buffers). +* Manually disable VSync for GL/GLX (work around an SDL bug). +* Replace custom math and shader infrastructure with functionality + provided by LibMatrix (lp:libmatrix). +* Improve user documentation (--help and man page). \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110719 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110719 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110719 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110719 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +glmark2 2011.07 (20110719) +========================== + +* Replace SDL with custom window handling code. +* Add benchmark for shader conditionals. +* Add benchmark for shader function calls. +* Add benchmark for shader loops. +* Add benchmark for real phong (vs blinn-phong) lighting model. +* Add benchmark for normal mapping. +* Refactor Mesh class to increase flexibility in vertex attribute + handling. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110818 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110818 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110818 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110818 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2011.08 (20110818) +========================== + +* Port to Android (see INSTALL.android). +* Add benchmark based on pulsar X11 GL screensaver. +* Add benchmark for 2D image processing using the GPU. +* Add command line option to set the size of the output window (-s, --size). +* Implement ShaderSource object to simplify complex shader creation. +* Implement utility functions to access resources in an abstract way. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110921 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110921 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20110921 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20110921 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2011.09 (20110921) +========================== + +* Add benchmark for blur desktop effect. +* Add support for multiple lights in the phong shading benchmark. +* Add support for loading models from OBJ geometry files. +* Add Stanford Bunny model and make it available in the build benchmark. +* Add per-scene options to set shader precision at runtime. +* Add command-line option to specify the benchmarks to run using a text file. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111018 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111018 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111018 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111018 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2011.10 (20111018) +========================== + +* Add benchmark for buffer (VBO) updates. +* Add benchmark for drop-shadow desktop effect. +* Add support for glmark2 extra large models. +* Enable the selection of additional models in the shading benchmark. +* Gracefully handle unsupported OpenGL versions, on both X11 and Android. +* Disable screen dimming and screen rotation on Android. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111116 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111116 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111116 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111116 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2011.11 (20111116) +========================== + +* Add benchmark for bump mapping using a height map. +* Add benchmark for bump mapping using a tangent space normal map + (not included in the default benchmarks). +* Implement validation support for all default benchmarks. +* Add a colored prefix to log messages in debug mode. +* Clean up and refactor code. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111215 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111215 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20111215 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20111215 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2011.12 (20111215) +========================== + +* Add benchmark for bilinear filtering implemented in the fragment + shader (not included in the default benchmarks). +* Ensure we don't call any GL functions before binding a GL context. +* Fix bug in the ShaderSource object that could lead to shader + compilation errors in strict OpenGL ES 2.0 implementations. +* Refactor scene update code to reduce duplication. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120119 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120119 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120119 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120119 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,15 @@ +glmark2 2012.01 (20120119) +========================== + +* X11: + - Run each benchmark in a fresh GL context. Use --reuse-context + to revert to the old default behavior of using the same context + for all benchmarks. + - Add option for showing a live FPS counter on screen (--show-fps). + - If the list of benchmarks to run contains only option-setting + descriptions, run the default benchmarks. +* Android: + - Fix crash on platforms not supporting glMapBufferOES. + - Log the glmark2 score when finishing. + - Log an error message if we fail to find a suitable EGLContext. + - Implement various stability improvements. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120216 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120216 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120216 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120216 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,15 @@ +glmark2 2012.02 (20120216) +========================== + +* Consolidate X11 and Android main loops. +* Accept command-line options on Android through either a file + ('/data/glmark2/args') or an extra intent key ('args'). +* Support per-scene options for displaying an FPS count on screen + (show-fps, fps-pos, fps-size), and remove --show-fps command-line option. +* Support per-scene options for displaying a benchmark title on screen + (title, title-pos, title-size). +* Add command-line option to run benchmarks indefinitely (--run-forever). +* Add command-line option to annotate the benchmarks with on-screen + information (--annotate == -b :show-fps=true:title=#info#). +* Move various utility classes to libmatrix. +* Fix build issues on Android ICS. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120322 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120322 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120322 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120322 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,7 @@ +glmark2 2012.03 (20120322) +========================== + +* X11: + - Support rendering to an off-screen surface (--off-screen). + - Add command-line option to select which method to use to "end" + a frame (--frame-end). \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120524 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120524 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120524 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120524 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,8 @@ +glmark2 2012.05 (20120524) +========================== + +* Add benchmark based on the SGI "Ideas in Motion" demo. +* Ensure that the framebuffer is drawn opaquely. +* Add command-line option to configure the visual used for rendering + (--visual-config). +* Add support for additional models and textures in the 'texture' scene. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120621 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120621 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120621 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120621 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,5 @@ +glmark2 2012.06 (20120621) +========================== + +* Add command-line option to render in fullscreen mode (--fullscreen). +* Display the frame time in addition to the FPS for each benchmark. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120719 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120719 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120719 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120719 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,10 @@ +glmark2 2012.07 (20120719) +========================== + +* Add Android GUI for defining and running benchmarks. +* Add benchmark based on the WebGL jellyfish demo (scene 'jellyfish'). +* Add benchmark based on the WebGL dynamic terrain demo (scene 'terrain'). +* Extend texture scene with the option to compute texture coordinates + in the shader (option 'texgen'). +* Add support for reading texture data from JPEG files. +* Properly support options that have a finite set of acceptable values. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120817 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120817 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20120817 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20120817 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,12 @@ +glmark2 2012.08 (20120817) +========================== + +* Display benchmark results in the Android GUI. +* Add an Android option menu offering load/save/delete benchmark list + functionality, access to last benchmark results, settings and + information about glmark2. +* Properly handle spaces in scene options on Android. +* Exclude from score calculation benchmarks whose set up wasn't successful. +* Display an appropriate message (instead of a 0 FPS value) if benchmark + set up isn't successful. +* Add support for standard GNU installation directories. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20121126 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20121126 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20121126 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20121126 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,5 @@ +glmark2 2012.11 (20121126) +========================== + +* Add a shadow mapping benchmark (scene 'shadow'). +* Consolidate EGL interactions into a separate state object. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20121218 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20121218 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20121218 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20121218 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,8 @@ +glmark2 2012.12 (20121218) +========================== + +* Add a refraction (2 interface) and reflection benchmark (scene 'reflect'). +* Add a new canvas object which allows rendering to GBM managed surfaces + and page flipping to DRM managed framebuffer from a console. +* Enhancements to visual config mechanism to consider stencil size in the + selection criteria and to query all config attribs. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20140310 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20140310 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20140310 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20140310 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2014.03 (20140310) +========================== + +* Refactor canvas code to make it easier to support new platforms. +* Add Wayland backend. +* Add Mir backend. +* Add a new 'cel' (or 'toon') shading model (scene 'shading'). +* Add a new 'clear' scene, which just clears the canvas. +* Improve support for the Wavefront .obj file format. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20170728 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20170728 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20170728 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20170728 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,8 @@ +glmark2 2017.07 (20170728) +========================== + +* Use monotonic clock for benchmark timings. +* Use udev to find the best node path to use in the DRM backend. +* Don't rebuild common source needlessly. +* Build in C++14 mode. + diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20200428 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20200428 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20200428 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20200428 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +glmark2 2020.04 (20200428) +========================== + +* Port Wayland flavor to xdg-shell window management. +* Support recent Android SDK/NDK versions. +* Add support for Windows via WGL and ANGLE-EGL. +* Support Raspberry Pi's dispmanx. +* Use glad for GL headers and dynamic GL library loading. +* Add --data-path command-line option to set data path at runtime. +* Add 'nframes' scene option to limit the number of rendered frames. +* Add F-Droid/fastlane metadata. diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20210215 glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20210215 --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/changelogs/20210215 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/changelogs/20210215 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,9 @@ +glmark2 2021.02 (20210215) +========================== + +* Meson build system support for DRM/X11/Wayland builds. +* Fix precision handling in various fragment shaders. +* Fix spurious failures when using --validate. +* Always draw to the correct, offscreen buffer when using --off-screen. +* Don't prefer framebuffer formats with more than 8 bytes per component. +* Add basic mouse and keyboard support on Wayland. diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/full_description.txt glmark2-2021.02/fastlane/metadata/android/en-US/full_description.txt --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/full_description.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/full_description.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,5 @@ +This benchmark contains tests for standard OpenGL (ES) 2.0 features, such as vertex arrays, VBOs, texturing and shaders. + +glmark2 is developed by Alexandros Frantzis and Jesse Barker based on the original glmark benchmark by Ben Smith. + +It is licensed under the GPLv3. \ No newline at end of file Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/icon.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/icon.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/phoneScreenshots/1.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/phoneScreenshots/1.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/phoneScreenshots/2.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/phoneScreenshots/2.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/phoneScreenshots/3.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/phoneScreenshots/3.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/phoneScreenshots/4.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/phoneScreenshots/4.png differ Binary files /tmp/XNsyvrPWB7/glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/images/phoneScreenshots/5.png and /tmp/VEvcU31eDG/glmark2-2021.02/fastlane/metadata/android/en-US/images/phoneScreenshots/5.png differ diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/short_description.txt glmark2-2021.02/fastlane/metadata/android/en-US/short_description.txt --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/short_description.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/short_description.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1 @@ +glmark2 is an OpenGL 2.0 and ES 2.0 benchmark. \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/title.txt glmark2-2021.02/fastlane/metadata/android/en-US/title.txt --- glmark2-2014.03+git20150611.fa71af2d/fastlane/metadata/android/en-US/title.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/fastlane/metadata/android/en-US/title.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1 @@ +glmark2 \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/.github/workflows/build.yml glmark2-2021.02/.github/workflows/build.yml --- glmark2-2014.03+git20150611.fa71af2d/.github/workflows/build.yml 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/.github/workflows/build.yml 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,79 @@ +name: build + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build-meson: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + run: > + sudo apt install meson libdrm-dev libgbm-dev libudev-dev + libwayland-dev wayland-protocols libx11-dev + - name: Setup + run: meson setup build -Dflavors=x11-gl,x11-glesv2,wayland-gl,wayland-glesv2,drm-gl,drm-glesv2 + - name: Build + run: ninja -C build + - name: Install + run: DESTDIR=/tmp/glmark2-install ninja -C build install + + build-meson-only-drm: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + run: sudo apt install meson libdrm-dev libgbm-dev libudev-dev + - name: Setup + run: meson setup build -Dflavors=drm-gl,drm-glesv2 + - name: Build + run: ninja -C build + - name: Install + run: DESTDIR=/tmp/glmark2-install ninja -C build install + + build-meson-only-wayland: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + run: sudo apt install meson libwayland-dev wayland-protocols + - name: Setup + run: meson setup build -Dflavors=wayland-gl,wayland-glesv2 + - name: Build + run: ninja -C build + - name: Install + run: DESTDIR=/tmp/glmark2-install ninja -C build install + + build-meson-only-x11: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + run: sudo apt install meson libx11-dev + - name: Setup + run: meson setup build -Dflavors=x11-gl,x11-glesv2 + - name: Build + run: ninja -C build + - name: Install + run: DESTDIR=/tmp/glmark2-install ninja -C build install + + build-waf: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + run: > + sudo apt install libdrm-dev libgbm-dev libudev-dev + libwayland-dev wayland-protocols libx11-dev + - name: Setup + run: ./waf configure --with-flavors=x11-gl,x11-glesv2,wayland-gl,wayland-glesv2,drm-gl,drm-glesv2 + - name: Build + run: ./waf build + - name: Install + run: DESTDIR=/tmp/glmark2-install ./waf install diff -Nru glmark2-2014.03+git20150611.fa71af2d/INSTALL glmark2-2021.02/INSTALL --- glmark2-2014.03+git20150611.fa71af2d/INSTALL 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/INSTALL 2021-04-25 01:47:22.000000000 +0800 @@ -1,8 +1,31 @@ -glmark2 uses the WAF build system. +Meson build system +------------------ + +glmark2 uses the meson build system for the most common build flavors (X11, +Wayland, DRM). + +To configure glmark2 use: + +$ meson setup build -Dflavors=drm-gl,drm-glesv2,wayland-gl,wayland-glesv2,x11-gl,x11-glesv2 [-Ddata-path=DATA_PATH --prefix=PREFIX] + +To build use: + +$ ninja -C build + +To install use: + +$ [DESTDIR=] ninja -C build install + +WAF build system +---------------- + +glmark2 previously used the WAF build system, and the builds for some flavors +(dispmanx, win32, mir) have not been ported to the meson build system. For such +cases follow the instructions below. To configure glmark2 use: -$ ./waf configure --with-flavors= [--data-path=DATA_PATH --prefix=PREFIX] +$ ./waf configure --with-flavors=dispmanx-glesv2,win32-gl,win32-glesv2,mir-gl,mir-glesv2... [--data-path=DATA_PATH --prefix=PREFIX] To build use: diff -Nru glmark2-2014.03+git20150611.fa71af2d/INSTALL.android glmark2-2021.02/INSTALL.android --- glmark2-2014.03+git20150611.fa71af2d/INSTALL.android 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/INSTALL.android 2021-04-25 01:47:22.000000000 +0800 @@ -1,8 +1,26 @@ -The minimum Android API level for glmark2 is 9 (>= Android 2.3). +A build script for Linux has been added which simplifies the process of +creating an APK. + +Any recent toolchain (released in 2018 or later) should work. The steps +using build-tools have been hard coded to 26.0.1, but could be made +configurable, or update the script to point to installed version. + +Tested with NDK r18b. + +$ sudo apt-get install openjdk-8-jdk +$ export ANDROID_SDK=/path/to/Android/Sdk +$ export ANDROID_NDK=/path/to/Android/Sdk/ndk-bundle +$ export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 +$ cd android +$ ./build.sh +$ adb install glmark2.apk Building using the SDK and NDK ------------------------------ +Note: These build steps require older SDK and NDK. Later revisions have +removed the android and ant scripts. + To build and install glmark2 you need the Android SDK and NDK. The 'android', 'adb' and 'ndk-build' tools used below are included there. diff -Nru glmark2-2014.03+git20150611.fa71af2d/meson.build glmark2-2021.02/meson.build --- glmark2-2014.03+git20150611.fa71af2d/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,67 @@ +project( + 'glmark2', + ['cpp', 'c'], + default_options : ['cpp_std=c++14'], + version : '2021.02', + meson_version: '>=0.47' + ) + +cpp = meson.get_compiler('cpp') + +full_version = meson.project_version() + get_option('version-suffix') +add_global_arguments('-DGLMARK_VERSION="@0@"'.format(full_version), language : 'cpp') + +data_path = get_option('data-path') +if data_path == '' + data_path = join_paths([get_option('prefix'), get_option('datadir'), 'glmark2']) +endif +add_global_arguments('-DGLMARK_DATA_PATH="@0@"'.format(data_path), language : 'cpp') + +extras_path = get_option('extras-path') +if extras_path != '' + add_global_arguments('-DGLMARK_EXTRAS_PATH="@0@"'.format(extras_path), language : 'cpp') +endif + +m_dep = cpp.find_library('m', required : false) +dl_dep = cpp.find_library('dl') +libjpeg_dep = dependency('libjpeg') +libpng_dep = dependency('libpng') + +flavors = get_option('flavors') +if flavors.length() == 0 + error('Need at least one flavor with -Dflavors=flavor1[,flavors2]...\n'+ + 'See meson_options.txt contents for available flavors') +endif + +flavors_str = ', '.join(flavors) +need_x11 = flavors_str.contains('x11-') +need_drm = flavors_str.contains('drm-') +need_wayland = flavors_str.contains('wayland-') +need_gl = flavors_str.contains('-gl') +need_glesv2 = flavors_str.contains('-glesv2') +need_egl = need_drm or need_wayland or need_glesv2 +need_glx = flavors.contains('x11-gl') + +if need_x11 + x11_dep = dependency('x11') +endif + +if need_drm + libdrm_dep = dependency('libdrm') + gbm_dep = dependency('gbm') + libudev_dep = dependency('libudev') +endif + +if need_wayland + wayland_client_dep = dependency('wayland-client') + wayland_cursor_dep = dependency('wayland-cursor') + wayland_egl_dep = dependency('wayland-egl') + wayland_protocols_dep = dependency('wayland-protocols', version : '>= 1.12') + wayland_scanner_dep = dependency('wayland-scanner', native: true) +endif + +subdir('src') +subdir('data') +subdir('doc') + +message('Building with flavors: ' + flavors_str) diff -Nru glmark2-2014.03+git20150611.fa71af2d/meson_options.txt glmark2-2021.02/meson_options.txt --- glmark2-2014.03+git20150611.fa71af2d/meson_options.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/meson_options.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,15 @@ +option('flavors', type : 'array', + choices : [ + 'drm-gl', + 'drm-glesv2', + 'wayland-gl', + 'wayland-glesv2', + 'x11-gl', + 'x11-glesv2' + ], + value : [] +) + +option('data-path', type : 'string', value : '') +option('extras-path', type : 'string', value : '') +option('version-suffix', type : 'string', value : '') diff -Nru glmark2-2014.03+git20150611.fa71af2d/NEWS glmark2-2021.02/NEWS --- glmark2-2014.03+git20150611.fa71af2d/NEWS 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/NEWS 2021-04-25 01:47:22.000000000 +0800 @@ -1,3 +1,33 @@ +glmark2 2021.02 (20210215) +========================== + +* Meson build system support for DRM/X11/Wayland builds. +* Fix precision handling in various fragment shaders. +* Fix spurious failures when using --validate. +* Always draw to the correct, offscreen buffer when using --off-screen. +* Don't prefer framebuffer formats with more than 8 bytes per component. +* Add basic mouse and keyboard support on Wayland. + +glmark2 2020.04 (20200428) +========================== + +* Port Wayland flavor to xdg-shell window management. +* Support recent Android SDK/NDK versions. +* Add support for Windows via WGL and ANGLE-EGL. +* Support Raspberry Pi's dispmanx. +* Use glad for GL headers and dynamic GL library loading. +* Add --data-path command-line option to set data path at runtime. +* Add 'nframes' scene option to limit the number of rendered frames. +* Add F-Droid/fastlane metadata. + +glmark2 2017.07 (20170728) +========================== + +* Use monotonic clock for benchmark timings. +* Use udev to find the best node path to use in the DRM backend. +* Don't rebuild common source needlessly. +* Build in C++14 mode. + glmark2 2014.03 (20140310) ========================== diff -Nru glmark2-2014.03+git20150611.fa71af2d/README glmark2-2021.02/README --- glmark2-2014.03+git20150611.fa71af2d/README 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/README 2021-04-25 01:47:22.000000000 +0800 @@ -5,19 +5,14 @@ It is licensed under the GPLv3 (see COPYING). -To build glmark2 for X11/GL(ES2) you need: +To build glmark2 you need: - * python 2.x (>= 2.4) for the build system (waf) - * libpng 1.2 - -and for OpenGL 2.0: - - * libGL - -or for OpenGL ES 2.0: - - * libEGL - * libGLESv2 + * meson (>= 0.47) or python3 (if you are using WAF for building) + * libpng 1.6 + * Window system development files for the flavors you want to build + (e.g, X11, Wayland, drm) + * libGL (for desktop GL) + * libEGL and libGLESv2 (for GLESv2) Read the INSTALL file for building/installation instructions, diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/canvas-android.cpp glmark2-2021.02/src/canvas-android.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/canvas-android.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/canvas-android.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,7 +24,6 @@ #include "log.h" #include "options.h" #include "gl-headers.h" -#include #include #include @@ -41,8 +40,26 @@ EGL_NONE }; + /* Init the GLAD entrypoints */ + if (!egl_lib_.open_from_alternatives({"libEGL.so", "libEGL.so.1" })) { + Log::error("Error loading EGL library\n"); + return false; + } + + if (gladLoadEGLUserPtr(EGL_NO_DISPLAY, load_proc, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + /* Get the current EGL config */ EGLDisplay egl_display(eglGetCurrentDisplay()); + + /* Reinitialize GLAD with a known display */ + if (gladLoadEGLUserPtr(egl_display, load_proc, &egl_lib_) == 0) { + Log::error("Loading EGL entry points with display failed\n"); + return false; + } + EGLContext egl_context(eglGetCurrentContext()); EGLConfig egl_config(0); EGLint num_configs; @@ -51,6 +68,17 @@ eglChooseConfig(egl_display, attribs, &egl_config, 1, &num_configs); + /* Before calling GLES functions, init GLAD GLES */ + if (!gles_lib_.open("libGLESv2.so")) { + Log::error("Error loading GLES library\n"); + return false; + } + + if (!gladLoadGLES2UserPtr(load_proc, &gles_lib_)) { + Log::error("Loading GLESv2 entry points failed."); + return false; + } + resize(width_, height_); if (!eglSwapInterval(egl_display, 0)) @@ -168,6 +196,20 @@ * Private methods * *******************/ +GLADapiproc +CanvasAndroid::load_proc(void *userdata, const char *name) +{ + if (eglGetProcAddress) { + GLADapiproc sym = reinterpret_cast(eglGetProcAddress(name)); + if (sym) { + return sym; + } + } + + SharedLibrary* lib = reinterpret_cast(userdata); + return reinterpret_cast(lib->load(name)); +} + void CanvasAndroid::init_gl_extensions() { diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/canvas-android.h glmark2-2021.02/src/canvas-android.h --- glmark2-2014.03+git20150611.fa71af2d/src/canvas-android.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/canvas-android.h 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,8 @@ #define GLMARK2_CANVAS_ANDROID_H_ #include "canvas.h" +#include "shared-library.h" +#include "glad/egl.h" /** * Canvas for rendering to Android surfaces. @@ -49,6 +51,10 @@ void resize(int width, int height); private: + SharedLibrary egl_lib_; + SharedLibrary gles_lib_; + + static GLADapiproc load_proc(void *userdata, const char *name); void init_gl_extensions(); }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/canvas-generic.cpp glmark2-2021.02/src/canvas-generic.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/canvas-generic.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/canvas-generic.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -103,7 +103,7 @@ Options::FrameEnd m = Options::frame_end; if (m == Options::FrameEndDefault) { - if (offscreen_) + if (offscreen_ || Options::validate) m = Options::FrameEndFinish; else m = Options::FrameEndSwap; @@ -194,7 +194,8 @@ bool CanvasGeneric::supports_gl2() { - std::string gl_version_str(reinterpret_cast(glGetString(GL_VERSION))); + const char *version = reinterpret_cast(glGetString(GL_VERSION)); + std::string gl_version_str(version ? version : ""); int gl_major(0); size_t point_pos(gl_version_str.find('.')); @@ -221,7 +222,7 @@ { bool request_fullscreen = (width == -1 && height == -1); - int vid; + intptr_t vid; if (!gl_state_.gotNativeConfig(vid)) { Log::error("Error: Couldn't get GL visual config!\n"); @@ -249,6 +250,7 @@ } native_window_ = native_state_.window(cur_properties); + window_initialized_ = true; width_ = cur_properties.width; height_ = cur_properties.height; @@ -274,6 +276,12 @@ bool CanvasGeneric::do_make_current() { + if (!window_initialized_) { + Log::error("glwindow has never been initialized, check native-state code\n" + "it should not return a valid window until create_window() is called\n"); + return false; + } + gl_state_.init_surface(native_window_); if (!gl_state_.valid()) { @@ -281,7 +289,8 @@ return false; } - gl_state_.init_gl_extensions(); + if (!gl_state_.init_gl_extensions()) + return false; if (offscreen_) { if (!ensure_fbo()) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/canvas-generic.h glmark2-2021.02/src/canvas-generic.h --- glmark2-2014.03+git20150611.fa71af2d/src/canvas-generic.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/canvas-generic.h 2021-04-25 01:47:22.000000000 +0800 @@ -37,9 +37,10 @@ CanvasGeneric(NativeState& native_state, GLState& gl_state, int width, int height) : Canvas(width, height), - native_state_(native_state), gl_state_(gl_state), + native_state_(native_state), gl_state_(gl_state), native_window_(0), gl_color_format_(0), gl_depth_format_(0), - color_renderbuffer_(0), depth_renderbuffer_(0), fbo_(0) {} + color_renderbuffer_(0), depth_renderbuffer_(0), fbo_(0), + window_initialized_(false) {} bool init(); bool reset(); @@ -70,6 +71,7 @@ GLuint color_renderbuffer_; GLuint depth_renderbuffer_; GLuint fbo_; + bool window_initialized_; }; #endif /* GLMARK2_CANVAS_GENERIC_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/EGL/eglplatform.h glmark2-2021.02/src/glad/include/EGL/eglplatform.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/EGL/eglplatform.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/EGL/eglplatform.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,176 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 30994 $ on $Date: 2015-04-30 13:36:48 -0700 (Thu, 30 Apr 2015) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. + * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. + */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__EMSCRIPTEN__) + +typedef int EGLNativeDisplayType; +typedef int EGLNativePixmapType; +typedef int EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(WL_EGL_PLATFORM) + +typedef struct wl_display *EGLNativeDisplayType; +typedef struct wl_egl_pixmap *EGLNativePixmapType; +typedef struct wl_egl_window *EGLNativeWindowType; + +#elif defined(__GBM__) + +typedef struct gbm_device *EGLNativeDisplayType; +typedef struct gbm_bo *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(__ANDROID__) || defined(ANDROID) + +struct ANativeWindow; +struct egl_native_pixmap_t; + +typedef void* EGLNativeDisplayType; +typedef struct egl_native_pixmap_t* EGLNativePixmapType; +typedef struct ANativeWindow* EGLNativeWindowType; + +#elif defined(USE_OZONE) + +typedef intptr_t EGLNativeDisplayType; +typedef intptr_t EGLNativePixmapType; +typedef intptr_t EGLNativeWindowType; + +#elif defined(__unix__) && defined(EGL_NO_X11) + +typedef void *EGLNativeDisplayType; +typedef khronos_uintptr_t EGLNativePixmapType; +typedef khronos_uintptr_t EGLNativeWindowType; + +#elif defined(__unix__) || defined(USE_X11) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#elif defined(__APPLE__) + +typedef int EGLNativeDisplayType; +typedef void *EGLNativePixmapType; +typedef void *EGLNativeWindowType; + +#elif defined(__HAIKU__) + +#include + +typedef void *EGLNativeDisplayType; +typedef khronos_uintptr_t EGLNativePixmapType; +typedef khronos_uintptr_t EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. + */ +typedef khronos_int32_t EGLint; + + +/* C++ / C typecast macros for special EGL handle values */ +#if defined(__cplusplus) +#define EGL_CAST(type, value) (static_cast(value)) +#else +#define EGL_CAST(type, value) ((type) (value)) +#endif + +#endif /* __eglplatform_h */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/egl.h glmark2-2021.02/src/glad/include/glad/egl.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/egl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/glad/egl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,554 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:15 2019 + * + * Generator: C/C++ + * Specification: egl + * Extensions: 4 + * + * APIs: + * - egl=1.5 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='egl=1.5' --extensions='EGL_EXT_platform_base,EGL_KHR_platform_gbm,EGL_KHR_platform_wayland,EGL_KHR_platform_x11' c + * + * Online: + * http://glad.sh/#api=egl%3D1.5&extensions=EGL_EXT_platform_base%2CEGL_KHR_platform_gbm%2CEGL_KHR_platform_wayland%2CEGL_KHR_platform_x11&generator=c&options= + * + */ + +#ifndef GLAD_EGL_H_ +#define GLAD_EGL_H_ + + +#define GLAD_EGL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define EGL_ALPHA_FORMAT 0x3088 +#define EGL_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_ALPHA_FORMAT_PRE 0x308C +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_BLUE_SIZE 0x3022 +#define EGL_BUFFER_DESTROYED 0x3095 +#define EGL_BUFFER_PRESERVED 0x3094 +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_CLIENT_APIS 0x308D +#define EGL_CL_EVENT_HANDLE 0x309C +#define EGL_COLORSPACE 0x3087 +#define EGL_COLORSPACE_LINEAR 0x308A +#define EGL_COLORSPACE_sRGB 0x3089 +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_CONDITION_SATISFIED 0x30F6 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_CONFORMANT 0x3042 +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_CONTEXT_LOST 0x300E +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2 +#define EGL_CORE_NATIVE_ENGINE 0x305B +#define EGL_DEFAULT_DISPLAY EGL_CAST(EGLNativeDisplayType,0) +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_DISPLAY_SCALING 10000 +#define EGL_DONT_CARE EGL_CAST(EGLint,-1) +#define EGL_DRAW 0x3059 +#define EGL_EXTENSIONS 0x3055 +#define EGL_FALSE 0 +#define EGL_FOREVER 0xFFFFFFFFFFFFFFFF +#define EGL_GL_COLORSPACE 0x309D +#define EGL_GL_COLORSPACE_LINEAR 0x308A +#define EGL_GL_COLORSPACE_SRGB 0x3089 +#define EGL_GL_RENDERBUFFER 0x30B9 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_GL_TEXTURE_3D 0x30B2 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7 +#define EGL_GL_TEXTURE_LEVEL 0x30BC +#define EGL_GL_TEXTURE_ZOFFSET 0x30BD +#define EGL_GREEN_SIZE 0x3023 +#define EGL_HEIGHT 0x3056 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_IMAGE_PRESERVED 0x30D2 +#define EGL_LARGEST_PBUFFER 0x3058 +#define EGL_LEVEL 0x3029 +#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF +#define EGL_LUMINANCE_BUFFER 0x308F +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_NONE 0x3038 +#define EGL_NON_CONFORMANT_CONFIG 0x3051 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_NO_CONTEXT EGL_CAST(EGLContext,0) +#define EGL_NO_DISPLAY EGL_CAST(EGLDisplay,0) +#define EGL_NO_IMAGE EGL_CAST(EGLImage,0) +#define EGL_NO_RESET_NOTIFICATION 0x31BE +#define EGL_NO_SURFACE EGL_CAST(EGLSurface,0) +#define EGL_NO_SYNC EGL_CAST(EGLSync,0) +#define EGL_NO_TEXTURE 0x305C +#define EGL_OPENGL_API 0x30A2 +#define EGL_OPENGL_BIT 0x0008 +#define EGL_OPENGL_ES2_BIT 0x0004 +#define EGL_OPENGL_ES3_BIT 0x00000040 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENGL_ES_BIT 0x0001 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENVG_BIT 0x0002 +#define EGL_OPENVG_IMAGE 0x3096 +#define EGL_PBUFFER_BIT 0x0001 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_PIXMAP_BIT 0x0002 +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#define EGL_PLATFORM_WAYLAND_KHR 0x31D8 +#define EGL_PLATFORM_X11_KHR 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6 +#define EGL_READ 0x305A +#define EGL_RED_SIZE 0x3024 +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_RGB_BUFFER 0x308E +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SIGNALED 0x30F2 +#define EGL_SINGLE_BUFFER 0x3085 +#define EGL_SLOW_CONFIG 0x3050 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_SUCCESS 0x3000 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 +#define EGL_SYNC_CL_EVENT 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF +#define EGL_SYNC_CONDITION 0x30F8 +#define EGL_SYNC_FENCE 0x30F9 +#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0 +#define EGL_SYNC_STATUS 0x30F1 +#define EGL_SYNC_TYPE 0x30F7 +#define EGL_TEXTURE_2D 0x305F +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_RGB 0x305D +#define EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_TARGET 0x3081 +#define EGL_TIMEOUT_EXPIRED 0x30F5 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_TRANSPARENT_RGB 0x3052 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRUE 1 +#define EGL_UNKNOWN EGL_CAST(EGLint,-1) +#define EGL_UNSIGNALED 0x30F3 +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_VERTICAL_RESOLUTION 0x3091 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_COLORSPACE_LINEAR 0x308A +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 +#define EGL_VG_COLORSPACE_sRGB 0x3089 +#define EGL_WIDTH 0x3057 +#define EGL_WINDOW_BIT 0x0004 + + +#include +#include + + + + + + + + + + + +struct AHardwareBuffer; + +typedef unsigned int EGLBoolean; +typedef unsigned int EGLenum; +typedef intptr_t EGLAttribKHR; +typedef intptr_t EGLAttrib; +typedef void *EGLClientBuffer; +typedef void *EGLConfig; +typedef void *EGLContext; +typedef void *EGLDeviceEXT; +typedef void *EGLDisplay; +typedef void *EGLImage; +typedef void *EGLImageKHR; +typedef void *EGLLabelKHR; +typedef void *EGLObjectKHR; +typedef void *EGLOutputLayerEXT; +typedef void *EGLOutputPortEXT; +typedef void *EGLStreamKHR; +typedef void *EGLSurface; +typedef void *EGLSync; +typedef void *EGLSyncKHR; +typedef void *EGLSyncNV; +typedef void (*__eglMustCastToProperFunctionPointerType)(void); +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +typedef khronos_utime_nanoseconds_t EGLTime; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +typedef khronos_utime_nanoseconds_t EGLuint64NV; +typedef khronos_uint64_t EGLuint64KHR; +typedef khronos_stime_nanoseconds_t EGLnsecsANDROID; +typedef int EGLNativeFileDescriptorKHR; +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +typedef void (GLAD_API_PTR *EGLDEBUGPROCKHR)(EGLenum error,const char *command,EGLint messageType,EGLLabelKHR threadLabel,EGLLabelKHR objectLabel,const char* message); + + +#define EGL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_0; +#define EGL_VERSION_1_1 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_1; +#define EGL_VERSION_1_2 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_2; +#define EGL_VERSION_1_3 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_3; +#define EGL_VERSION_1_4 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_4; +#define EGL_VERSION_1_5 1 +GLAD_API_CALL int GLAD_EGL_VERSION_1_5; +#define EGL_EXT_platform_base 1 +GLAD_API_CALL int GLAD_EGL_EXT_platform_base; +#define EGL_KHR_platform_gbm 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_gbm; +#define EGL_KHR_platform_wayland 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_wayland; +#define EGL_KHR_platform_x11 1 +GLAD_API_CALL int GLAD_EGL_KHR_platform_x11; + + +typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDAPIPROC)(EGLenum api); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDTEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLCHOOSECONFIGPROC)(EGLDisplay dpy, const EGLint * attrib_list, EGLConfig * configs, EGLint config_size, EGLint * num_config); +typedef EGLint (GLAD_API_PTR *PFNEGLCLIENTWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLCOPYBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target); +typedef EGLContext (GLAD_API_PTR *PFNEGLCREATECONTEXTPROC)(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint * attrib_list); +typedef EGLImage (GLAD_API_PTR *PFNEGLCREATEIMAGEPROC)(EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC)(EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERSURFACEPROC)(EGLDisplay dpy, EGLConfig config, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_pixmap, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC)(EGLDisplay dpy, EGLConfig config, void * native_pixmap, const EGLint * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_window, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC)(EGLDisplay dpy, EGLConfig config, void * native_window, const EGLint * attrib_list); +typedef EGLSync (GLAD_API_PTR *PFNEGLCREATESYNCPROC)(EGLDisplay dpy, EGLenum type, const EGLAttrib * attrib_list); +typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint * attrib_list); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYIMAGEPROC)(EGLDisplay dpy, EGLImage image); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSYNCPROC)(EGLDisplay dpy, EGLSync sync); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGATTRIBPROC)(EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGSPROC)(EGLDisplay dpy, EGLConfig * configs, EGLint config_size, EGLint * num_config); +typedef EGLContext (GLAD_API_PTR *PFNEGLGETCURRENTCONTEXTPROC)(void); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETCURRENTDISPLAYPROC)(void); +typedef EGLSurface (GLAD_API_PTR *PFNEGLGETCURRENTSURFACEPROC)(EGLint readdraw); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETDISPLAYPROC)(EGLNativeDisplayType display_id); +typedef EGLint (GLAD_API_PTR *PFNEGLGETERRORPROC)(void); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETPLATFORMDISPLAYPROC)(EGLenum platform, void * native_display, const EGLAttrib * attrib_list); +typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETPLATFORMDISPLAYEXTPROC)(EGLenum platform, void * native_display, const EGLint * attrib_list); +typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char * procname); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETSYNCATTRIBPROC)(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLINITIALIZEPROC)(EGLDisplay dpy, EGLint * major, EGLint * minor); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLMAKECURRENTPROC)(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +typedef EGLenum (GLAD_API_PTR *PFNEGLQUERYAPIPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint * value); +typedef const char * (GLAD_API_PTR *PFNEGLQUERYSTRINGPROC)(EGLDisplay dpy, EGLint name); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint * value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETHREADPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSURFACEATTRIBPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPINTERVALPROC)(EGLDisplay dpy, EGLint interval); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLTERMINATEPROC)(EGLDisplay dpy); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITCLIENTPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITGLPROC)(void); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITNATIVEPROC)(EGLint engine); +typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags); + +GLAD_API_CALL PFNEGLBINDAPIPROC glad_eglBindAPI; +#define eglBindAPI glad_eglBindAPI +GLAD_API_CALL PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage; +#define eglBindTexImage glad_eglBindTexImage +GLAD_API_CALL PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig; +#define eglChooseConfig glad_eglChooseConfig +GLAD_API_CALL PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync; +#define eglClientWaitSync glad_eglClientWaitSync +GLAD_API_CALL PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers; +#define eglCopyBuffers glad_eglCopyBuffers +GLAD_API_CALL PFNEGLCREATECONTEXTPROC glad_eglCreateContext; +#define eglCreateContext glad_eglCreateContext +GLAD_API_CALL PFNEGLCREATEIMAGEPROC glad_eglCreateImage; +#define eglCreateImage glad_eglCreateImage +GLAD_API_CALL PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer; +#define eglCreatePbufferFromClientBuffer glad_eglCreatePbufferFromClientBuffer +GLAD_API_CALL PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface; +#define eglCreatePbufferSurface glad_eglCreatePbufferSurface +GLAD_API_CALL PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface; +#define eglCreatePixmapSurface glad_eglCreatePixmapSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface; +#define eglCreatePlatformPixmapSurface glad_eglCreatePlatformPixmapSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC glad_eglCreatePlatformPixmapSurfaceEXT; +#define eglCreatePlatformPixmapSurfaceEXT glad_eglCreatePlatformPixmapSurfaceEXT +GLAD_API_CALL PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface; +#define eglCreatePlatformWindowSurface glad_eglCreatePlatformWindowSurface +GLAD_API_CALL PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC glad_eglCreatePlatformWindowSurfaceEXT; +#define eglCreatePlatformWindowSurfaceEXT glad_eglCreatePlatformWindowSurfaceEXT +GLAD_API_CALL PFNEGLCREATESYNCPROC glad_eglCreateSync; +#define eglCreateSync glad_eglCreateSync +GLAD_API_CALL PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface; +#define eglCreateWindowSurface glad_eglCreateWindowSurface +GLAD_API_CALL PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext; +#define eglDestroyContext glad_eglDestroyContext +GLAD_API_CALL PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage; +#define eglDestroyImage glad_eglDestroyImage +GLAD_API_CALL PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface; +#define eglDestroySurface glad_eglDestroySurface +GLAD_API_CALL PFNEGLDESTROYSYNCPROC glad_eglDestroySync; +#define eglDestroySync glad_eglDestroySync +GLAD_API_CALL PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib; +#define eglGetConfigAttrib glad_eglGetConfigAttrib +GLAD_API_CALL PFNEGLGETCONFIGSPROC glad_eglGetConfigs; +#define eglGetConfigs glad_eglGetConfigs +GLAD_API_CALL PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext; +#define eglGetCurrentContext glad_eglGetCurrentContext +GLAD_API_CALL PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay; +#define eglGetCurrentDisplay glad_eglGetCurrentDisplay +GLAD_API_CALL PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface; +#define eglGetCurrentSurface glad_eglGetCurrentSurface +GLAD_API_CALL PFNEGLGETDISPLAYPROC glad_eglGetDisplay; +#define eglGetDisplay glad_eglGetDisplay +GLAD_API_CALL PFNEGLGETERRORPROC glad_eglGetError; +#define eglGetError glad_eglGetError +GLAD_API_CALL PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay; +#define eglGetPlatformDisplay glad_eglGetPlatformDisplay +GLAD_API_CALL PFNEGLGETPLATFORMDISPLAYEXTPROC glad_eglGetPlatformDisplayEXT; +#define eglGetPlatformDisplayEXT glad_eglGetPlatformDisplayEXT +GLAD_API_CALL PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress; +#define eglGetProcAddress glad_eglGetProcAddress +GLAD_API_CALL PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib; +#define eglGetSyncAttrib glad_eglGetSyncAttrib +GLAD_API_CALL PFNEGLINITIALIZEPROC glad_eglInitialize; +#define eglInitialize glad_eglInitialize +GLAD_API_CALL PFNEGLMAKECURRENTPROC glad_eglMakeCurrent; +#define eglMakeCurrent glad_eglMakeCurrent +GLAD_API_CALL PFNEGLQUERYAPIPROC glad_eglQueryAPI; +#define eglQueryAPI glad_eglQueryAPI +GLAD_API_CALL PFNEGLQUERYCONTEXTPROC glad_eglQueryContext; +#define eglQueryContext glad_eglQueryContext +GLAD_API_CALL PFNEGLQUERYSTRINGPROC glad_eglQueryString; +#define eglQueryString glad_eglQueryString +GLAD_API_CALL PFNEGLQUERYSURFACEPROC glad_eglQuerySurface; +#define eglQuerySurface glad_eglQuerySurface +GLAD_API_CALL PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage; +#define eglReleaseTexImage glad_eglReleaseTexImage +GLAD_API_CALL PFNEGLRELEASETHREADPROC glad_eglReleaseThread; +#define eglReleaseThread glad_eglReleaseThread +GLAD_API_CALL PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib; +#define eglSurfaceAttrib glad_eglSurfaceAttrib +GLAD_API_CALL PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers; +#define eglSwapBuffers glad_eglSwapBuffers +GLAD_API_CALL PFNEGLSWAPINTERVALPROC glad_eglSwapInterval; +#define eglSwapInterval glad_eglSwapInterval +GLAD_API_CALL PFNEGLTERMINATEPROC glad_eglTerminate; +#define eglTerminate glad_eglTerminate +GLAD_API_CALL PFNEGLWAITCLIENTPROC glad_eglWaitClient; +#define eglWaitClient glad_eglWaitClient +GLAD_API_CALL PFNEGLWAITGLPROC glad_eglWaitGL; +#define eglWaitGL glad_eglWaitGL +GLAD_API_CALL PFNEGLWAITNATIVEPROC glad_eglWaitNative; +#define eglWaitNative glad_eglWaitNative +GLAD_API_CALL PFNEGLWAITSYNCPROC glad_eglWaitSync; +#define eglWaitSync glad_eglWaitSync + + + + + +GLAD_API_CALL int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadEGL(EGLDisplay display, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/gles2.h glmark2-2021.02/src/glad/include/glad/gles2.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/gles2.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/glad/gles2.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1015 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:14 2019 + * + * Generator: C/C++ + * Specification: gl + * Extensions: 2 + * + * APIs: + * - gles2=2.0 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='gles2=2.0' --extensions='GL_OES_mapbuffer,GL_OES_required_internalformat' c + * + * Online: + * http://glad.sh/#api=gles2%3D2.0&extensions=GL_OES_mapbuffer%2CGL_OES_required_internalformat&generator=c&options= + * + */ + +#ifndef GLAD_GLES2_H_ +#define GLAD_GLES2_H_ + +#ifdef __gl2_h_ + #error OpenGL ES 2 header already included (API: gles2), remove previous include! +#endif +#define __gl2_h_ 1 + +#ifdef __gl3_h_ + #error OpenGL ES 3 header already included (API: gles2), remove previous include! +#endif +#define __gl3_h_ 1 + + +#define GLAD_GLES2 + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALPHA 0x1906 +#define GL_ALPHA8_OES 0x803C +#define GL_ALPHA_BITS 0x0D55 +#define GL_ALWAYS 0x0207 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_BACK 0x0405 +#define GL_BLEND 0x0BE2 +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLUE_BITS 0x0D54 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_BYTE 0x1400 +#define GL_CCW 0x0901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_CW 0x0900 +#define GL_DECR 0x1E03 +#define GL_DECR_WRAP 0x8508 +#define GL_DELETE_STATUS 0x8B80 +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_DEPTH_BITS 0x0D56 +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DITHER 0x0BD0 +#define GL_DONT_CARE 0x1100 +#define GL_DST_ALPHA 0x0304 +#define GL_DST_COLOR 0x0306 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_EQUAL 0x0202 +#define GL_EXTENSIONS 0x1F03 +#define GL_FALSE 0 +#define GL_FASTEST 0x1101 +#define GL_FIXED 0x140C +#define GL_FLOAT 0x1406 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRONT 0x0404 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_FRONT_FACE 0x0B46 +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_FUNC_SUBTRACT 0x800A +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_GEQUAL 0x0206 +#define GL_GREATER 0x0204 +#define GL_GREEN_BITS 0x0D53 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_HIGH_INT 0x8DF5 +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_INCR 0x1E02 +#define GL_INCR_WRAP 0x8507 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_INVALID_OPERATION 0x0502 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVERT 0x150A +#define GL_KEEP 0x1E00 +#define GL_LEQUAL 0x0203 +#define GL_LESS 0x0201 +#define GL_LINEAR 0x2601 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINK_STATUS 0x8B82 +#define GL_LOW_FLOAT 0x8DF0 +#define GL_LOW_INT 0x8DF3 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE4_ALPHA4_OES 0x8043 +#define GL_LUMINANCE8_ALPHA8_OES 0x8045 +#define GL_LUMINANCE8_OES 0x8040 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_NEAREST 0x2600 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_NEVER 0x0200 +#define GL_NICEST 0x1102 +#define GL_NONE 0 +#define GL_NOTEQUAL 0x0205 +#define GL_NO_ERROR 0 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_ONE 1 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_POINTS 0x0000 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_RED_BITS 0x0D52 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERER 0x1F01 +#define GL_REPEAT 0x2901 +#define GL_REPLACE 0x1E01 +#define GL_RGB 0x1907 +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_RGB565 0x8D62 +#define GL_RGB565_OES 0x8D62 +#define GL_RGB5_A1 0x8057 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA 0x1908 +#define GL_RGBA4 0x8056 +#define GL_RGBA4_OES 0x8056 +#define GL_RGBA8_OES 0x8058 +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_TYPE 0x8B4F +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_SHORT 0x1402 +#define GL_SRC_ALPHA 0x0302 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_SRC_COLOR 0x0300 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_STENCIL_BITS 0x0D57 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STREAM_DRAW 0x88E0 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRUE 1 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_VENDOR 0x1F00 +#define GL_VERSION 0x1F02 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_VIEWPORT 0x0BA2 +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_ZERO 0 + + +#include +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef khronos_int8_t GLbyte; +typedef khronos_uint8_t GLubyte; +typedef khronos_int16_t GLshort; +typedef khronos_uint16_t GLushort; +typedef int GLint; +typedef unsigned int GLuint; +typedef khronos_int32_t GLclampx; +typedef int GLsizei; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void *GLeglClientBufferEXT; +typedef void *GLeglImageOES; +typedef char GLchar; +typedef char GLcharARB; +#ifdef __APPLE__ +typedef void *GLhandleARB; +#else +typedef unsigned int GLhandleARB; +#endif +typedef khronos_uint16_t GLhalf; +typedef khronos_uint16_t GLhalfARB; +typedef khronos_int32_t GLfixed; +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptr; +#else +typedef khronos_intptr_t GLintptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptrARB; +#else +typedef khronos_intptr_t GLintptrARB; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptr; +#else +typedef khronos_ssize_t GLsizeiptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptrARB; +#else +typedef khronos_ssize_t GLsizeiptrARB; +#endif +typedef khronos_int64_t GLint64; +typedef khronos_int64_t GLint64EXT; +typedef khronos_uint64_t GLuint64; +typedef khronos_uint64_t GLuint64EXT; +typedef struct __GLsync *GLsync; +struct _cl_context; +struct _cl_event; +typedef void (GLAD_API_PTR *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); +typedef unsigned short GLhalfNV; +typedef GLintptr GLvdpauSurfaceNV; +typedef void (GLAD_API_PTR *GLVULKANPROCNV)(void); + + +#define GL_ES_VERSION_2_0 1 +GLAD_API_CALL int GLAD_GL_ES_VERSION_2_0; +#define GL_OES_mapbuffer 1 +GLAD_API_CALL int GLAD_GL_OES_mapbuffer; +#define GL_OES_required_internalformat 1 +GLAD_API_CALL int GLAD_GL_OES_required_internalformat; + + +typedef void (GLAD_API_PTR *PFNGLACTIVETEXTUREPROC)(GLenum texture); +typedef void (GLAD_API_PTR *PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer); +typedef void (GLAD_API_PTR *PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture); +typedef void (GLAD_API_PTR *PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (GLAD_API_PTR *PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void * data, GLenum usage); +typedef void (GLAD_API_PTR *PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void * data); +typedef GLenum (GLAD_API_PTR *PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLCLEARPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARDEPTHFPROC)(GLfloat d); +typedef void (GLAD_API_PTR *PFNGLCLEARSTENCILPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +typedef void (GLAD_API_PTR *PFNGLCOMPILESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef GLuint (GLAD_API_PTR *PFNGLCREATEPROGRAMPROC)(void); +typedef GLuint (GLAD_API_PTR *PFNGLCREATESHADERPROC)(GLenum type); +typedef void (GLAD_API_PTR *PFNGLCULLFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLDELETEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLDELETESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLDEPTHFUNCPROC)(GLenum func); +typedef void (GLAD_API_PTR *PFNGLDEPTHMASKPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLDEPTHRANGEFPROC)(GLfloat n, GLfloat f); +typedef void (GLAD_API_PTR *PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDISABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLENABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLFINISHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRONTFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLGENBUFFERSPROC)(GLsizei n, GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLGENTEXTURESPROC)(GLsizei n, GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLGENERATEMIPMAPPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei * count, GLuint * shaders); +typedef GLint (GLAD_API_PTR *PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVOESPROC)(GLenum target, GLenum pname, void ** params); +typedef GLenum (GLAD_API_PTR *PFNGLGETERRORPROC)(void); +typedef void (GLAD_API_PTR *PFNGLGETFLOATVPROC)(GLenum pname, GLfloat * data); +typedef void (GLAD_API_PTR *PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERVPROC)(GLenum pname, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETSHADERPRECISIONFORMATPROC)(GLenum shadertype, GLenum precisiontype, GLint * range, GLint * precision); +typedef void (GLAD_API_PTR *PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * source); +typedef void (GLAD_API_PTR *PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint * params); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGPROC)(GLenum name); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef GLint (GLAD_API_PTR *PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void ** pointer); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLHINTPROC)(GLenum target, GLenum mode); +typedef GLboolean (GLAD_API_PTR *PFNGLISBUFFERPROC)(GLuint buffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDPROC)(GLenum cap); +typedef GLboolean (GLAD_API_PTR *PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISPROGRAMPROC)(GLuint program); +typedef GLboolean (GLAD_API_PTR *PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISSHADERPROC)(GLuint shader); +typedef GLboolean (GLAD_API_PTR *PFNGLISTEXTUREPROC)(GLuint texture); +typedef void (GLAD_API_PTR *PFNGLLINEWIDTHPROC)(GLfloat width); +typedef void (GLAD_API_PTR *PFNGLLINKPROGRAMPROC)(GLuint program); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERPROC)(GLenum target, GLenum access); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFEROESPROC)(GLenum target, GLenum access); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units); +typedef void (GLAD_API_PTR *PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLRELEASESHADERCOMPILERPROC)(void); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert); +typedef void (GLAD_API_PTR *PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSHADERBINARYPROC)(GLsizei count, const GLuint * shaders, GLenum binaryformat, const void * binary, GLsizei length); +typedef void (GLAD_API_PTR *PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const* string, const GLint * length); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IPROC)(GLint location, GLint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFERPROC)(GLenum target); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFEROESPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLUSEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVALIDATEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); + +GLAD_API_CALL PFNGLACTIVETEXTUREPROC glad_glActiveTexture; +#define glActiveTexture glad_glActiveTexture +GLAD_API_CALL PFNGLATTACHSHADERPROC glad_glAttachShader; +#define glAttachShader glad_glAttachShader +GLAD_API_CALL PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation; +#define glBindAttribLocation glad_glBindAttribLocation +GLAD_API_CALL PFNGLBINDBUFFERPROC glad_glBindBuffer; +#define glBindBuffer glad_glBindBuffer +GLAD_API_CALL PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer; +#define glBindFramebuffer glad_glBindFramebuffer +GLAD_API_CALL PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer; +#define glBindRenderbuffer glad_glBindRenderbuffer +GLAD_API_CALL PFNGLBINDTEXTUREPROC glad_glBindTexture; +#define glBindTexture glad_glBindTexture +GLAD_API_CALL PFNGLBLENDCOLORPROC glad_glBlendColor; +#define glBlendColor glad_glBlendColor +GLAD_API_CALL PFNGLBLENDEQUATIONPROC glad_glBlendEquation; +#define glBlendEquation glad_glBlendEquation +GLAD_API_CALL PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate; +#define glBlendEquationSeparate glad_glBlendEquationSeparate +GLAD_API_CALL PFNGLBLENDFUNCPROC glad_glBlendFunc; +#define glBlendFunc glad_glBlendFunc +GLAD_API_CALL PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; +#define glBlendFuncSeparate glad_glBlendFuncSeparate +GLAD_API_CALL PFNGLBUFFERDATAPROC glad_glBufferData; +#define glBufferData glad_glBufferData +GLAD_API_CALL PFNGLBUFFERSUBDATAPROC glad_glBufferSubData; +#define glBufferSubData glad_glBufferSubData +GLAD_API_CALL PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus; +#define glCheckFramebufferStatus glad_glCheckFramebufferStatus +GLAD_API_CALL PFNGLCLEARPROC glad_glClear; +#define glClear glad_glClear +GLAD_API_CALL PFNGLCLEARCOLORPROC glad_glClearColor; +#define glClearColor glad_glClearColor +GLAD_API_CALL PFNGLCLEARDEPTHFPROC glad_glClearDepthf; +#define glClearDepthf glad_glClearDepthf +GLAD_API_CALL PFNGLCLEARSTENCILPROC glad_glClearStencil; +#define glClearStencil glad_glClearStencil +GLAD_API_CALL PFNGLCOLORMASKPROC glad_glColorMask; +#define glColorMask glad_glColorMask +GLAD_API_CALL PFNGLCOMPILESHADERPROC glad_glCompileShader; +#define glCompileShader glad_glCompileShader +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D; +#define glCompressedTexImage2D glad_glCompressedTexImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D; +#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D +GLAD_API_CALL PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D; +#define glCopyTexImage2D glad_glCopyTexImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D; +#define glCopyTexSubImage2D glad_glCopyTexSubImage2D +GLAD_API_CALL PFNGLCREATEPROGRAMPROC glad_glCreateProgram; +#define glCreateProgram glad_glCreateProgram +GLAD_API_CALL PFNGLCREATESHADERPROC glad_glCreateShader; +#define glCreateShader glad_glCreateShader +GLAD_API_CALL PFNGLCULLFACEPROC glad_glCullFace; +#define glCullFace glad_glCullFace +GLAD_API_CALL PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers; +#define glDeleteBuffers glad_glDeleteBuffers +GLAD_API_CALL PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers; +#define glDeleteFramebuffers glad_glDeleteFramebuffers +GLAD_API_CALL PFNGLDELETEPROGRAMPROC glad_glDeleteProgram; +#define glDeleteProgram glad_glDeleteProgram +GLAD_API_CALL PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers; +#define glDeleteRenderbuffers glad_glDeleteRenderbuffers +GLAD_API_CALL PFNGLDELETESHADERPROC glad_glDeleteShader; +#define glDeleteShader glad_glDeleteShader +GLAD_API_CALL PFNGLDELETETEXTURESPROC glad_glDeleteTextures; +#define glDeleteTextures glad_glDeleteTextures +GLAD_API_CALL PFNGLDEPTHFUNCPROC glad_glDepthFunc; +#define glDepthFunc glad_glDepthFunc +GLAD_API_CALL PFNGLDEPTHMASKPROC glad_glDepthMask; +#define glDepthMask glad_glDepthMask +GLAD_API_CALL PFNGLDEPTHRANGEFPROC glad_glDepthRangef; +#define glDepthRangef glad_glDepthRangef +GLAD_API_CALL PFNGLDETACHSHADERPROC glad_glDetachShader; +#define glDetachShader glad_glDetachShader +GLAD_API_CALL PFNGLDISABLEPROC glad_glDisable; +#define glDisable glad_glDisable +GLAD_API_CALL PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray; +#define glDisableVertexAttribArray glad_glDisableVertexAttribArray +GLAD_API_CALL PFNGLDRAWARRAYSPROC glad_glDrawArrays; +#define glDrawArrays glad_glDrawArrays +GLAD_API_CALL PFNGLDRAWELEMENTSPROC glad_glDrawElements; +#define glDrawElements glad_glDrawElements +GLAD_API_CALL PFNGLENABLEPROC glad_glEnable; +#define glEnable glad_glEnable +GLAD_API_CALL PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray; +#define glEnableVertexAttribArray glad_glEnableVertexAttribArray +GLAD_API_CALL PFNGLFINISHPROC glad_glFinish; +#define glFinish glad_glFinish +GLAD_API_CALL PFNGLFLUSHPROC glad_glFlush; +#define glFlush glad_glFlush +GLAD_API_CALL PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer; +#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D; +#define glFramebufferTexture2D glad_glFramebufferTexture2D +GLAD_API_CALL PFNGLFRONTFACEPROC glad_glFrontFace; +#define glFrontFace glad_glFrontFace +GLAD_API_CALL PFNGLGENBUFFERSPROC glad_glGenBuffers; +#define glGenBuffers glad_glGenBuffers +GLAD_API_CALL PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers; +#define glGenFramebuffers glad_glGenFramebuffers +GLAD_API_CALL PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers; +#define glGenRenderbuffers glad_glGenRenderbuffers +GLAD_API_CALL PFNGLGENTEXTURESPROC glad_glGenTextures; +#define glGenTextures glad_glGenTextures +GLAD_API_CALL PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap; +#define glGenerateMipmap glad_glGenerateMipmap +GLAD_API_CALL PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib; +#define glGetActiveAttrib glad_glGetActiveAttrib +GLAD_API_CALL PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform; +#define glGetActiveUniform glad_glGetActiveUniform +GLAD_API_CALL PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders; +#define glGetAttachedShaders glad_glGetAttachedShaders +GLAD_API_CALL PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation; +#define glGetAttribLocation glad_glGetAttribLocation +GLAD_API_CALL PFNGLGETBOOLEANVPROC glad_glGetBooleanv; +#define glGetBooleanv glad_glGetBooleanv +GLAD_API_CALL PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv; +#define glGetBufferParameteriv glad_glGetBufferParameteriv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv; +#define glGetBufferPointerv glad_glGetBufferPointerv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES; +#define glGetBufferPointervOES glad_glGetBufferPointervOES +GLAD_API_CALL PFNGLGETERRORPROC glad_glGetError; +#define glGetError glad_glGetError +GLAD_API_CALL PFNGLGETFLOATVPROC glad_glGetFloatv; +#define glGetFloatv glad_glGetFloatv +GLAD_API_CALL PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv; +#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv +GLAD_API_CALL PFNGLGETINTEGERVPROC glad_glGetIntegerv; +#define glGetIntegerv glad_glGetIntegerv +GLAD_API_CALL PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog; +#define glGetProgramInfoLog glad_glGetProgramInfoLog +GLAD_API_CALL PFNGLGETPROGRAMIVPROC glad_glGetProgramiv; +#define glGetProgramiv glad_glGetProgramiv +GLAD_API_CALL PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv; +#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv +GLAD_API_CALL PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog; +#define glGetShaderInfoLog glad_glGetShaderInfoLog +GLAD_API_CALL PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat; +#define glGetShaderPrecisionFormat glad_glGetShaderPrecisionFormat +GLAD_API_CALL PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource; +#define glGetShaderSource glad_glGetShaderSource +GLAD_API_CALL PFNGLGETSHADERIVPROC glad_glGetShaderiv; +#define glGetShaderiv glad_glGetShaderiv +GLAD_API_CALL PFNGLGETSTRINGPROC glad_glGetString; +#define glGetString glad_glGetString +GLAD_API_CALL PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv; +#define glGetTexParameterfv glad_glGetTexParameterfv +GLAD_API_CALL PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv; +#define glGetTexParameteriv glad_glGetTexParameteriv +GLAD_API_CALL PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation; +#define glGetUniformLocation glad_glGetUniformLocation +GLAD_API_CALL PFNGLGETUNIFORMFVPROC glad_glGetUniformfv; +#define glGetUniformfv glad_glGetUniformfv +GLAD_API_CALL PFNGLGETUNIFORMIVPROC glad_glGetUniformiv; +#define glGetUniformiv glad_glGetUniformiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv; +#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv +GLAD_API_CALL PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv; +#define glGetVertexAttribfv glad_glGetVertexAttribfv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv; +#define glGetVertexAttribiv glad_glGetVertexAttribiv +GLAD_API_CALL PFNGLHINTPROC glad_glHint; +#define glHint glad_glHint +GLAD_API_CALL PFNGLISBUFFERPROC glad_glIsBuffer; +#define glIsBuffer glad_glIsBuffer +GLAD_API_CALL PFNGLISENABLEDPROC glad_glIsEnabled; +#define glIsEnabled glad_glIsEnabled +GLAD_API_CALL PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer; +#define glIsFramebuffer glad_glIsFramebuffer +GLAD_API_CALL PFNGLISPROGRAMPROC glad_glIsProgram; +#define glIsProgram glad_glIsProgram +GLAD_API_CALL PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer; +#define glIsRenderbuffer glad_glIsRenderbuffer +GLAD_API_CALL PFNGLISSHADERPROC glad_glIsShader; +#define glIsShader glad_glIsShader +GLAD_API_CALL PFNGLISTEXTUREPROC glad_glIsTexture; +#define glIsTexture glad_glIsTexture +GLAD_API_CALL PFNGLLINEWIDTHPROC glad_glLineWidth; +#define glLineWidth glad_glLineWidth +GLAD_API_CALL PFNGLLINKPROGRAMPROC glad_glLinkProgram; +#define glLinkProgram glad_glLinkProgram +GLAD_API_CALL PFNGLMAPBUFFERPROC glad_glMapBuffer; +#define glMapBuffer glad_glMapBuffer +GLAD_API_CALL PFNGLMAPBUFFEROESPROC glad_glMapBufferOES; +#define glMapBufferOES glad_glMapBufferOES +GLAD_API_CALL PFNGLPIXELSTOREIPROC glad_glPixelStorei; +#define glPixelStorei glad_glPixelStorei +GLAD_API_CALL PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset; +#define glPolygonOffset glad_glPolygonOffset +GLAD_API_CALL PFNGLREADPIXELSPROC glad_glReadPixels; +#define glReadPixels glad_glReadPixels +GLAD_API_CALL PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler; +#define glReleaseShaderCompiler glad_glReleaseShaderCompiler +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage; +#define glRenderbufferStorage glad_glRenderbufferStorage +GLAD_API_CALL PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage; +#define glSampleCoverage glad_glSampleCoverage +GLAD_API_CALL PFNGLSCISSORPROC glad_glScissor; +#define glScissor glad_glScissor +GLAD_API_CALL PFNGLSHADERBINARYPROC glad_glShaderBinary; +#define glShaderBinary glad_glShaderBinary +GLAD_API_CALL PFNGLSHADERSOURCEPROC glad_glShaderSource; +#define glShaderSource glad_glShaderSource +GLAD_API_CALL PFNGLSTENCILFUNCPROC glad_glStencilFunc; +#define glStencilFunc glad_glStencilFunc +GLAD_API_CALL PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate; +#define glStencilFuncSeparate glad_glStencilFuncSeparate +GLAD_API_CALL PFNGLSTENCILMASKPROC glad_glStencilMask; +#define glStencilMask glad_glStencilMask +GLAD_API_CALL PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate; +#define glStencilMaskSeparate glad_glStencilMaskSeparate +GLAD_API_CALL PFNGLSTENCILOPPROC glad_glStencilOp; +#define glStencilOp glad_glStencilOp +GLAD_API_CALL PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate; +#define glStencilOpSeparate glad_glStencilOpSeparate +GLAD_API_CALL PFNGLTEXIMAGE2DPROC glad_glTexImage2D; +#define glTexImage2D glad_glTexImage2D +GLAD_API_CALL PFNGLTEXPARAMETERFPROC glad_glTexParameterf; +#define glTexParameterf glad_glTexParameterf +GLAD_API_CALL PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv; +#define glTexParameterfv glad_glTexParameterfv +GLAD_API_CALL PFNGLTEXPARAMETERIPROC glad_glTexParameteri; +#define glTexParameteri glad_glTexParameteri +GLAD_API_CALL PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv; +#define glTexParameteriv glad_glTexParameteriv +GLAD_API_CALL PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D; +#define glTexSubImage2D glad_glTexSubImage2D +GLAD_API_CALL PFNGLUNIFORM1FPROC glad_glUniform1f; +#define glUniform1f glad_glUniform1f +GLAD_API_CALL PFNGLUNIFORM1FVPROC glad_glUniform1fv; +#define glUniform1fv glad_glUniform1fv +GLAD_API_CALL PFNGLUNIFORM1IPROC glad_glUniform1i; +#define glUniform1i glad_glUniform1i +GLAD_API_CALL PFNGLUNIFORM1IVPROC glad_glUniform1iv; +#define glUniform1iv glad_glUniform1iv +GLAD_API_CALL PFNGLUNIFORM2FPROC glad_glUniform2f; +#define glUniform2f glad_glUniform2f +GLAD_API_CALL PFNGLUNIFORM2FVPROC glad_glUniform2fv; +#define glUniform2fv glad_glUniform2fv +GLAD_API_CALL PFNGLUNIFORM2IPROC glad_glUniform2i; +#define glUniform2i glad_glUniform2i +GLAD_API_CALL PFNGLUNIFORM2IVPROC glad_glUniform2iv; +#define glUniform2iv glad_glUniform2iv +GLAD_API_CALL PFNGLUNIFORM3FPROC glad_glUniform3f; +#define glUniform3f glad_glUniform3f +GLAD_API_CALL PFNGLUNIFORM3FVPROC glad_glUniform3fv; +#define glUniform3fv glad_glUniform3fv +GLAD_API_CALL PFNGLUNIFORM3IPROC glad_glUniform3i; +#define glUniform3i glad_glUniform3i +GLAD_API_CALL PFNGLUNIFORM3IVPROC glad_glUniform3iv; +#define glUniform3iv glad_glUniform3iv +GLAD_API_CALL PFNGLUNIFORM4FPROC glad_glUniform4f; +#define glUniform4f glad_glUniform4f +GLAD_API_CALL PFNGLUNIFORM4FVPROC glad_glUniform4fv; +#define glUniform4fv glad_glUniform4fv +GLAD_API_CALL PFNGLUNIFORM4IPROC glad_glUniform4i; +#define glUniform4i glad_glUniform4i +GLAD_API_CALL PFNGLUNIFORM4IVPROC glad_glUniform4iv; +#define glUniform4iv glad_glUniform4iv +GLAD_API_CALL PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv; +#define glUniformMatrix2fv glad_glUniformMatrix2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv; +#define glUniformMatrix3fv glad_glUniformMatrix3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv; +#define glUniformMatrix4fv glad_glUniformMatrix4fv +GLAD_API_CALL PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer; +#define glUnmapBuffer glad_glUnmapBuffer +GLAD_API_CALL PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES; +#define glUnmapBufferOES glad_glUnmapBufferOES +GLAD_API_CALL PFNGLUSEPROGRAMPROC glad_glUseProgram; +#define glUseProgram glad_glUseProgram +GLAD_API_CALL PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram; +#define glValidateProgram glad_glValidateProgram +GLAD_API_CALL PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f; +#define glVertexAttrib1f glad_glVertexAttrib1f +GLAD_API_CALL PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv; +#define glVertexAttrib1fv glad_glVertexAttrib1fv +GLAD_API_CALL PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f; +#define glVertexAttrib2f glad_glVertexAttrib2f +GLAD_API_CALL PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv; +#define glVertexAttrib2fv glad_glVertexAttrib2fv +GLAD_API_CALL PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f; +#define glVertexAttrib3f glad_glVertexAttrib3f +GLAD_API_CALL PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv; +#define glVertexAttrib3fv glad_glVertexAttrib3fv +GLAD_API_CALL PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f; +#define glVertexAttrib4f glad_glVertexAttrib4f +GLAD_API_CALL PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv; +#define glVertexAttrib4fv glad_glVertexAttrib4fv +GLAD_API_CALL PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer; +#define glVertexAttribPointer glad_glVertexAttribPointer +GLAD_API_CALL PFNGLVIEWPORTPROC glad_glViewport; +#define glViewport glad_glViewport + + + + + +GLAD_API_CALL int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGLES2( GLADloadfunc load); + + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/gl.h glmark2-2021.02/src/glad/include/glad/gl.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/gl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/glad/gl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3686 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:14 2019 + * + * Generator: C/C++ + * Specification: gl + * Extensions: 0 + * + * APIs: + * - gl:compatibility=3.3 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='gl:compatibility=3.3' --extensions='' c + * + * Online: + * http://glad.sh/#api=gl%3Acompatibility%3D3.3&extensions=&generator=c&options= + * + */ + +#ifndef GLAD_GL_H_ +#define GLAD_GL_H_ + +#ifdef __gl_h_ + #error OpenGL header already included (API: gl), remove previous include! +#endif +#define __gl_h_ 1 + + +#define GLAD_GL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GL_2D 0x0600 +#define GL_2_BYTES 0x1407 +#define GL_3D 0x0601 +#define GL_3D_COLOR 0x0602 +#define GL_3D_COLOR_TEXTURE 0x0603 +#define GL_3_BYTES 0x1408 +#define GL_4D_COLOR_TEXTURE 0x0604 +#define GL_4_BYTES 0x1409 +#define GL_ACCUM 0x0100 +#define GL_ACCUM_ALPHA_BITS 0x0D5B +#define GL_ACCUM_BLUE_BITS 0x0D5A +#define GL_ACCUM_BUFFER_BIT 0x00000200 +#define GL_ACCUM_CLEAR_VALUE 0x0B80 +#define GL_ACCUM_GREEN_BITS 0x0D59 +#define GL_ACCUM_RED_BITS 0x0D58 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ADD 0x0104 +#define GL_ADD_SIGNED 0x8574 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALL_ATTRIB_BITS 0xFFFFFFFF +#define GL_ALPHA 0x1906 +#define GL_ALPHA12 0x803D +#define GL_ALPHA16 0x803E +#define GL_ALPHA4 0x803B +#define GL_ALPHA8 0x803C +#define GL_ALPHA_BIAS 0x0D1D +#define GL_ALPHA_BITS 0x0D55 +#define GL_ALPHA_INTEGER 0x8D97 +#define GL_ALPHA_SCALE 0x0D1C +#define GL_ALPHA_TEST 0x0BC0 +#define GL_ALPHA_TEST_FUNC 0x0BC1 +#define GL_ALPHA_TEST_REF 0x0BC2 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_ALWAYS 0x0207 +#define GL_AMBIENT 0x1200 +#define GL_AMBIENT_AND_DIFFUSE 0x1602 +#define GL_AND 0x1501 +#define GL_AND_INVERTED 0x1504 +#define GL_AND_REVERSE 0x1502 +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ATTRIB_STACK_DEPTH 0x0BB0 +#define GL_AUTO_NORMAL 0x0D80 +#define GL_AUX0 0x0409 +#define GL_AUX1 0x040A +#define GL_AUX2 0x040B +#define GL_AUX3 0x040C +#define GL_AUX_BUFFERS 0x0C00 +#define GL_BACK 0x0405 +#define GL_BACK_LEFT 0x0402 +#define GL_BACK_RIGHT 0x0403 +#define GL_BGR 0x80E0 +#define GL_BGRA 0x80E1 +#define GL_BGRA_INTEGER 0x8D9B +#define GL_BGR_INTEGER 0x8D9A +#define GL_BITMAP 0x1A00 +#define GL_BITMAP_TOKEN 0x0704 +#define GL_BLEND 0x0BE2 +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_SRC 0x0BE1 +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLUE 0x1905 +#define GL_BLUE_BIAS 0x0D1B +#define GL_BLUE_BITS 0x0D54 +#define GL_BLUE_INTEGER 0x8D96 +#define GL_BLUE_SCALE 0x0D1A +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_BUFFER_ACCESS 0x88BB +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_BYTE 0x1400 +#define GL_C3F_V3F 0x2A24 +#define GL_C4F_N3F_V3F 0x2A26 +#define GL_C4UB_V2F 0x2A22 +#define GL_C4UB_V3F 0x2A23 +#define GL_CCW 0x0901 +#define GL_CLAMP 0x2900 +#define GL_CLAMP_FRAGMENT_COLOR 0x891B +#define GL_CLAMP_READ_COLOR 0x891C +#define GL_CLAMP_TO_BORDER 0x812D +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_CLAMP_VERTEX_COLOR 0x891A +#define GL_CLEAR 0x1500 +#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1 +#define GL_CLIENT_ALL_ATTRIB_BITS 0xFFFFFFFF +#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 +#define GL_CLIENT_PIXEL_STORE_BIT 0x00000001 +#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002 +#define GL_CLIP_DISTANCE0 0x3000 +#define GL_CLIP_DISTANCE1 0x3001 +#define GL_CLIP_DISTANCE2 0x3002 +#define GL_CLIP_DISTANCE3 0x3003 +#define GL_CLIP_DISTANCE4 0x3004 +#define GL_CLIP_DISTANCE5 0x3005 +#define GL_CLIP_DISTANCE6 0x3006 +#define GL_CLIP_DISTANCE7 0x3007 +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 +#define GL_COEFF 0x0A00 +#define GL_COLOR 0x1800 +#define GL_COLOR_ARRAY 0x8076 +#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898 +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_INDEX 0x1900 +#define GL_COLOR_INDEXES 0x1603 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_COLOR_MATERIAL 0x0B57 +#define GL_COLOR_MATERIAL_FACE 0x0B55 +#define GL_COLOR_MATERIAL_PARAMETER 0x0B56 +#define GL_COLOR_SUM 0x8458 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_COMBINE 0x8570 +#define GL_COMBINE_ALPHA 0x8572 +#define GL_COMBINE_RGB 0x8571 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_COMPARE_R_TO_TEXTURE 0x884E +#define GL_COMPILE 0x1300 +#define GL_COMPILE_AND_EXECUTE 0x1301 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_COMPRESSED_ALPHA 0x84E9 +#define GL_COMPRESSED_INTENSITY 0x84EC +#define GL_COMPRESSED_LUMINANCE 0x84EA +#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB +#define GL_COMPRESSED_RED 0x8225 +#define GL_COMPRESSED_RED_RGTC1 0x8DBB +#define GL_COMPRESSED_RG 0x8226 +#define GL_COMPRESSED_RGB 0x84ED +#define GL_COMPRESSED_RGBA 0x84EE +#define GL_COMPRESSED_RG_RGTC2 0x8DBD +#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE +#define GL_COMPRESSED_SLUMINANCE 0x8C4A +#define GL_COMPRESSED_SLUMINANCE_ALPHA 0x8C4B +#define GL_COMPRESSED_SRGB 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA 0x8C49 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CONDITION_SATISFIED 0x911C +#define GL_CONSTANT 0x8576 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_CONSTANT_ATTENUATION 0x1207 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001 +#define GL_CONTEXT_FLAGS 0x821E +#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001 +#define GL_CONTEXT_PROFILE_MASK 0x9126 +#define GL_COORD_REPLACE 0x8862 +#define GL_COPY 0x1503 +#define GL_COPY_INVERTED 0x150C +#define GL_COPY_PIXEL_TOKEN 0x0706 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_CURRENT_BIT 0x00000001 +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_FOG_COORD 0x8453 +#define GL_CURRENT_FOG_COORDINATE 0x8453 +#define GL_CURRENT_INDEX 0x0B01 +#define GL_CURRENT_NORMAL 0x0B02 +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_CURRENT_QUERY 0x8865 +#define GL_CURRENT_RASTER_COLOR 0x0B04 +#define GL_CURRENT_RASTER_DISTANCE 0x0B09 +#define GL_CURRENT_RASTER_INDEX 0x0B05 +#define GL_CURRENT_RASTER_POSITION 0x0B07 +#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 +#define GL_CURRENT_RASTER_SECONDARY_COLOR 0x845F +#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 +#define GL_CURRENT_SECONDARY_COLOR 0x8459 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_CW 0x0900 +#define GL_DECAL 0x2101 +#define GL_DECR 0x1E03 +#define GL_DECR_WRAP 0x8508 +#define GL_DELETE_STATUS 0x8B80 +#define GL_DEPTH 0x1801 +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_DEPTH_BIAS 0x0D1F +#define GL_DEPTH_BITS 0x0D56 +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_DEPTH_CLAMP 0x864F +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_DEPTH_COMPONENT32 0x81A7 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH_FUNC 0x0B74 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_SCALE 0x0D1E +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_TEXTURE_MODE 0x884B +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DIFFUSE 0x1201 +#define GL_DITHER 0x0BD0 +#define GL_DOMAIN 0x0A02 +#define GL_DONT_CARE 0x1100 +#define GL_DOT3_RGB 0x86AE +#define GL_DOT3_RGBA 0x86AF +#define GL_DOUBLE 0x140A +#define GL_DOUBLEBUFFER 0x0C32 +#define GL_DRAW_BUFFER 0x0C01 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_DRAW_PIXEL_TOKEN 0x0705 +#define GL_DST_ALPHA 0x0304 +#define GL_DST_COLOR 0x0306 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_EDGE_FLAG 0x0B43 +#define GL_EDGE_FLAG_ARRAY 0x8079 +#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING 0x889B +#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093 +#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_EMISSION 0x1600 +#define GL_ENABLE_BIT 0x00002000 +#define GL_EQUAL 0x0202 +#define GL_EQUIV 0x1509 +#define GL_EVAL_BIT 0x00010000 +#define GL_EXP 0x0800 +#define GL_EXP2 0x0801 +#define GL_EXTENSIONS 0x1F03 +#define GL_EYE_LINEAR 0x2400 +#define GL_EYE_PLANE 0x2502 +#define GL_FALSE 0 +#define GL_FASTEST 0x1101 +#define GL_FEEDBACK 0x1C01 +#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0 +#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1 +#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2 +#define GL_FILL 0x1B02 +#define GL_FIRST_VERTEX_CONVENTION 0x8E4D +#define GL_FIXED_ONLY 0x891D +#define GL_FLAT 0x1D00 +#define GL_FLOAT 0x1406 +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_FOG 0x0B60 +#define GL_FOG_BIT 0x00000080 +#define GL_FOG_COLOR 0x0B66 +#define GL_FOG_COORD 0x8451 +#define GL_FOG_COORDINATE 0x8451 +#define GL_FOG_COORDINATE_ARRAY 0x8457 +#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0x889D +#define GL_FOG_COORDINATE_ARRAY_POINTER 0x8456 +#define GL_FOG_COORDINATE_ARRAY_STRIDE 0x8455 +#define GL_FOG_COORDINATE_ARRAY_TYPE 0x8454 +#define GL_FOG_COORDINATE_SOURCE 0x8450 +#define GL_FOG_COORD_ARRAY 0x8457 +#define GL_FOG_COORD_ARRAY_BUFFER_BINDING 0x889D +#define GL_FOG_COORD_ARRAY_POINTER 0x8456 +#define GL_FOG_COORD_ARRAY_STRIDE 0x8455 +#define GL_FOG_COORD_ARRAY_TYPE 0x8454 +#define GL_FOG_COORD_SRC 0x8450 +#define GL_FOG_DENSITY 0x0B62 +#define GL_FOG_END 0x0B64 +#define GL_FOG_HINT 0x0C54 +#define GL_FOG_INDEX 0x0B61 +#define GL_FOG_MODE 0x0B65 +#define GL_FOG_START 0x0B63 +#define GL_FRAGMENT_DEPTH 0x8452 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_FRAMEBUFFER 0x8D40 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC +#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRONT 0x0404 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_FRONT_FACE 0x0B46 +#define GL_FRONT_LEFT 0x0400 +#define GL_FRONT_RIGHT 0x0401 +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_FUNC_SUBTRACT 0x800A +#define GL_GENERATE_MIPMAP 0x8191 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_GEOMETRY_INPUT_TYPE 0x8917 +#define GL_GEOMETRY_OUTPUT_TYPE 0x8918 +#define GL_GEOMETRY_SHADER 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT 0x8916 +#define GL_GEQUAL 0x0206 +#define GL_GREATER 0x0204 +#define GL_GREEN 0x1904 +#define GL_GREEN_BIAS 0x0D19 +#define GL_GREEN_BITS 0x0D53 +#define GL_GREEN_INTEGER 0x8D95 +#define GL_GREEN_SCALE 0x0D18 +#define GL_HALF_FLOAT 0x140B +#define GL_HINT_BIT 0x00008000 +#define GL_INCR 0x1E02 +#define GL_INCR_WRAP 0x8507 +#define GL_INDEX 0x8222 +#define GL_INDEX_ARRAY 0x8077 +#define GL_INDEX_ARRAY_BUFFER_BINDING 0x8899 +#define GL_INDEX_ARRAY_POINTER 0x8091 +#define GL_INDEX_ARRAY_STRIDE 0x8086 +#define GL_INDEX_ARRAY_TYPE 0x8085 +#define GL_INDEX_BITS 0x0D51 +#define GL_INDEX_CLEAR_VALUE 0x0C20 +#define GL_INDEX_LOGIC_OP 0x0BF1 +#define GL_INDEX_MODE 0x0C30 +#define GL_INDEX_OFFSET 0x0D13 +#define GL_INDEX_SHIFT 0x0D12 +#define GL_INDEX_WRITEMASK 0x0C21 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_INT 0x1404 +#define GL_INTENSITY 0x8049 +#define GL_INTENSITY12 0x804C +#define GL_INTENSITY16 0x804D +#define GL_INTENSITY4 0x804A +#define GL_INTENSITY8 0x804B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_INTERPOLATE 0x8575 +#define GL_INT_2_10_10_10_REV 0x8D9F +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_INVALID_INDEX 0xFFFFFFFF +#define GL_INVALID_OPERATION 0x0502 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVERT 0x150A +#define GL_KEEP 0x1E00 +#define GL_LAST_VERTEX_CONVENTION 0x8E4E +#define GL_LEFT 0x0406 +#define GL_LEQUAL 0x0203 +#define GL_LESS 0x0201 +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 +#define GL_LIGHTING 0x0B50 +#define GL_LIGHTING_BIT 0x00000040 +#define GL_LIGHT_MODEL_AMBIENT 0x0B53 +#define GL_LIGHT_MODEL_COLOR_CONTROL 0x81F8 +#define GL_LIGHT_MODEL_LOCAL_VIEWER 0x0B51 +#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52 +#define GL_LINE 0x1B01 +#define GL_LINEAR 0x2601 +#define GL_LINEAR_ATTENUATION 0x1208 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_LINES 0x0001 +#define GL_LINES_ADJACENCY 0x000A +#define GL_LINE_BIT 0x00000004 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_RESET_TOKEN 0x0707 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_LINE_STIPPLE 0x0B24 +#define GL_LINE_STIPPLE_PATTERN 0x0B25 +#define GL_LINE_STIPPLE_REPEAT 0x0B26 +#define GL_LINE_STRIP 0x0003 +#define GL_LINE_STRIP_ADJACENCY 0x000B +#define GL_LINE_TOKEN 0x0702 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_LINE_WIDTH_RANGE 0x0B22 +#define GL_LINK_STATUS 0x8B82 +#define GL_LIST_BASE 0x0B32 +#define GL_LIST_BIT 0x00020000 +#define GL_LIST_INDEX 0x0B33 +#define GL_LIST_MODE 0x0B30 +#define GL_LOAD 0x0101 +#define GL_LOGIC_OP 0x0BF1 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_LOWER_LEFT 0x8CA1 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE12 0x8041 +#define GL_LUMINANCE12_ALPHA12 0x8047 +#define GL_LUMINANCE12_ALPHA4 0x8046 +#define GL_LUMINANCE16 0x8042 +#define GL_LUMINANCE16_ALPHA16 0x8048 +#define GL_LUMINANCE4 0x803F +#define GL_LUMINANCE4_ALPHA4 0x8043 +#define GL_LUMINANCE6_ALPHA2 0x8044 +#define GL_LUMINANCE8 0x8040 +#define GL_LUMINANCE8_ALPHA8 0x8045 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_MAJOR_VERSION 0x821B +#define GL_MAP1_COLOR_4 0x0D90 +#define GL_MAP1_GRID_DOMAIN 0x0DD0 +#define GL_MAP1_GRID_SEGMENTS 0x0DD1 +#define GL_MAP1_INDEX 0x0D91 +#define GL_MAP1_NORMAL 0x0D92 +#define GL_MAP1_TEXTURE_COORD_1 0x0D93 +#define GL_MAP1_TEXTURE_COORD_2 0x0D94 +#define GL_MAP1_TEXTURE_COORD_3 0x0D95 +#define GL_MAP1_TEXTURE_COORD_4 0x0D96 +#define GL_MAP1_VERTEX_3 0x0D97 +#define GL_MAP1_VERTEX_4 0x0D98 +#define GL_MAP2_COLOR_4 0x0DB0 +#define GL_MAP2_GRID_DOMAIN 0x0DD2 +#define GL_MAP2_GRID_SEGMENTS 0x0DD3 +#define GL_MAP2_INDEX 0x0DB1 +#define GL_MAP2_NORMAL 0x0DB2 +#define GL_MAP2_TEXTURE_COORD_1 0x0DB3 +#define GL_MAP2_TEXTURE_COORD_2 0x0DB4 +#define GL_MAP2_TEXTURE_COORD_3 0x0DB5 +#define GL_MAP2_TEXTURE_COORD_4 0x0DB6 +#define GL_MAP2_VERTEX_3 0x0DB7 +#define GL_MAP2_VERTEX_4 0x0DB8 +#define GL_MAP_COLOR 0x0D10 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_STENCIL 0x0D11 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MATRIX_MODE 0x0BA0 +#define GL_MAX 0x8008 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MAX_ATTRIB_STACK_DEPTH 0x0D35 +#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B +#define GL_MAX_CLIP_DISTANCES 0x0D32 +#define GL_MAX_CLIP_PLANES 0x0D32 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32 +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_EVAL_ORDER 0x0D30 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123 +#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124 +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0 +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1 +#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF +#define GL_MAX_INTEGER_SAMPLES 0x9110 +#define GL_MAX_LIGHTS 0x0D31 +#define GL_MAX_LIST_NESTING 0x0B31 +#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36 +#define GL_MAX_NAME_STACK_DEPTH 0x0D37 +#define GL_MAX_PIXEL_MAP_TABLE 0x0D34 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B +#define GL_MAX_TEXTURE_COORDS 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 +#define GL_MAX_TEXTURE_UNITS 0x84E2 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_MAX_VARYING_FLOATS 0x8B4B +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MIN 0x8007 +#define GL_MINOR_VERSION 0x821C +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_MODELVIEW 0x1700 +#define GL_MODELVIEW_MATRIX 0x0BA6 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +#define GL_MODULATE 0x2100 +#define GL_MULT 0x0103 +#define GL_MULTISAMPLE 0x809D +#define GL_MULTISAMPLE_BIT 0x20000000 +#define GL_N3F_V3F 0x2A25 +#define GL_NAME_STACK_DEPTH 0x0D70 +#define GL_NAND 0x150E +#define GL_NEAREST 0x2600 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_NEVER 0x0200 +#define GL_NICEST 0x1102 +#define GL_NONE 0 +#define GL_NOOP 0x1505 +#define GL_NOR 0x1508 +#define GL_NORMALIZE 0x0BA1 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897 +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_MAP 0x8511 +#define GL_NOTEQUAL 0x0205 +#define GL_NO_ERROR 0 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_NUM_EXTENSIONS 0x821D +#define GL_OBJECT_LINEAR 0x2401 +#define GL_OBJECT_PLANE 0x2501 +#define GL_OBJECT_TYPE 0x9112 +#define GL_ONE 1 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB +#define GL_ONE_MINUS_SRC1_COLOR 0x88FA +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_OPERAND0_ALPHA 0x8598 +#define GL_OPERAND0_RGB 0x8590 +#define GL_OPERAND1_ALPHA 0x8599 +#define GL_OPERAND1_RGB 0x8591 +#define GL_OPERAND2_ALPHA 0x859A +#define GL_OPERAND2_RGB 0x8592 +#define GL_OR 0x1507 +#define GL_ORDER 0x0A01 +#define GL_OR_INVERTED 0x150D +#define GL_OR_REVERSE 0x150B +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_PACK_IMAGE_HEIGHT 0x806C +#define GL_PACK_LSB_FIRST 0x0D01 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_IMAGES 0x806B +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SWAP_BYTES 0x0D00 +#define GL_PASS_THROUGH_TOKEN 0x0700 +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_PIXEL_MAP_A_TO_A 0x0C79 +#define GL_PIXEL_MAP_A_TO_A_SIZE 0x0CB9 +#define GL_PIXEL_MAP_B_TO_B 0x0C78 +#define GL_PIXEL_MAP_B_TO_B_SIZE 0x0CB8 +#define GL_PIXEL_MAP_G_TO_G 0x0C77 +#define GL_PIXEL_MAP_G_TO_G_SIZE 0x0CB7 +#define GL_PIXEL_MAP_I_TO_A 0x0C75 +#define GL_PIXEL_MAP_I_TO_A_SIZE 0x0CB5 +#define GL_PIXEL_MAP_I_TO_B 0x0C74 +#define GL_PIXEL_MAP_I_TO_B_SIZE 0x0CB4 +#define GL_PIXEL_MAP_I_TO_G 0x0C73 +#define GL_PIXEL_MAP_I_TO_G_SIZE 0x0CB3 +#define GL_PIXEL_MAP_I_TO_I 0x0C70 +#define GL_PIXEL_MAP_I_TO_I_SIZE 0x0CB0 +#define GL_PIXEL_MAP_I_TO_R 0x0C72 +#define GL_PIXEL_MAP_I_TO_R_SIZE 0x0CB2 +#define GL_PIXEL_MAP_R_TO_R 0x0C76 +#define GL_PIXEL_MAP_R_TO_R_SIZE 0x0CB6 +#define GL_PIXEL_MAP_S_TO_S 0x0C71 +#define GL_PIXEL_MAP_S_TO_S_SIZE 0x0CB1 +#define GL_PIXEL_MODE_BIT 0x00000020 +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_POINT 0x1B00 +#define GL_POINTS 0x0000 +#define GL_POINT_BIT 0x00000002 +#define GL_POINT_DISTANCE_ATTENUATION 0x8129 +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_POINT_SIZE_MAX 0x8127 +#define GL_POINT_SIZE_MIN 0x8126 +#define GL_POINT_SIZE_RANGE 0x0B12 +#define GL_POINT_SMOOTH 0x0B10 +#define GL_POINT_SMOOTH_HINT 0x0C51 +#define GL_POINT_SPRITE 0x8861 +#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0 +#define GL_POINT_TOKEN 0x0701 +#define GL_POLYGON 0x0009 +#define GL_POLYGON_BIT 0x00000008 +#define GL_POLYGON_MODE 0x0B40 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_SMOOTH 0x0B41 +#define GL_POLYGON_SMOOTH_HINT 0x0C53 +#define GL_POLYGON_STIPPLE 0x0B42 +#define GL_POLYGON_STIPPLE_BIT 0x00000010 +#define GL_POLYGON_TOKEN 0x0703 +#define GL_POSITION 0x1203 +#define GL_PREVIOUS 0x8578 +#define GL_PRIMARY_COLOR 0x8577 +#define GL_PRIMITIVES_GENERATED 0x8C87 +#define GL_PRIMITIVE_RESTART 0x8F9D +#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E +#define GL_PROGRAM_POINT_SIZE 0x8642 +#define GL_PROJECTION 0x1701 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +#define GL_PROVOKING_VERTEX 0x8E4F +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103 +#define GL_PROXY_TEXTURE_3D 0x8070 +#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B +#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7 +#define GL_Q 0x2003 +#define GL_QUADRATIC_ATTENUATION 0x1209 +#define GL_QUADS 0x0007 +#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C +#define GL_QUAD_STRIP 0x0008 +#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16 +#define GL_QUERY_BY_REGION_WAIT 0x8E15 +#define GL_QUERY_COUNTER_BITS 0x8864 +#define GL_QUERY_NO_WAIT 0x8E14 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_QUERY_WAIT 0x8E13 +#define GL_R 0x2002 +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_R16 0x822A +#define GL_R16F 0x822D +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R16_SNORM 0x8F98 +#define GL_R32F 0x822E +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_R3_G3_B2 0x2A10 +#define GL_R8 0x8229 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R8_SNORM 0x8F94 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_READ_BUFFER 0x0C02 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_READ_ONLY 0x88B8 +#define GL_READ_WRITE 0x88BA +#define GL_RED 0x1903 +#define GL_RED_BIAS 0x0D15 +#define GL_RED_BITS 0x0D52 +#define GL_RED_INTEGER 0x8D94 +#define GL_RED_SCALE 0x0D14 +#define GL_REFLECTION_MAP 0x8512 +#define GL_RENDER 0x1C00 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERER 0x1F01 +#define GL_RENDER_MODE 0x0C40 +#define GL_REPEAT 0x2901 +#define GL_REPLACE 0x1E01 +#define GL_RESCALE_NORMAL 0x803A +#define GL_RETURN 0x0102 +#define GL_RG 0x8227 +#define GL_RG16 0x822C +#define GL_RG16F 0x822F +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG16_SNORM 0x8F99 +#define GL_RG32F 0x8230 +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_RG8 0x822B +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB 0x1907 +#define GL_RGB10 0x8052 +#define GL_RGB10_A2 0x8059 +#define GL_RGB10_A2UI 0x906F +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGB16F 0x881B +#define GL_RGB16I 0x8D89 +#define GL_RGB16UI 0x8D77 +#define GL_RGB16_SNORM 0x8F9A +#define GL_RGB32F 0x8815 +#define GL_RGB32I 0x8D83 +#define GL_RGB32UI 0x8D71 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB5_A1 0x8057 +#define GL_RGB8 0x8051 +#define GL_RGB8I 0x8D8F +#define GL_RGB8UI 0x8D7D +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGB9_E5 0x8C3D +#define GL_RGBA 0x1908 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_RGBA16F 0x881A +#define GL_RGBA16I 0x8D88 +#define GL_RGBA16UI 0x8D76 +#define GL_RGBA16_SNORM 0x8F9B +#define GL_RGBA2 0x8055 +#define GL_RGBA32F 0x8814 +#define GL_RGBA32I 0x8D82 +#define GL_RGBA32UI 0x8D70 +#define GL_RGBA4 0x8056 +#define GL_RGBA8 0x8058 +#define GL_RGBA8I 0x8D8E +#define GL_RGBA8UI 0x8D7C +#define GL_RGBA8_SNORM 0x8F97 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_RGBA_MODE 0x0C31 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGB_SCALE 0x8573 +#define GL_RG_INTEGER 0x8228 +#define GL_RIGHT 0x0407 +#define GL_S 0x2000 +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_BINDING 0x8919 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLES_PASSED 0x8914 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SCISSOR_BIT 0x00080000 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_SECONDARY_COLOR_ARRAY 0x845E +#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING 0x889C +#define GL_SECONDARY_COLOR_ARRAY_POINTER 0x845D +#define GL_SECONDARY_COLOR_ARRAY_SIZE 0x845A +#define GL_SECONDARY_COLOR_ARRAY_STRIDE 0x845C +#define GL_SECONDARY_COLOR_ARRAY_TYPE 0x845B +#define GL_SELECT 0x1C02 +#define GL_SELECTION_BUFFER_POINTER 0x0DF3 +#define GL_SELECTION_BUFFER_SIZE 0x0DF4 +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_SEPARATE_SPECULAR_COLOR 0x81FA +#define GL_SET 0x150F +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_TYPE 0x8B4F +#define GL_SHADE_MODEL 0x0B54 +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_SHININESS 0x1601 +#define GL_SHORT 0x1402 +#define GL_SIGNALED 0x9119 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_SINGLE_COLOR 0x81F9 +#define GL_SLUMINANCE 0x8C46 +#define GL_SLUMINANCE8 0x8C47 +#define GL_SLUMINANCE8_ALPHA8 0x8C45 +#define GL_SLUMINANCE_ALPHA 0x8C44 +#define GL_SMOOTH 0x1D01 +#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_SOURCE0_ALPHA 0x8588 +#define GL_SOURCE0_RGB 0x8580 +#define GL_SOURCE1_ALPHA 0x8589 +#define GL_SOURCE1_RGB 0x8581 +#define GL_SOURCE2_ALPHA 0x858A +#define GL_SOURCE2_RGB 0x8582 +#define GL_SPECULAR 0x1202 +#define GL_SPHERE_MAP 0x2402 +#define GL_SPOT_CUTOFF 0x1206 +#define GL_SPOT_DIRECTION 0x1204 +#define GL_SPOT_EXPONENT 0x1205 +#define GL_SRC0_ALPHA 0x8588 +#define GL_SRC0_RGB 0x8580 +#define GL_SRC1_ALPHA 0x8589 +#define GL_SRC1_COLOR 0x88F9 +#define GL_SRC1_RGB 0x8581 +#define GL_SRC2_ALPHA 0x858A +#define GL_SRC2_RGB 0x8582 +#define GL_SRC_ALPHA 0x0302 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_SRC_COLOR 0x0300 +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_SRGB_ALPHA 0x8C42 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_STATIC_COPY 0x88E6 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STATIC_READ 0x88E5 +#define GL_STENCIL 0x1802 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_STENCIL_BITS 0x0D57 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_INDEX 0x1901 +#define GL_STENCIL_INDEX1 0x8D46 +#define GL_STENCIL_INDEX16 0x8D49 +#define GL_STENCIL_INDEX4 0x8D47 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STEREO 0x0C33 +#define GL_STREAM_COPY 0x88E2 +#define GL_STREAM_DRAW 0x88E0 +#define GL_STREAM_READ 0x88E1 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_SUBTRACT 0x84E7 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_SYNC_STATUS 0x9114 +#define GL_T 0x2001 +#define GL_T2F_C3F_V3F 0x2A2A +#define GL_T2F_C4F_N3F_V3F 0x2A2C +#define GL_T2F_C4UB_V3F 0x2A29 +#define GL_T2F_N3F_V3F 0x2A2B +#define GL_T2F_V3F 0x2A27 +#define GL_T4F_C4F_N3F_V4F 0x2A2D +#define GL_T4F_V4F 0x2A28 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_1D_ARRAY 0x8C18 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 +#define GL_TEXTURE_3D 0x806F +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_TEXTURE_BINDING_BUFFER 0x8C2C +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6 +#define GL_TEXTURE_BIT 0x00040000 +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_BORDER 0x1005 +#define GL_TEXTURE_BORDER_COLOR 0x1004 +#define GL_TEXTURE_BUFFER 0x8C2A +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPONENTS 0x1003 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0 +#define GL_TEXTURE_COMPRESSION_HINT 0x84EF +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_TEXTURE_ENV 0x2300 +#define GL_TEXTURE_ENV_COLOR 0x2201 +#define GL_TEXTURE_ENV_MODE 0x2200 +#define GL_TEXTURE_FILTER_CONTROL 0x8500 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_TEXTURE_GEN_MODE 0x2500 +#define GL_TEXTURE_GEN_Q 0x0C63 +#define GL_TEXTURE_GEN_R 0x0C62 +#define GL_TEXTURE_GEN_S 0x0C60 +#define GL_TEXTURE_GEN_T 0x0C61 +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_INTENSITY_SIZE 0x8061 +#define GL_TEXTURE_INTENSITY_TYPE 0x8C15 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_LOD_BIAS 0x8501 +#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 +#define GL_TEXTURE_LUMINANCE_TYPE 0x8C14 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_PRIORITY 0x8066 +#define GL_TEXTURE_RECTANGLE 0x84F5 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_RESIDENT 0x8067 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFF +#define GL_TIMESTAMP 0x8E28 +#define GL_TIME_ELAPSED 0x88BF +#define GL_TRANSFORM_BIT 0x00001000 +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSPOSE_COLOR_MATRIX 0x84E6 +#define GL_TRANSPOSE_MODELVIEW_MATRIX 0x84E3 +#define GL_TRANSPOSE_PROJECTION_MATRIX 0x84E4 +#define GL_TRANSPOSE_TEXTURE_MATRIX 0x84E5 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLES_ADJACENCY 0x000C +#define GL_TRIANGLE_FAN 0x0006 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D +#define GL_TRUE 1 +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_UNPACK_LSB_FIRST 0x0CF1 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SWAP_BYTES 0x0CF0 +#define GL_UNSIGNALED 0x9118 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 +#define GL_UNSIGNED_BYTE_3_3_2 0x8032 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_UNSIGNED_INT_10_10_10_2 0x8036 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 +#define GL_UPPER_LEFT 0x8CA2 +#define GL_V2F 0x2A20 +#define GL_V3F 0x2A21 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_VENDOR 0x1F00 +#define GL_VERSION 0x1F02 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896 +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 +#define GL_VERTEX_PROGRAM_TWO_SIDE 0x8643 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_VIEWPORT 0x0BA2 +#define GL_VIEWPORT_BIT 0x00000800 +#define GL_WAIT_FAILED 0x911D +#define GL_WEIGHT_ARRAY_BUFFER_BINDING 0x889E +#define GL_WRITE_ONLY 0x88B9 +#define GL_XOR 0x1506 +#define GL_ZERO 0 +#define GL_ZOOM_X 0x0D16 +#define GL_ZOOM_Y 0x0D17 + + +#include +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef khronos_int8_t GLbyte; +typedef khronos_uint8_t GLubyte; +typedef khronos_int16_t GLshort; +typedef khronos_uint16_t GLushort; +typedef int GLint; +typedef unsigned int GLuint; +typedef khronos_int32_t GLclampx; +typedef int GLsizei; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void *GLeglClientBufferEXT; +typedef void *GLeglImageOES; +typedef char GLchar; +typedef char GLcharARB; +#ifdef __APPLE__ +typedef void *GLhandleARB; +#else +typedef unsigned int GLhandleARB; +#endif +typedef khronos_uint16_t GLhalf; +typedef khronos_uint16_t GLhalfARB; +typedef khronos_int32_t GLfixed; +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptr; +#else +typedef khronos_intptr_t GLintptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_intptr_t GLintptrARB; +#else +typedef khronos_intptr_t GLintptrARB; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptr; +#else +typedef khronos_ssize_t GLsizeiptr; +#endif +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) +typedef khronos_ssize_t GLsizeiptrARB; +#else +typedef khronos_ssize_t GLsizeiptrARB; +#endif +typedef khronos_int64_t GLint64; +typedef khronos_int64_t GLint64EXT; +typedef khronos_uint64_t GLuint64; +typedef khronos_uint64_t GLuint64EXT; +typedef struct __GLsync *GLsync; +struct _cl_context; +struct _cl_event; +typedef void (GLAD_API_PTR *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (GLAD_API_PTR *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); +typedef unsigned short GLhalfNV; +typedef GLintptr GLvdpauSurfaceNV; +typedef void (GLAD_API_PTR *GLVULKANPROCNV)(void); + + +#define GL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_0; +#define GL_VERSION_1_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_1; +#define GL_VERSION_1_2 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_2; +#define GL_VERSION_1_3 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_3; +#define GL_VERSION_1_4 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_4; +#define GL_VERSION_1_5 1 +GLAD_API_CALL int GLAD_GL_VERSION_1_5; +#define GL_VERSION_2_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_2_0; +#define GL_VERSION_2_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_2_1; +#define GL_VERSION_3_0 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_0; +#define GL_VERSION_3_1 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_1; +#define GL_VERSION_3_2 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_2; +#define GL_VERSION_3_3 1 +GLAD_API_CALL int GLAD_GL_VERSION_3_3; + + +typedef void (GLAD_API_PTR *PFNGLACCUMPROC)(GLenum op, GLfloat value); +typedef void (GLAD_API_PTR *PFNGLACTIVETEXTUREPROC)(GLenum texture); +typedef void (GLAD_API_PTR *PFNGLALPHAFUNCPROC)(GLenum func, GLfloat ref); +typedef GLboolean (GLAD_API_PTR *PFNGLARETEXTURESRESIDENTPROC)(GLsizei n, const GLuint * textures, GLboolean * residences); +typedef void (GLAD_API_PTR *PFNGLARRAYELEMENTPROC)(GLint i); +typedef void (GLAD_API_PTR *PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLBEGINPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBEGINCONDITIONALRENDERPROC)(GLuint id, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBEGINQUERYPROC)(GLenum target, GLuint id); +typedef void (GLAD_API_PTR *PFNGLBEGINTRANSFORMFEEDBACKPROC)(GLenum primitiveMode); +typedef void (GLAD_API_PTR *PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERBASEPROC)(GLenum target, GLuint index, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLBINDBUFFERRANGEPROC)(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (GLAD_API_PTR *PFNGLBINDFRAGDATALOCATIONPROC)(GLuint program, GLuint color, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)(GLuint program, GLuint colorNumber, GLuint index, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer); +typedef void (GLAD_API_PTR *PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLBINDSAMPLERPROC)(GLuint unit, GLuint sampler); +typedef void (GLAD_API_PTR *PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture); +typedef void (GLAD_API_PTR *PFNGLBINDVERTEXARRAYPROC)(GLuint array); +typedef void (GLAD_API_PTR *PFNGLBITMAPPROC)(GLsizei width, GLsizei height, GLfloat xorig, GLfloat yorig, GLfloat xmove, GLfloat ymove, const GLubyte * bitmap); +typedef void (GLAD_API_PTR *PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor); +typedef void (GLAD_API_PTR *PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (GLAD_API_PTR *PFNGLBLITFRAMEBUFFERPROC)(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef void (GLAD_API_PTR *PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void * data, GLenum usage); +typedef void (GLAD_API_PTR *PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void * data); +typedef void (GLAD_API_PTR *PFNGLCALLLISTPROC)(GLuint list); +typedef void (GLAD_API_PTR *PFNGLCALLLISTSPROC)(GLsizei n, GLenum type, const void * lists); +typedef GLenum (GLAD_API_PTR *PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLCLAMPCOLORPROC)(GLenum target, GLenum clamp); +typedef void (GLAD_API_PTR *PFNGLCLEARPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLCLEARACCUMPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERFIPROC)(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERFVPROC)(GLenum buffer, GLint drawbuffer, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERIVPROC)(GLenum buffer, GLint drawbuffer, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLCLEARBUFFERUIVPROC)(GLenum buffer, GLint drawbuffer, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCLEARDEPTHPROC)(GLdouble depth); +typedef void (GLAD_API_PTR *PFNGLCLEARINDEXPROC)(GLfloat c); +typedef void (GLAD_API_PTR *PFNGLCLEARSTENCILPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLCLIENTACTIVETEXTUREPROC)(GLenum texture); +typedef GLenum (GLAD_API_PTR *PFNGLCLIENTWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GLAD_API_PTR *PFNGLCLIPPLANEPROC)(GLenum plane, const GLdouble * equation); +typedef void (GLAD_API_PTR *PFNGLCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3IPROC)(GLint red, GLint green, GLint blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue); +typedef void (GLAD_API_PTR *PFNGLCOLOR3USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4BPROC)(GLbyte red, GLbyte green, GLbyte blue, GLbyte alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4DPROC)(GLdouble red, GLdouble green, GLdouble blue, GLdouble alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4FPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4IPROC)(GLint red, GLint green, GLint blue, GLint alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4SPROC)(GLshort red, GLshort green, GLshort blue, GLshort alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UBPROC)(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UIPROC)(GLuint red, GLuint green, GLuint blue, GLuint alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLCOLOR4USPROC)(GLushort red, GLushort green, GLushort blue, GLushort alpha); +typedef void (GLAD_API_PTR *PFNGLCOLOR4USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +typedef void (GLAD_API_PTR *PFNGLCOLORMASKIPROC)(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +typedef void (GLAD_API_PTR *PFNGLCOLORMATERIALPROC)(GLenum face, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLCOLORP3UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLCOLORP3UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLCOLORP4UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLCOLORP4UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLCOMPILESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXIMAGE3DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void * data); +typedef void (GLAD_API_PTR *PFNGLCOPYBUFFERSUBDATAPROC)(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +typedef void (GLAD_API_PTR *PFNGLCOPYPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum type); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLCOPYTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef GLuint (GLAD_API_PTR *PFNGLCREATEPROGRAMPROC)(void); +typedef GLuint (GLAD_API_PTR *PFNGLCREATESHADERPROC)(GLenum type); +typedef void (GLAD_API_PTR *PFNGLCULLFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint * framebuffers); +typedef void (GLAD_API_PTR *PFNGLDELETELISTSPROC)(GLuint list, GLsizei range); +typedef void (GLAD_API_PTR *PFNGLDELETEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint * ids); +typedef void (GLAD_API_PTR *PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLDELETESAMPLERSPROC)(GLsizei count, const GLuint * samplers); +typedef void (GLAD_API_PTR *PFNGLDELETESHADERPROC)(GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDELETESYNCPROC)(GLsync sync); +typedef void (GLAD_API_PTR *PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLDELETEVERTEXARRAYSPROC)(GLsizei n, const GLuint * arrays); +typedef void (GLAD_API_PTR *PFNGLDEPTHFUNCPROC)(GLenum func); +typedef void (GLAD_API_PTR *PFNGLDEPTHMASKPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLDEPTHRANGEPROC)(GLdouble n, GLdouble f); +typedef void (GLAD_API_PTR *PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader); +typedef void (GLAD_API_PTR *PFNGLDISABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLDISABLECLIENTSTATEPROC)(GLenum array); +typedef void (GLAD_API_PTR *PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLDISABLEIPROC)(GLenum target, GLuint index); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); +typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSINSTANCEDPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +typedef void (GLAD_API_PTR *PFNGLDRAWBUFFERPROC)(GLenum buf); +typedef void (GLAD_API_PTR *PFNGLDRAWBUFFERSPROC)(GLsizei n, const GLenum * bufs); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSINSTANCEDPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLsizei instancecount); +typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void * indices, GLsizei instancecount, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLDRAWPIXELSPROC)(GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLDRAWRANGEELEMENTSPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void * indices); +typedef void (GLAD_API_PTR *PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void * indices, GLint basevertex); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGPROC)(GLboolean flag); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGPOINTERPROC)(GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLEDGEFLAGVPROC)(const GLboolean * flag); +typedef void (GLAD_API_PTR *PFNGLENABLEPROC)(GLenum cap); +typedef void (GLAD_API_PTR *PFNGLENABLECLIENTSTATEPROC)(GLenum array); +typedef void (GLAD_API_PTR *PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLENABLEIPROC)(GLenum target, GLuint index); +typedef void (GLAD_API_PTR *PFNGLENDPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDCONDITIONALRENDERPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDLISTPROC)(void); +typedef void (GLAD_API_PTR *PFNGLENDQUERYPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLENDTRANSFORMFEEDBACKPROC)(void); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1DPROC)(GLdouble u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1DVPROC)(const GLdouble * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1FPROC)(GLfloat u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD1FVPROC)(const GLfloat * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2DPROC)(GLdouble u, GLdouble v); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2DVPROC)(const GLdouble * u); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2FPROC)(GLfloat u, GLfloat v); +typedef void (GLAD_API_PTR *PFNGLEVALCOORD2FVPROC)(const GLfloat * u); +typedef void (GLAD_API_PTR *PFNGLEVALMESH1PROC)(GLenum mode, GLint i1, GLint i2); +typedef void (GLAD_API_PTR *PFNGLEVALMESH2PROC)(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2); +typedef void (GLAD_API_PTR *PFNGLEVALPOINT1PROC)(GLint i); +typedef void (GLAD_API_PTR *PFNGLEVALPOINT2PROC)(GLint i, GLint j); +typedef void (GLAD_API_PTR *PFNGLFEEDBACKBUFFERPROC)(GLsizei size, GLenum type, GLfloat * buffer); +typedef GLsync (GLAD_API_PTR *PFNGLFENCESYNCPROC)(GLenum condition, GLbitfield flags); +typedef void (GLAD_API_PTR *PFNGLFINISHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHPROC)(void); +typedef void (GLAD_API_PTR *PFNGLFLUSHMAPPEDBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDDPROC)(GLdouble coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDDVPROC)(const GLdouble * coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDFPROC)(GLfloat coord); +typedef void (GLAD_API_PTR *PFNGLFOGCOORDFVPROC)(const GLfloat * coord); +typedef void (GLAD_API_PTR *PFNGLFOGFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLFOGFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLFOGIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLFOGIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTUREPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE1DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURE3DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERTEXTURELAYERPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (GLAD_API_PTR *PFNGLFRONTFACEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLFRUSTUMPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GLAD_API_PTR *PFNGLGENBUFFERSPROC)(GLsizei n, GLuint * buffers); +typedef void (GLAD_API_PTR *PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint * framebuffers); +typedef GLuint (GLAD_API_PTR *PFNGLGENLISTSPROC)(GLsizei range); +typedef void (GLAD_API_PTR *PFNGLGENQUERIESPROC)(GLsizei n, GLuint * ids); +typedef void (GLAD_API_PTR *PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint * renderbuffers); +typedef void (GLAD_API_PTR *PFNGLGENSAMPLERSPROC)(GLsizei count, GLuint * samplers); +typedef void (GLAD_API_PTR *PFNGLGENTEXTURESPROC)(GLsizei n, GLuint * textures); +typedef void (GLAD_API_PTR *PFNGLGENVERTEXARRAYSPROC)(GLsizei n, GLuint * arrays); +typedef void (GLAD_API_PTR *PFNGLGENERATEMIPMAPPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLint * size, GLenum * type, GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei * length, GLchar * uniformBlockName); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMBLOCKIVPROC)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMNAMEPROC)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei * length, GLchar * uniformName); +typedef void (GLAD_API_PTR *PFNGLGETACTIVEUNIFORMSIVPROC)(GLuint program, GLsizei uniformCount, const GLuint * uniformIndices, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei * count, GLuint * shaders); +typedef GLint (GLAD_API_PTR *PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANI_VPROC)(GLenum target, GLuint index, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean * data); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERI64VPROC)(GLenum target, GLenum pname, GLint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, void * data); +typedef void (GLAD_API_PTR *PFNGLGETCLIPPLANEPROC)(GLenum plane, GLdouble * equation); +typedef void (GLAD_API_PTR *PFNGLGETCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint level, void * img); +typedef void (GLAD_API_PTR *PFNGLGETDOUBLEVPROC)(GLenum pname, GLdouble * data); +typedef GLenum (GLAD_API_PTR *PFNGLGETERRORPROC)(void); +typedef void (GLAD_API_PTR *PFNGLGETFLOATVPROC)(GLenum pname, GLfloat * data); +typedef GLint (GLAD_API_PTR *PFNGLGETFRAGDATAINDEXPROC)(GLuint program, const GLchar * name); +typedef GLint (GLAD_API_PTR *PFNGLGETFRAGDATALOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETINTEGER64I_VPROC)(GLenum target, GLuint index, GLint64 * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64 * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERI_VPROC)(GLenum target, GLuint index, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETINTEGERVPROC)(GLenum pname, GLint * data); +typedef void (GLAD_API_PTR *PFNGLGETLIGHTFVPROC)(GLenum light, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETLIGHTIVPROC)(GLenum light, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETMAPDVPROC)(GLenum target, GLenum query, GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLGETMAPFVPROC)(GLenum target, GLenum query, GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLGETMAPIVPROC)(GLenum target, GLenum query, GLint * v); +typedef void (GLAD_API_PTR *PFNGLGETMATERIALFVPROC)(GLenum face, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETMATERIALIVPROC)(GLenum face, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETMULTISAMPLEFVPROC)(GLenum pname, GLuint index, GLfloat * val); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPFVPROC)(GLenum map, GLfloat * values); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPUIVPROC)(GLenum map, GLuint * values); +typedef void (GLAD_API_PTR *PFNGLGETPIXELMAPUSVPROC)(GLenum map, GLushort * values); +typedef void (GLAD_API_PTR *PFNGLGETPOINTERVPROC)(GLenum pname, void ** params); +typedef void (GLAD_API_PTR *PFNGLGETPOLYGONSTIPPLEPROC)(GLubyte * mask); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64 * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETQUERYIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * infoLog); +typedef void (GLAD_API_PTR *PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei * length, GLchar * source); +typedef void (GLAD_API_PTR *PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint * params); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGPROC)(GLenum name); +typedef const GLubyte * (GLAD_API_PTR *PFNGLGETSTRINGIPROC)(GLenum name, GLuint index); +typedef void (GLAD_API_PTR *PFNGLGETSYNCIVPROC)(GLsync sync, GLenum pname, GLsizei count, GLsizei * length, GLint * values); +typedef void (GLAD_API_PTR *PFNGLGETTEXENVFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXENVIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENDVPROC)(GLenum coord, GLenum pname, GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENFVPROC)(GLenum coord, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXGENIVPROC)(GLenum coord, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLGETTEXLEVELPARAMETERFVPROC)(GLenum target, GLint level, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXLEVELPARAMETERIVPROC)(GLenum target, GLint level, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei * length, GLsizei * size, GLenum * type, GLchar * name); +typedef GLuint (GLAD_API_PTR *PFNGLGETUNIFORMBLOCKINDEXPROC)(GLuint program, const GLchar * uniformBlockName); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMINDICESPROC)(GLuint program, GLsizei uniformCount, const GLchar *const* uniformNames, GLuint * uniformIndices); +typedef GLint (GLAD_API_PTR *PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar * name); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETUNIFORMUIVPROC)(GLuint program, GLint location, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIUIVPROC)(GLuint index, GLenum pname, GLuint * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void ** pointer); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBDVPROC)(GLuint index, GLenum pname, GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint * params); +typedef void (GLAD_API_PTR *PFNGLHINTPROC)(GLenum target, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLINDEXMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLINDEXPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLINDEXDPROC)(GLdouble c); +typedef void (GLAD_API_PTR *PFNGLINDEXDVPROC)(const GLdouble * c); +typedef void (GLAD_API_PTR *PFNGLINDEXFPROC)(GLfloat c); +typedef void (GLAD_API_PTR *PFNGLINDEXFVPROC)(const GLfloat * c); +typedef void (GLAD_API_PTR *PFNGLINDEXIPROC)(GLint c); +typedef void (GLAD_API_PTR *PFNGLINDEXIVPROC)(const GLint * c); +typedef void (GLAD_API_PTR *PFNGLINDEXSPROC)(GLshort c); +typedef void (GLAD_API_PTR *PFNGLINDEXSVPROC)(const GLshort * c); +typedef void (GLAD_API_PTR *PFNGLINDEXUBPROC)(GLubyte c); +typedef void (GLAD_API_PTR *PFNGLINDEXUBVPROC)(const GLubyte * c); +typedef void (GLAD_API_PTR *PFNGLINITNAMESPROC)(void); +typedef void (GLAD_API_PTR *PFNGLINTERLEAVEDARRAYSPROC)(GLenum format, GLsizei stride, const void * pointer); +typedef GLboolean (GLAD_API_PTR *PFNGLISBUFFERPROC)(GLuint buffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDPROC)(GLenum cap); +typedef GLboolean (GLAD_API_PTR *PFNGLISENABLEDIPROC)(GLenum target, GLuint index); +typedef GLboolean (GLAD_API_PTR *PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISLISTPROC)(GLuint list); +typedef GLboolean (GLAD_API_PTR *PFNGLISPROGRAMPROC)(GLuint program); +typedef GLboolean (GLAD_API_PTR *PFNGLISQUERYPROC)(GLuint id); +typedef GLboolean (GLAD_API_PTR *PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer); +typedef GLboolean (GLAD_API_PTR *PFNGLISSAMPLERPROC)(GLuint sampler); +typedef GLboolean (GLAD_API_PTR *PFNGLISSHADERPROC)(GLuint shader); +typedef GLboolean (GLAD_API_PTR *PFNGLISSYNCPROC)(GLsync sync); +typedef GLboolean (GLAD_API_PTR *PFNGLISTEXTUREPROC)(GLuint texture); +typedef GLboolean (GLAD_API_PTR *PFNGLISVERTEXARRAYPROC)(GLuint array); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLLIGHTMODELIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTFPROC)(GLenum light, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLLIGHTFVPROC)(GLenum light, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLLIGHTIPROC)(GLenum light, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLLIGHTIVPROC)(GLenum light, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLLINESTIPPLEPROC)(GLint factor, GLushort pattern); +typedef void (GLAD_API_PTR *PFNGLLINEWIDTHPROC)(GLfloat width); +typedef void (GLAD_API_PTR *PFNGLLINKPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLLISTBASEPROC)(GLuint base); +typedef void (GLAD_API_PTR *PFNGLLOADIDENTITYPROC)(void); +typedef void (GLAD_API_PTR *PFNGLLOADMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLLOADMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLLOADNAMEPROC)(GLuint name); +typedef void (GLAD_API_PTR *PFNGLLOADTRANSPOSEMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLLOADTRANSPOSEMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLLOGICOPPROC)(GLenum opcode); +typedef void (GLAD_API_PTR *PFNGLMAP1DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint stride, GLint order, const GLdouble * points); +typedef void (GLAD_API_PTR *PFNGLMAP1FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint stride, GLint order, const GLfloat * points); +typedef void (GLAD_API_PTR *PFNGLMAP2DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint ustride, GLint uorder, GLdouble v1, GLdouble v2, GLint vstride, GLint vorder, const GLdouble * points); +typedef void (GLAD_API_PTR *PFNGLMAP2FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint ustride, GLint uorder, GLfloat v1, GLfloat v2, GLint vstride, GLint vorder, const GLfloat * points); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERPROC)(GLenum target, GLenum access); +typedef void * (GLAD_API_PTR *PFNGLMAPBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GLAD_API_PTR *PFNGLMAPGRID1DPROC)(GLint un, GLdouble u1, GLdouble u2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID1FPROC)(GLint un, GLfloat u1, GLfloat u2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID2DPROC)(GLint un, GLdouble u1, GLdouble u2, GLint vn, GLdouble v1, GLdouble v2); +typedef void (GLAD_API_PTR *PFNGLMAPGRID2FPROC)(GLint un, GLfloat u1, GLfloat u2, GLint vn, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLMATERIALFPROC)(GLenum face, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLMATERIALFVPROC)(GLenum face, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLMATERIALIPROC)(GLenum face, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLMATERIALIVPROC)(GLenum face, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLMATRIXMODEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLMULTMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLMULTMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLMULTTRANSPOSEMATRIXDPROC)(const GLdouble * m); +typedef void (GLAD_API_PTR *PFNGLMULTTRANSPOSEMATRIXFPROC)(const GLfloat * m); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWARRAYSPROC)(GLenum mode, const GLint * first, const GLsizei * count, GLsizei drawcount); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWELEMENTSPROC)(GLenum mode, const GLsizei * count, GLenum type, const void *const* indices, GLsizei drawcount); +typedef void (GLAD_API_PTR *PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, const GLsizei * count, GLenum type, const void *const* indices, GLsizei drawcount, const GLint * basevertex); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1DPROC)(GLenum target, GLdouble s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1FPROC)(GLenum target, GLfloat s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1IPROC)(GLenum target, GLint s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1SPROC)(GLenum target, GLshort s); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD1SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2DPROC)(GLenum target, GLdouble s, GLdouble t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2FPROC)(GLenum target, GLfloat s, GLfloat t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2IPROC)(GLenum target, GLint s, GLint t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2SPROC)(GLenum target, GLshort s, GLshort t); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD2SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3IPROC)(GLenum target, GLint s, GLint t, GLint r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3SPROC)(GLenum target, GLshort s, GLshort t, GLshort r); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD3SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4DVPROC)(GLenum target, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4FVPROC)(GLenum target, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4IPROC)(GLenum target, GLint s, GLint t, GLint r, GLint q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4IVPROC)(GLenum target, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4SPROC)(GLenum target, GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORD4SVPROC)(GLenum target, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP1UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP1UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP2UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP2UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIPROC)(GLenum texture, GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIVPROC)(GLenum texture, GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLNEWLISTPROC)(GLuint list, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLNORMAL3BPROC)(GLbyte nx, GLbyte ny, GLbyte nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3DPROC)(GLdouble nx, GLdouble ny, GLdouble nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3FPROC)(GLfloat nx, GLfloat ny, GLfloat nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3IPROC)(GLint nx, GLint ny, GLint nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLNORMAL3SPROC)(GLshort nx, GLshort ny, GLshort nz); +typedef void (GLAD_API_PTR *PFNGLNORMAL3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLNORMALP3UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLNORMALP3UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLNORMALPOINTERPROC)(GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLORTHOPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GLAD_API_PTR *PFNGLPASSTHROUGHPROC)(GLfloat token); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPFVPROC)(GLenum map, GLsizei mapsize, const GLfloat * values); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPUIVPROC)(GLenum map, GLsizei mapsize, const GLuint * values); +typedef void (GLAD_API_PTR *PFNGLPIXELMAPUSVPROC)(GLenum map, GLsizei mapsize, const GLushort * values); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPIXELTRANSFERFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPIXELTRANSFERIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPIXELZOOMPROC)(GLfloat xfactor, GLfloat yfactor); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERFPROC)(GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERFVPROC)(GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERIPROC)(GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLPOINTPARAMETERIVPROC)(GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLPOINTSIZEPROC)(GLfloat size); +typedef void (GLAD_API_PTR *PFNGLPOLYGONMODEPROC)(GLenum face, GLenum mode); +typedef void (GLAD_API_PTR *PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units); +typedef void (GLAD_API_PTR *PFNGLPOLYGONSTIPPLEPROC)(const GLubyte * mask); +typedef void (GLAD_API_PTR *PFNGLPOPATTRIBPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPCLIENTATTRIBPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPMATRIXPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPOPNAMEPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPRIMITIVERESTARTINDEXPROC)(GLuint index); +typedef void (GLAD_API_PTR *PFNGLPRIORITIZETEXTURESPROC)(GLsizei n, const GLuint * textures, const GLfloat * priorities); +typedef void (GLAD_API_PTR *PFNGLPROVOKINGVERTEXPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLPUSHATTRIBPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLPUSHCLIENTATTRIBPROC)(GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLPUSHMATRIXPROC)(void); +typedef void (GLAD_API_PTR *PFNGLPUSHNAMEPROC)(GLuint name); +typedef void (GLAD_API_PTR *PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4IPROC)(GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLRASTERPOS4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLREADBUFFERPROC)(GLenum src); +typedef void (GLAD_API_PTR *PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void * pixels); +typedef void (GLAD_API_PTR *PFNGLRECTDPROC)(GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2); +typedef void (GLAD_API_PTR *PFNGLRECTDVPROC)(const GLdouble * v1, const GLdouble * v2); +typedef void (GLAD_API_PTR *PFNGLRECTFPROC)(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2); +typedef void (GLAD_API_PTR *PFNGLRECTFVPROC)(const GLfloat * v1, const GLfloat * v2); +typedef void (GLAD_API_PTR *PFNGLRECTIPROC)(GLint x1, GLint y1, GLint x2, GLint y2); +typedef void (GLAD_API_PTR *PFNGLRECTIVPROC)(const GLint * v1, const GLint * v2); +typedef void (GLAD_API_PTR *PFNGLRECTSPROC)(GLshort x1, GLshort y1, GLshort x2, GLshort y2); +typedef void (GLAD_API_PTR *PFNGLRECTSVPROC)(const GLshort * v1, const GLshort * v2); +typedef GLint (GLAD_API_PTR *PFNGLRENDERMODEPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLROTATEDPROC)(GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLROTATEFPROC)(GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert); +typedef void (GLAD_API_PTR *PFNGLSAMPLEMASKIPROC)(GLuint maskNumber, GLbitfield mask); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, const GLint * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, const GLuint * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERFPROC)(GLuint sampler, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, const GLfloat * param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIPROC)(GLuint sampler, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, const GLint * param); +typedef void (GLAD_API_PTR *PFNGLSCALEDPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLSCALEFPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3BVPROC)(const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3IPROC)(GLint red, GLint green, GLint blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UBVPROC)(const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3UIVPROC)(const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLOR3USVPROC)(const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORP3UIPROC)(GLenum type, GLuint color); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORP3UIVPROC)(GLenum type, const GLuint * color); +typedef void (GLAD_API_PTR *PFNGLSECONDARYCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLSELECTBUFFERPROC)(GLsizei size, GLuint * buffer); +typedef void (GLAD_API_PTR *PFNGLSHADEMODELPROC)(GLenum mode); +typedef void (GLAD_API_PTR *PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const* string, const GLint * length); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKPROC)(GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass); +typedef void (GLAD_API_PTR *PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (GLAD_API_PTR *PFNGLTEXBUFFERPROC)(GLenum target, GLenum internalformat, GLuint buffer); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1DPROC)(GLdouble s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1FPROC)(GLfloat s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1IPROC)(GLint s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1SPROC)(GLshort s); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD1SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2DPROC)(GLdouble s, GLdouble t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2FPROC)(GLfloat s, GLfloat t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2IPROC)(GLint s, GLint t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2SPROC)(GLshort s, GLshort t); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3DPROC)(GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3FPROC)(GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3IPROC)(GLint s, GLint t, GLint r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3SPROC)(GLshort s, GLshort t, GLshort r); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4DPROC)(GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4FPROC)(GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4IPROC)(GLint s, GLint t, GLint r, GLint q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4SPROC)(GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAD_API_PTR *PFNGLTEXCOORD4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP1UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP1UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP2UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP2UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP3UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP3UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP4UIPROC)(GLenum type, GLuint coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDP4UIVPROC)(GLenum type, const GLuint * coords); +typedef void (GLAD_API_PTR *PFNGLTEXCOORDPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLTEXENVFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXENVFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXENVIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXENVIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENDPROC)(GLenum coord, GLenum pname, GLdouble param); +typedef void (GLAD_API_PTR *PFNGLTEXGENDVPROC)(GLenum coord, GLenum pname, const GLdouble * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENFPROC)(GLenum coord, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXGENFVPROC)(GLenum coord, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXGENIPROC)(GLenum coord, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXGENIVPROC)(GLenum coord, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE1DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE3DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXIMAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, const GLuint * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat * params); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +typedef void (GLAD_API_PTR *PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint * params); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void * pixels); +typedef void (GLAD_API_PTR *PFNGLTRANSFORMFEEDBACKVARYINGSPROC)(GLuint program, GLsizei count, const GLchar *const* varyings, GLenum bufferMode); +typedef void (GLAD_API_PTR *PFNGLTRANSLATEDPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLTRANSLATEFPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IPROC)(GLint location, GLint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1UIPROC)(GLint location, GLuint v0); +typedef void (GLAD_API_PTR *PFNGLUNIFORM1UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2UIPROC)(GLint location, GLuint v0, GLuint v1); +typedef void (GLAD_API_PTR *PFNGLUNIFORM2UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2); +typedef void (GLAD_API_PTR *PFNGLUNIFORM3UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +typedef void (GLAD_API_PTR *PFNGLUNIFORM4UIVPROC)(GLint location, GLsizei count, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMBLOCKBINDINGPROC)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX2X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX3X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef void (GLAD_API_PTR *PFNGLUNIFORMMATRIX4X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat * value); +typedef GLboolean (GLAD_API_PTR *PFNGLUNMAPBUFFERPROC)(GLenum target); +typedef void (GLAD_API_PTR *PFNGLUSEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVALIDATEPROGRAMPROC)(GLuint program); +typedef void (GLAD_API_PTR *PFNGLVERTEX2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLVERTEX2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLVERTEX3SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4IPROC)(GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEX4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLVERTEX4SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1DPROC)(GLuint index, GLdouble x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1SPROC)(GLuint index, GLshort x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB1SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2DPROC)(GLuint index, GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2SPROC)(GLuint index, GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB2SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3SPROC)(GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB3SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NBVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NIVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NSVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUBPROC)(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4NUSVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4BVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4DVPROC)(GLuint index, const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4SPROC)(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4UBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIB4USVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBDIVISORPROC)(GLuint index, GLuint divisor); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1IPROC)(GLuint index, GLint x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1UIPROC)(GLuint index, GLuint x); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI1UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2IPROC)(GLuint index, GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2UIPROC)(GLuint index, GLuint x, GLuint y); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI2UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3IPROC)(GLuint index, GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI3UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4BVPROC)(GLuint index, const GLbyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4IPROC)(GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4IVPROC)(GLuint index, const GLint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4SVPROC)(GLuint index, const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UBVPROC)(GLuint index, const GLubyte * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4UIVPROC)(GLuint index, const GLuint * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBI4USVPROC)(GLuint index, const GLushort * v); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBIPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP1UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP1UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP2UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP2UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP3UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP3UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP4UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBP4UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVERTEXP2UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP2UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP3UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP3UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP4UIPROC)(GLenum type, GLuint value); +typedef void (GLAD_API_PTR *PFNGLVERTEXP4UIVPROC)(GLenum type, const GLuint * value); +typedef void (GLAD_API_PTR *PFNGLVERTEXPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void * pointer); +typedef void (GLAD_API_PTR *PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAD_API_PTR *PFNGLWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2DPROC)(GLdouble x, GLdouble y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2FPROC)(GLfloat x, GLfloat y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2IPROC)(GLint x, GLint y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2SPROC)(GLshort x, GLshort y); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS2SVPROC)(const GLshort * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3DVPROC)(const GLdouble * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3FVPROC)(const GLfloat * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IPROC)(GLint x, GLint y, GLint z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IVPROC)(const GLint * v); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SPROC)(GLshort x, GLshort y, GLshort z); +typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SVPROC)(const GLshort * v); + +GLAD_API_CALL PFNGLACCUMPROC glad_glAccum; +#define glAccum glad_glAccum +GLAD_API_CALL PFNGLACTIVETEXTUREPROC glad_glActiveTexture; +#define glActiveTexture glad_glActiveTexture +GLAD_API_CALL PFNGLALPHAFUNCPROC glad_glAlphaFunc; +#define glAlphaFunc glad_glAlphaFunc +GLAD_API_CALL PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident; +#define glAreTexturesResident glad_glAreTexturesResident +GLAD_API_CALL PFNGLARRAYELEMENTPROC glad_glArrayElement; +#define glArrayElement glad_glArrayElement +GLAD_API_CALL PFNGLATTACHSHADERPROC glad_glAttachShader; +#define glAttachShader glad_glAttachShader +GLAD_API_CALL PFNGLBEGINPROC glad_glBegin; +#define glBegin glad_glBegin +GLAD_API_CALL PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender; +#define glBeginConditionalRender glad_glBeginConditionalRender +GLAD_API_CALL PFNGLBEGINQUERYPROC glad_glBeginQuery; +#define glBeginQuery glad_glBeginQuery +GLAD_API_CALL PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback; +#define glBeginTransformFeedback glad_glBeginTransformFeedback +GLAD_API_CALL PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation; +#define glBindAttribLocation glad_glBindAttribLocation +GLAD_API_CALL PFNGLBINDBUFFERPROC glad_glBindBuffer; +#define glBindBuffer glad_glBindBuffer +GLAD_API_CALL PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase; +#define glBindBufferBase glad_glBindBufferBase +GLAD_API_CALL PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange; +#define glBindBufferRange glad_glBindBufferRange +GLAD_API_CALL PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation; +#define glBindFragDataLocation glad_glBindFragDataLocation +GLAD_API_CALL PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed; +#define glBindFragDataLocationIndexed glad_glBindFragDataLocationIndexed +GLAD_API_CALL PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer; +#define glBindFramebuffer glad_glBindFramebuffer +GLAD_API_CALL PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer; +#define glBindRenderbuffer glad_glBindRenderbuffer +GLAD_API_CALL PFNGLBINDSAMPLERPROC glad_glBindSampler; +#define glBindSampler glad_glBindSampler +GLAD_API_CALL PFNGLBINDTEXTUREPROC glad_glBindTexture; +#define glBindTexture glad_glBindTexture +GLAD_API_CALL PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray; +#define glBindVertexArray glad_glBindVertexArray +GLAD_API_CALL PFNGLBITMAPPROC glad_glBitmap; +#define glBitmap glad_glBitmap +GLAD_API_CALL PFNGLBLENDCOLORPROC glad_glBlendColor; +#define glBlendColor glad_glBlendColor +GLAD_API_CALL PFNGLBLENDEQUATIONPROC glad_glBlendEquation; +#define glBlendEquation glad_glBlendEquation +GLAD_API_CALL PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate; +#define glBlendEquationSeparate glad_glBlendEquationSeparate +GLAD_API_CALL PFNGLBLENDFUNCPROC glad_glBlendFunc; +#define glBlendFunc glad_glBlendFunc +GLAD_API_CALL PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; +#define glBlendFuncSeparate glad_glBlendFuncSeparate +GLAD_API_CALL PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer; +#define glBlitFramebuffer glad_glBlitFramebuffer +GLAD_API_CALL PFNGLBUFFERDATAPROC glad_glBufferData; +#define glBufferData glad_glBufferData +GLAD_API_CALL PFNGLBUFFERSUBDATAPROC glad_glBufferSubData; +#define glBufferSubData glad_glBufferSubData +GLAD_API_CALL PFNGLCALLLISTPROC glad_glCallList; +#define glCallList glad_glCallList +GLAD_API_CALL PFNGLCALLLISTSPROC glad_glCallLists; +#define glCallLists glad_glCallLists +GLAD_API_CALL PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus; +#define glCheckFramebufferStatus glad_glCheckFramebufferStatus +GLAD_API_CALL PFNGLCLAMPCOLORPROC glad_glClampColor; +#define glClampColor glad_glClampColor +GLAD_API_CALL PFNGLCLEARPROC glad_glClear; +#define glClear glad_glClear +GLAD_API_CALL PFNGLCLEARACCUMPROC glad_glClearAccum; +#define glClearAccum glad_glClearAccum +GLAD_API_CALL PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi; +#define glClearBufferfi glad_glClearBufferfi +GLAD_API_CALL PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv; +#define glClearBufferfv glad_glClearBufferfv +GLAD_API_CALL PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv; +#define glClearBufferiv glad_glClearBufferiv +GLAD_API_CALL PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv; +#define glClearBufferuiv glad_glClearBufferuiv +GLAD_API_CALL PFNGLCLEARCOLORPROC glad_glClearColor; +#define glClearColor glad_glClearColor +GLAD_API_CALL PFNGLCLEARDEPTHPROC glad_glClearDepth; +#define glClearDepth glad_glClearDepth +GLAD_API_CALL PFNGLCLEARINDEXPROC glad_glClearIndex; +#define glClearIndex glad_glClearIndex +GLAD_API_CALL PFNGLCLEARSTENCILPROC glad_glClearStencil; +#define glClearStencil glad_glClearStencil +GLAD_API_CALL PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture; +#define glClientActiveTexture glad_glClientActiveTexture +GLAD_API_CALL PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync; +#define glClientWaitSync glad_glClientWaitSync +GLAD_API_CALL PFNGLCLIPPLANEPROC glad_glClipPlane; +#define glClipPlane glad_glClipPlane +GLAD_API_CALL PFNGLCOLOR3BPROC glad_glColor3b; +#define glColor3b glad_glColor3b +GLAD_API_CALL PFNGLCOLOR3BVPROC glad_glColor3bv; +#define glColor3bv glad_glColor3bv +GLAD_API_CALL PFNGLCOLOR3DPROC glad_glColor3d; +#define glColor3d glad_glColor3d +GLAD_API_CALL PFNGLCOLOR3DVPROC glad_glColor3dv; +#define glColor3dv glad_glColor3dv +GLAD_API_CALL PFNGLCOLOR3FPROC glad_glColor3f; +#define glColor3f glad_glColor3f +GLAD_API_CALL PFNGLCOLOR3FVPROC glad_glColor3fv; +#define glColor3fv glad_glColor3fv +GLAD_API_CALL PFNGLCOLOR3IPROC glad_glColor3i; +#define glColor3i glad_glColor3i +GLAD_API_CALL PFNGLCOLOR3IVPROC glad_glColor3iv; +#define glColor3iv glad_glColor3iv +GLAD_API_CALL PFNGLCOLOR3SPROC glad_glColor3s; +#define glColor3s glad_glColor3s +GLAD_API_CALL PFNGLCOLOR3SVPROC glad_glColor3sv; +#define glColor3sv glad_glColor3sv +GLAD_API_CALL PFNGLCOLOR3UBPROC glad_glColor3ub; +#define glColor3ub glad_glColor3ub +GLAD_API_CALL PFNGLCOLOR3UBVPROC glad_glColor3ubv; +#define glColor3ubv glad_glColor3ubv +GLAD_API_CALL PFNGLCOLOR3UIPROC glad_glColor3ui; +#define glColor3ui glad_glColor3ui +GLAD_API_CALL PFNGLCOLOR3UIVPROC glad_glColor3uiv; +#define glColor3uiv glad_glColor3uiv +GLAD_API_CALL PFNGLCOLOR3USPROC glad_glColor3us; +#define glColor3us glad_glColor3us +GLAD_API_CALL PFNGLCOLOR3USVPROC glad_glColor3usv; +#define glColor3usv glad_glColor3usv +GLAD_API_CALL PFNGLCOLOR4BPROC glad_glColor4b; +#define glColor4b glad_glColor4b +GLAD_API_CALL PFNGLCOLOR4BVPROC glad_glColor4bv; +#define glColor4bv glad_glColor4bv +GLAD_API_CALL PFNGLCOLOR4DPROC glad_glColor4d; +#define glColor4d glad_glColor4d +GLAD_API_CALL PFNGLCOLOR4DVPROC glad_glColor4dv; +#define glColor4dv glad_glColor4dv +GLAD_API_CALL PFNGLCOLOR4FPROC glad_glColor4f; +#define glColor4f glad_glColor4f +GLAD_API_CALL PFNGLCOLOR4FVPROC glad_glColor4fv; +#define glColor4fv glad_glColor4fv +GLAD_API_CALL PFNGLCOLOR4IPROC glad_glColor4i; +#define glColor4i glad_glColor4i +GLAD_API_CALL PFNGLCOLOR4IVPROC glad_glColor4iv; +#define glColor4iv glad_glColor4iv +GLAD_API_CALL PFNGLCOLOR4SPROC glad_glColor4s; +#define glColor4s glad_glColor4s +GLAD_API_CALL PFNGLCOLOR4SVPROC glad_glColor4sv; +#define glColor4sv glad_glColor4sv +GLAD_API_CALL PFNGLCOLOR4UBPROC glad_glColor4ub; +#define glColor4ub glad_glColor4ub +GLAD_API_CALL PFNGLCOLOR4UBVPROC glad_glColor4ubv; +#define glColor4ubv glad_glColor4ubv +GLAD_API_CALL PFNGLCOLOR4UIPROC glad_glColor4ui; +#define glColor4ui glad_glColor4ui +GLAD_API_CALL PFNGLCOLOR4UIVPROC glad_glColor4uiv; +#define glColor4uiv glad_glColor4uiv +GLAD_API_CALL PFNGLCOLOR4USPROC glad_glColor4us; +#define glColor4us glad_glColor4us +GLAD_API_CALL PFNGLCOLOR4USVPROC glad_glColor4usv; +#define glColor4usv glad_glColor4usv +GLAD_API_CALL PFNGLCOLORMASKPROC glad_glColorMask; +#define glColorMask glad_glColorMask +GLAD_API_CALL PFNGLCOLORMASKIPROC glad_glColorMaski; +#define glColorMaski glad_glColorMaski +GLAD_API_CALL PFNGLCOLORMATERIALPROC glad_glColorMaterial; +#define glColorMaterial glad_glColorMaterial +GLAD_API_CALL PFNGLCOLORP3UIPROC glad_glColorP3ui; +#define glColorP3ui glad_glColorP3ui +GLAD_API_CALL PFNGLCOLORP3UIVPROC glad_glColorP3uiv; +#define glColorP3uiv glad_glColorP3uiv +GLAD_API_CALL PFNGLCOLORP4UIPROC glad_glColorP4ui; +#define glColorP4ui glad_glColorP4ui +GLAD_API_CALL PFNGLCOLORP4UIVPROC glad_glColorP4uiv; +#define glColorP4uiv glad_glColorP4uiv +GLAD_API_CALL PFNGLCOLORPOINTERPROC glad_glColorPointer; +#define glColorPointer glad_glColorPointer +GLAD_API_CALL PFNGLCOMPILESHADERPROC glad_glCompileShader; +#define glCompileShader glad_glCompileShader +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D; +#define glCompressedTexImage1D glad_glCompressedTexImage1D +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D; +#define glCompressedTexImage2D glad_glCompressedTexImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D; +#define glCompressedTexImage3D glad_glCompressedTexImage3D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D; +#define glCompressedTexSubImage1D glad_glCompressedTexSubImage1D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D; +#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D +GLAD_API_CALL PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D; +#define glCompressedTexSubImage3D glad_glCompressedTexSubImage3D +GLAD_API_CALL PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData; +#define glCopyBufferSubData glad_glCopyBufferSubData +GLAD_API_CALL PFNGLCOPYPIXELSPROC glad_glCopyPixels; +#define glCopyPixels glad_glCopyPixels +GLAD_API_CALL PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D; +#define glCopyTexImage1D glad_glCopyTexImage1D +GLAD_API_CALL PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D; +#define glCopyTexImage2D glad_glCopyTexImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D; +#define glCopyTexSubImage1D glad_glCopyTexSubImage1D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D; +#define glCopyTexSubImage2D glad_glCopyTexSubImage2D +GLAD_API_CALL PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D; +#define glCopyTexSubImage3D glad_glCopyTexSubImage3D +GLAD_API_CALL PFNGLCREATEPROGRAMPROC glad_glCreateProgram; +#define glCreateProgram glad_glCreateProgram +GLAD_API_CALL PFNGLCREATESHADERPROC glad_glCreateShader; +#define glCreateShader glad_glCreateShader +GLAD_API_CALL PFNGLCULLFACEPROC glad_glCullFace; +#define glCullFace glad_glCullFace +GLAD_API_CALL PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers; +#define glDeleteBuffers glad_glDeleteBuffers +GLAD_API_CALL PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers; +#define glDeleteFramebuffers glad_glDeleteFramebuffers +GLAD_API_CALL PFNGLDELETELISTSPROC glad_glDeleteLists; +#define glDeleteLists glad_glDeleteLists +GLAD_API_CALL PFNGLDELETEPROGRAMPROC glad_glDeleteProgram; +#define glDeleteProgram glad_glDeleteProgram +GLAD_API_CALL PFNGLDELETEQUERIESPROC glad_glDeleteQueries; +#define glDeleteQueries glad_glDeleteQueries +GLAD_API_CALL PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers; +#define glDeleteRenderbuffers glad_glDeleteRenderbuffers +GLAD_API_CALL PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers; +#define glDeleteSamplers glad_glDeleteSamplers +GLAD_API_CALL PFNGLDELETESHADERPROC glad_glDeleteShader; +#define glDeleteShader glad_glDeleteShader +GLAD_API_CALL PFNGLDELETESYNCPROC glad_glDeleteSync; +#define glDeleteSync glad_glDeleteSync +GLAD_API_CALL PFNGLDELETETEXTURESPROC glad_glDeleteTextures; +#define glDeleteTextures glad_glDeleteTextures +GLAD_API_CALL PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays; +#define glDeleteVertexArrays glad_glDeleteVertexArrays +GLAD_API_CALL PFNGLDEPTHFUNCPROC glad_glDepthFunc; +#define glDepthFunc glad_glDepthFunc +GLAD_API_CALL PFNGLDEPTHMASKPROC glad_glDepthMask; +#define glDepthMask glad_glDepthMask +GLAD_API_CALL PFNGLDEPTHRANGEPROC glad_glDepthRange; +#define glDepthRange glad_glDepthRange +GLAD_API_CALL PFNGLDETACHSHADERPROC glad_glDetachShader; +#define glDetachShader glad_glDetachShader +GLAD_API_CALL PFNGLDISABLEPROC glad_glDisable; +#define glDisable glad_glDisable +GLAD_API_CALL PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState; +#define glDisableClientState glad_glDisableClientState +GLAD_API_CALL PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray; +#define glDisableVertexAttribArray glad_glDisableVertexAttribArray +GLAD_API_CALL PFNGLDISABLEIPROC glad_glDisablei; +#define glDisablei glad_glDisablei +GLAD_API_CALL PFNGLDRAWARRAYSPROC glad_glDrawArrays; +#define glDrawArrays glad_glDrawArrays +GLAD_API_CALL PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced; +#define glDrawArraysInstanced glad_glDrawArraysInstanced +GLAD_API_CALL PFNGLDRAWBUFFERPROC glad_glDrawBuffer; +#define glDrawBuffer glad_glDrawBuffer +GLAD_API_CALL PFNGLDRAWBUFFERSPROC glad_glDrawBuffers; +#define glDrawBuffers glad_glDrawBuffers +GLAD_API_CALL PFNGLDRAWELEMENTSPROC glad_glDrawElements; +#define glDrawElements glad_glDrawElements +GLAD_API_CALL PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex; +#define glDrawElementsBaseVertex glad_glDrawElementsBaseVertex +GLAD_API_CALL PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced; +#define glDrawElementsInstanced glad_glDrawElementsInstanced +GLAD_API_CALL PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex; +#define glDrawElementsInstancedBaseVertex glad_glDrawElementsInstancedBaseVertex +GLAD_API_CALL PFNGLDRAWPIXELSPROC glad_glDrawPixels; +#define glDrawPixels glad_glDrawPixels +GLAD_API_CALL PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements; +#define glDrawRangeElements glad_glDrawRangeElements +GLAD_API_CALL PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex; +#define glDrawRangeElementsBaseVertex glad_glDrawRangeElementsBaseVertex +GLAD_API_CALL PFNGLEDGEFLAGPROC glad_glEdgeFlag; +#define glEdgeFlag glad_glEdgeFlag +GLAD_API_CALL PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer; +#define glEdgeFlagPointer glad_glEdgeFlagPointer +GLAD_API_CALL PFNGLEDGEFLAGVPROC glad_glEdgeFlagv; +#define glEdgeFlagv glad_glEdgeFlagv +GLAD_API_CALL PFNGLENABLEPROC glad_glEnable; +#define glEnable glad_glEnable +GLAD_API_CALL PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState; +#define glEnableClientState glad_glEnableClientState +GLAD_API_CALL PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray; +#define glEnableVertexAttribArray glad_glEnableVertexAttribArray +GLAD_API_CALL PFNGLENABLEIPROC glad_glEnablei; +#define glEnablei glad_glEnablei +GLAD_API_CALL PFNGLENDPROC glad_glEnd; +#define glEnd glad_glEnd +GLAD_API_CALL PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender; +#define glEndConditionalRender glad_glEndConditionalRender +GLAD_API_CALL PFNGLENDLISTPROC glad_glEndList; +#define glEndList glad_glEndList +GLAD_API_CALL PFNGLENDQUERYPROC glad_glEndQuery; +#define glEndQuery glad_glEndQuery +GLAD_API_CALL PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback; +#define glEndTransformFeedback glad_glEndTransformFeedback +GLAD_API_CALL PFNGLEVALCOORD1DPROC glad_glEvalCoord1d; +#define glEvalCoord1d glad_glEvalCoord1d +GLAD_API_CALL PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv; +#define glEvalCoord1dv glad_glEvalCoord1dv +GLAD_API_CALL PFNGLEVALCOORD1FPROC glad_glEvalCoord1f; +#define glEvalCoord1f glad_glEvalCoord1f +GLAD_API_CALL PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv; +#define glEvalCoord1fv glad_glEvalCoord1fv +GLAD_API_CALL PFNGLEVALCOORD2DPROC glad_glEvalCoord2d; +#define glEvalCoord2d glad_glEvalCoord2d +GLAD_API_CALL PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv; +#define glEvalCoord2dv glad_glEvalCoord2dv +GLAD_API_CALL PFNGLEVALCOORD2FPROC glad_glEvalCoord2f; +#define glEvalCoord2f glad_glEvalCoord2f +GLAD_API_CALL PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv; +#define glEvalCoord2fv glad_glEvalCoord2fv +GLAD_API_CALL PFNGLEVALMESH1PROC glad_glEvalMesh1; +#define glEvalMesh1 glad_glEvalMesh1 +GLAD_API_CALL PFNGLEVALMESH2PROC glad_glEvalMesh2; +#define glEvalMesh2 glad_glEvalMesh2 +GLAD_API_CALL PFNGLEVALPOINT1PROC glad_glEvalPoint1; +#define glEvalPoint1 glad_glEvalPoint1 +GLAD_API_CALL PFNGLEVALPOINT2PROC glad_glEvalPoint2; +#define glEvalPoint2 glad_glEvalPoint2 +GLAD_API_CALL PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer; +#define glFeedbackBuffer glad_glFeedbackBuffer +GLAD_API_CALL PFNGLFENCESYNCPROC glad_glFenceSync; +#define glFenceSync glad_glFenceSync +GLAD_API_CALL PFNGLFINISHPROC glad_glFinish; +#define glFinish glad_glFinish +GLAD_API_CALL PFNGLFLUSHPROC glad_glFlush; +#define glFlush glad_glFlush +GLAD_API_CALL PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange; +#define glFlushMappedBufferRange glad_glFlushMappedBufferRange +GLAD_API_CALL PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer; +#define glFogCoordPointer glad_glFogCoordPointer +GLAD_API_CALL PFNGLFOGCOORDDPROC glad_glFogCoordd; +#define glFogCoordd glad_glFogCoordd +GLAD_API_CALL PFNGLFOGCOORDDVPROC glad_glFogCoorddv; +#define glFogCoorddv glad_glFogCoorddv +GLAD_API_CALL PFNGLFOGCOORDFPROC glad_glFogCoordf; +#define glFogCoordf glad_glFogCoordf +GLAD_API_CALL PFNGLFOGCOORDFVPROC glad_glFogCoordfv; +#define glFogCoordfv glad_glFogCoordfv +GLAD_API_CALL PFNGLFOGFPROC glad_glFogf; +#define glFogf glad_glFogf +GLAD_API_CALL PFNGLFOGFVPROC glad_glFogfv; +#define glFogfv glad_glFogfv +GLAD_API_CALL PFNGLFOGIPROC glad_glFogi; +#define glFogi glad_glFogi +GLAD_API_CALL PFNGLFOGIVPROC glad_glFogiv; +#define glFogiv glad_glFogiv +GLAD_API_CALL PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer; +#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture; +#define glFramebufferTexture glad_glFramebufferTexture +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D; +#define glFramebufferTexture1D glad_glFramebufferTexture1D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D; +#define glFramebufferTexture2D glad_glFramebufferTexture2D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D; +#define glFramebufferTexture3D glad_glFramebufferTexture3D +GLAD_API_CALL PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer; +#define glFramebufferTextureLayer glad_glFramebufferTextureLayer +GLAD_API_CALL PFNGLFRONTFACEPROC glad_glFrontFace; +#define glFrontFace glad_glFrontFace +GLAD_API_CALL PFNGLFRUSTUMPROC glad_glFrustum; +#define glFrustum glad_glFrustum +GLAD_API_CALL PFNGLGENBUFFERSPROC glad_glGenBuffers; +#define glGenBuffers glad_glGenBuffers +GLAD_API_CALL PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers; +#define glGenFramebuffers glad_glGenFramebuffers +GLAD_API_CALL PFNGLGENLISTSPROC glad_glGenLists; +#define glGenLists glad_glGenLists +GLAD_API_CALL PFNGLGENQUERIESPROC glad_glGenQueries; +#define glGenQueries glad_glGenQueries +GLAD_API_CALL PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers; +#define glGenRenderbuffers glad_glGenRenderbuffers +GLAD_API_CALL PFNGLGENSAMPLERSPROC glad_glGenSamplers; +#define glGenSamplers glad_glGenSamplers +GLAD_API_CALL PFNGLGENTEXTURESPROC glad_glGenTextures; +#define glGenTextures glad_glGenTextures +GLAD_API_CALL PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays; +#define glGenVertexArrays glad_glGenVertexArrays +GLAD_API_CALL PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap; +#define glGenerateMipmap glad_glGenerateMipmap +GLAD_API_CALL PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib; +#define glGetActiveAttrib glad_glGetActiveAttrib +GLAD_API_CALL PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform; +#define glGetActiveUniform glad_glGetActiveUniform +GLAD_API_CALL PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName; +#define glGetActiveUniformBlockName glad_glGetActiveUniformBlockName +GLAD_API_CALL PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv; +#define glGetActiveUniformBlockiv glad_glGetActiveUniformBlockiv +GLAD_API_CALL PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName; +#define glGetActiveUniformName glad_glGetActiveUniformName +GLAD_API_CALL PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv; +#define glGetActiveUniformsiv glad_glGetActiveUniformsiv +GLAD_API_CALL PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders; +#define glGetAttachedShaders glad_glGetAttachedShaders +GLAD_API_CALL PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation; +#define glGetAttribLocation glad_glGetAttribLocation +GLAD_API_CALL PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v; +#define glGetBooleani_v glad_glGetBooleani_v +GLAD_API_CALL PFNGLGETBOOLEANVPROC glad_glGetBooleanv; +#define glGetBooleanv glad_glGetBooleanv +GLAD_API_CALL PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v; +#define glGetBufferParameteri64v glad_glGetBufferParameteri64v +GLAD_API_CALL PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv; +#define glGetBufferParameteriv glad_glGetBufferParameteriv +GLAD_API_CALL PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv; +#define glGetBufferPointerv glad_glGetBufferPointerv +GLAD_API_CALL PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData; +#define glGetBufferSubData glad_glGetBufferSubData +GLAD_API_CALL PFNGLGETCLIPPLANEPROC glad_glGetClipPlane; +#define glGetClipPlane glad_glGetClipPlane +GLAD_API_CALL PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage; +#define glGetCompressedTexImage glad_glGetCompressedTexImage +GLAD_API_CALL PFNGLGETDOUBLEVPROC glad_glGetDoublev; +#define glGetDoublev glad_glGetDoublev +GLAD_API_CALL PFNGLGETERRORPROC glad_glGetError; +#define glGetError glad_glGetError +GLAD_API_CALL PFNGLGETFLOATVPROC glad_glGetFloatv; +#define glGetFloatv glad_glGetFloatv +GLAD_API_CALL PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex; +#define glGetFragDataIndex glad_glGetFragDataIndex +GLAD_API_CALL PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation; +#define glGetFragDataLocation glad_glGetFragDataLocation +GLAD_API_CALL PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv; +#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv +GLAD_API_CALL PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v; +#define glGetInteger64i_v glad_glGetInteger64i_v +GLAD_API_CALL PFNGLGETINTEGER64VPROC glad_glGetInteger64v; +#define glGetInteger64v glad_glGetInteger64v +GLAD_API_CALL PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v; +#define glGetIntegeri_v glad_glGetIntegeri_v +GLAD_API_CALL PFNGLGETINTEGERVPROC glad_glGetIntegerv; +#define glGetIntegerv glad_glGetIntegerv +GLAD_API_CALL PFNGLGETLIGHTFVPROC glad_glGetLightfv; +#define glGetLightfv glad_glGetLightfv +GLAD_API_CALL PFNGLGETLIGHTIVPROC glad_glGetLightiv; +#define glGetLightiv glad_glGetLightiv +GLAD_API_CALL PFNGLGETMAPDVPROC glad_glGetMapdv; +#define glGetMapdv glad_glGetMapdv +GLAD_API_CALL PFNGLGETMAPFVPROC glad_glGetMapfv; +#define glGetMapfv glad_glGetMapfv +GLAD_API_CALL PFNGLGETMAPIVPROC glad_glGetMapiv; +#define glGetMapiv glad_glGetMapiv +GLAD_API_CALL PFNGLGETMATERIALFVPROC glad_glGetMaterialfv; +#define glGetMaterialfv glad_glGetMaterialfv +GLAD_API_CALL PFNGLGETMATERIALIVPROC glad_glGetMaterialiv; +#define glGetMaterialiv glad_glGetMaterialiv +GLAD_API_CALL PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv; +#define glGetMultisamplefv glad_glGetMultisamplefv +GLAD_API_CALL PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv; +#define glGetPixelMapfv glad_glGetPixelMapfv +GLAD_API_CALL PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv; +#define glGetPixelMapuiv glad_glGetPixelMapuiv +GLAD_API_CALL PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv; +#define glGetPixelMapusv glad_glGetPixelMapusv +GLAD_API_CALL PFNGLGETPOINTERVPROC glad_glGetPointerv; +#define glGetPointerv glad_glGetPointerv +GLAD_API_CALL PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple; +#define glGetPolygonStipple glad_glGetPolygonStipple +GLAD_API_CALL PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog; +#define glGetProgramInfoLog glad_glGetProgramInfoLog +GLAD_API_CALL PFNGLGETPROGRAMIVPROC glad_glGetProgramiv; +#define glGetProgramiv glad_glGetProgramiv +GLAD_API_CALL PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v; +#define glGetQueryObjecti64v glad_glGetQueryObjecti64v +GLAD_API_CALL PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv; +#define glGetQueryObjectiv glad_glGetQueryObjectiv +GLAD_API_CALL PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v; +#define glGetQueryObjectui64v glad_glGetQueryObjectui64v +GLAD_API_CALL PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv; +#define glGetQueryObjectuiv glad_glGetQueryObjectuiv +GLAD_API_CALL PFNGLGETQUERYIVPROC glad_glGetQueryiv; +#define glGetQueryiv glad_glGetQueryiv +GLAD_API_CALL PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv; +#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv; +#define glGetSamplerParameterIiv glad_glGetSamplerParameterIiv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv; +#define glGetSamplerParameterIuiv glad_glGetSamplerParameterIuiv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv; +#define glGetSamplerParameterfv glad_glGetSamplerParameterfv +GLAD_API_CALL PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv; +#define glGetSamplerParameteriv glad_glGetSamplerParameteriv +GLAD_API_CALL PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog; +#define glGetShaderInfoLog glad_glGetShaderInfoLog +GLAD_API_CALL PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource; +#define glGetShaderSource glad_glGetShaderSource +GLAD_API_CALL PFNGLGETSHADERIVPROC glad_glGetShaderiv; +#define glGetShaderiv glad_glGetShaderiv +GLAD_API_CALL PFNGLGETSTRINGPROC glad_glGetString; +#define glGetString glad_glGetString +GLAD_API_CALL PFNGLGETSTRINGIPROC glad_glGetStringi; +#define glGetStringi glad_glGetStringi +GLAD_API_CALL PFNGLGETSYNCIVPROC glad_glGetSynciv; +#define glGetSynciv glad_glGetSynciv +GLAD_API_CALL PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv; +#define glGetTexEnvfv glad_glGetTexEnvfv +GLAD_API_CALL PFNGLGETTEXENVIVPROC glad_glGetTexEnviv; +#define glGetTexEnviv glad_glGetTexEnviv +GLAD_API_CALL PFNGLGETTEXGENDVPROC glad_glGetTexGendv; +#define glGetTexGendv glad_glGetTexGendv +GLAD_API_CALL PFNGLGETTEXGENFVPROC glad_glGetTexGenfv; +#define glGetTexGenfv glad_glGetTexGenfv +GLAD_API_CALL PFNGLGETTEXGENIVPROC glad_glGetTexGeniv; +#define glGetTexGeniv glad_glGetTexGeniv +GLAD_API_CALL PFNGLGETTEXIMAGEPROC glad_glGetTexImage; +#define glGetTexImage glad_glGetTexImage +GLAD_API_CALL PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv; +#define glGetTexLevelParameterfv glad_glGetTexLevelParameterfv +GLAD_API_CALL PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv; +#define glGetTexLevelParameteriv glad_glGetTexLevelParameteriv +GLAD_API_CALL PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv; +#define glGetTexParameterIiv glad_glGetTexParameterIiv +GLAD_API_CALL PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv; +#define glGetTexParameterIuiv glad_glGetTexParameterIuiv +GLAD_API_CALL PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv; +#define glGetTexParameterfv glad_glGetTexParameterfv +GLAD_API_CALL PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv; +#define glGetTexParameteriv glad_glGetTexParameteriv +GLAD_API_CALL PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying; +#define glGetTransformFeedbackVarying glad_glGetTransformFeedbackVarying +GLAD_API_CALL PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex; +#define glGetUniformBlockIndex glad_glGetUniformBlockIndex +GLAD_API_CALL PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices; +#define glGetUniformIndices glad_glGetUniformIndices +GLAD_API_CALL PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation; +#define glGetUniformLocation glad_glGetUniformLocation +GLAD_API_CALL PFNGLGETUNIFORMFVPROC glad_glGetUniformfv; +#define glGetUniformfv glad_glGetUniformfv +GLAD_API_CALL PFNGLGETUNIFORMIVPROC glad_glGetUniformiv; +#define glGetUniformiv glad_glGetUniformiv +GLAD_API_CALL PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv; +#define glGetUniformuiv glad_glGetUniformuiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv; +#define glGetVertexAttribIiv glad_glGetVertexAttribIiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv; +#define glGetVertexAttribIuiv glad_glGetVertexAttribIuiv +GLAD_API_CALL PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv; +#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv +GLAD_API_CALL PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv; +#define glGetVertexAttribdv glad_glGetVertexAttribdv +GLAD_API_CALL PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv; +#define glGetVertexAttribfv glad_glGetVertexAttribfv +GLAD_API_CALL PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv; +#define glGetVertexAttribiv glad_glGetVertexAttribiv +GLAD_API_CALL PFNGLHINTPROC glad_glHint; +#define glHint glad_glHint +GLAD_API_CALL PFNGLINDEXMASKPROC glad_glIndexMask; +#define glIndexMask glad_glIndexMask +GLAD_API_CALL PFNGLINDEXPOINTERPROC glad_glIndexPointer; +#define glIndexPointer glad_glIndexPointer +GLAD_API_CALL PFNGLINDEXDPROC glad_glIndexd; +#define glIndexd glad_glIndexd +GLAD_API_CALL PFNGLINDEXDVPROC glad_glIndexdv; +#define glIndexdv glad_glIndexdv +GLAD_API_CALL PFNGLINDEXFPROC glad_glIndexf; +#define glIndexf glad_glIndexf +GLAD_API_CALL PFNGLINDEXFVPROC glad_glIndexfv; +#define glIndexfv glad_glIndexfv +GLAD_API_CALL PFNGLINDEXIPROC glad_glIndexi; +#define glIndexi glad_glIndexi +GLAD_API_CALL PFNGLINDEXIVPROC glad_glIndexiv; +#define glIndexiv glad_glIndexiv +GLAD_API_CALL PFNGLINDEXSPROC glad_glIndexs; +#define glIndexs glad_glIndexs +GLAD_API_CALL PFNGLINDEXSVPROC glad_glIndexsv; +#define glIndexsv glad_glIndexsv +GLAD_API_CALL PFNGLINDEXUBPROC glad_glIndexub; +#define glIndexub glad_glIndexub +GLAD_API_CALL PFNGLINDEXUBVPROC glad_glIndexubv; +#define glIndexubv glad_glIndexubv +GLAD_API_CALL PFNGLINITNAMESPROC glad_glInitNames; +#define glInitNames glad_glInitNames +GLAD_API_CALL PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays; +#define glInterleavedArrays glad_glInterleavedArrays +GLAD_API_CALL PFNGLISBUFFERPROC glad_glIsBuffer; +#define glIsBuffer glad_glIsBuffer +GLAD_API_CALL PFNGLISENABLEDPROC glad_glIsEnabled; +#define glIsEnabled glad_glIsEnabled +GLAD_API_CALL PFNGLISENABLEDIPROC glad_glIsEnabledi; +#define glIsEnabledi glad_glIsEnabledi +GLAD_API_CALL PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer; +#define glIsFramebuffer glad_glIsFramebuffer +GLAD_API_CALL PFNGLISLISTPROC glad_glIsList; +#define glIsList glad_glIsList +GLAD_API_CALL PFNGLISPROGRAMPROC glad_glIsProgram; +#define glIsProgram glad_glIsProgram +GLAD_API_CALL PFNGLISQUERYPROC glad_glIsQuery; +#define glIsQuery glad_glIsQuery +GLAD_API_CALL PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer; +#define glIsRenderbuffer glad_glIsRenderbuffer +GLAD_API_CALL PFNGLISSAMPLERPROC glad_glIsSampler; +#define glIsSampler glad_glIsSampler +GLAD_API_CALL PFNGLISSHADERPROC glad_glIsShader; +#define glIsShader glad_glIsShader +GLAD_API_CALL PFNGLISSYNCPROC glad_glIsSync; +#define glIsSync glad_glIsSync +GLAD_API_CALL PFNGLISTEXTUREPROC glad_glIsTexture; +#define glIsTexture glad_glIsTexture +GLAD_API_CALL PFNGLISVERTEXARRAYPROC glad_glIsVertexArray; +#define glIsVertexArray glad_glIsVertexArray +GLAD_API_CALL PFNGLLIGHTMODELFPROC glad_glLightModelf; +#define glLightModelf glad_glLightModelf +GLAD_API_CALL PFNGLLIGHTMODELFVPROC glad_glLightModelfv; +#define glLightModelfv glad_glLightModelfv +GLAD_API_CALL PFNGLLIGHTMODELIPROC glad_glLightModeli; +#define glLightModeli glad_glLightModeli +GLAD_API_CALL PFNGLLIGHTMODELIVPROC glad_glLightModeliv; +#define glLightModeliv glad_glLightModeliv +GLAD_API_CALL PFNGLLIGHTFPROC glad_glLightf; +#define glLightf glad_glLightf +GLAD_API_CALL PFNGLLIGHTFVPROC glad_glLightfv; +#define glLightfv glad_glLightfv +GLAD_API_CALL PFNGLLIGHTIPROC glad_glLighti; +#define glLighti glad_glLighti +GLAD_API_CALL PFNGLLIGHTIVPROC glad_glLightiv; +#define glLightiv glad_glLightiv +GLAD_API_CALL PFNGLLINESTIPPLEPROC glad_glLineStipple; +#define glLineStipple glad_glLineStipple +GLAD_API_CALL PFNGLLINEWIDTHPROC glad_glLineWidth; +#define glLineWidth glad_glLineWidth +GLAD_API_CALL PFNGLLINKPROGRAMPROC glad_glLinkProgram; +#define glLinkProgram glad_glLinkProgram +GLAD_API_CALL PFNGLLISTBASEPROC glad_glListBase; +#define glListBase glad_glListBase +GLAD_API_CALL PFNGLLOADIDENTITYPROC glad_glLoadIdentity; +#define glLoadIdentity glad_glLoadIdentity +GLAD_API_CALL PFNGLLOADMATRIXDPROC glad_glLoadMatrixd; +#define glLoadMatrixd glad_glLoadMatrixd +GLAD_API_CALL PFNGLLOADMATRIXFPROC glad_glLoadMatrixf; +#define glLoadMatrixf glad_glLoadMatrixf +GLAD_API_CALL PFNGLLOADNAMEPROC glad_glLoadName; +#define glLoadName glad_glLoadName +GLAD_API_CALL PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd; +#define glLoadTransposeMatrixd glad_glLoadTransposeMatrixd +GLAD_API_CALL PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf; +#define glLoadTransposeMatrixf glad_glLoadTransposeMatrixf +GLAD_API_CALL PFNGLLOGICOPPROC glad_glLogicOp; +#define glLogicOp glad_glLogicOp +GLAD_API_CALL PFNGLMAP1DPROC glad_glMap1d; +#define glMap1d glad_glMap1d +GLAD_API_CALL PFNGLMAP1FPROC glad_glMap1f; +#define glMap1f glad_glMap1f +GLAD_API_CALL PFNGLMAP2DPROC glad_glMap2d; +#define glMap2d glad_glMap2d +GLAD_API_CALL PFNGLMAP2FPROC glad_glMap2f; +#define glMap2f glad_glMap2f +GLAD_API_CALL PFNGLMAPBUFFERPROC glad_glMapBuffer; +#define glMapBuffer glad_glMapBuffer +GLAD_API_CALL PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange; +#define glMapBufferRange glad_glMapBufferRange +GLAD_API_CALL PFNGLMAPGRID1DPROC glad_glMapGrid1d; +#define glMapGrid1d glad_glMapGrid1d +GLAD_API_CALL PFNGLMAPGRID1FPROC glad_glMapGrid1f; +#define glMapGrid1f glad_glMapGrid1f +GLAD_API_CALL PFNGLMAPGRID2DPROC glad_glMapGrid2d; +#define glMapGrid2d glad_glMapGrid2d +GLAD_API_CALL PFNGLMAPGRID2FPROC glad_glMapGrid2f; +#define glMapGrid2f glad_glMapGrid2f +GLAD_API_CALL PFNGLMATERIALFPROC glad_glMaterialf; +#define glMaterialf glad_glMaterialf +GLAD_API_CALL PFNGLMATERIALFVPROC glad_glMaterialfv; +#define glMaterialfv glad_glMaterialfv +GLAD_API_CALL PFNGLMATERIALIPROC glad_glMateriali; +#define glMateriali glad_glMateriali +GLAD_API_CALL PFNGLMATERIALIVPROC glad_glMaterialiv; +#define glMaterialiv glad_glMaterialiv +GLAD_API_CALL PFNGLMATRIXMODEPROC glad_glMatrixMode; +#define glMatrixMode glad_glMatrixMode +GLAD_API_CALL PFNGLMULTMATRIXDPROC glad_glMultMatrixd; +#define glMultMatrixd glad_glMultMatrixd +GLAD_API_CALL PFNGLMULTMATRIXFPROC glad_glMultMatrixf; +#define glMultMatrixf glad_glMultMatrixf +GLAD_API_CALL PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd; +#define glMultTransposeMatrixd glad_glMultTransposeMatrixd +GLAD_API_CALL PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf; +#define glMultTransposeMatrixf glad_glMultTransposeMatrixf +GLAD_API_CALL PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays; +#define glMultiDrawArrays glad_glMultiDrawArrays +GLAD_API_CALL PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements; +#define glMultiDrawElements glad_glMultiDrawElements +GLAD_API_CALL PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex; +#define glMultiDrawElementsBaseVertex glad_glMultiDrawElementsBaseVertex +GLAD_API_CALL PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d; +#define glMultiTexCoord1d glad_glMultiTexCoord1d +GLAD_API_CALL PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv; +#define glMultiTexCoord1dv glad_glMultiTexCoord1dv +GLAD_API_CALL PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f; +#define glMultiTexCoord1f glad_glMultiTexCoord1f +GLAD_API_CALL PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv; +#define glMultiTexCoord1fv glad_glMultiTexCoord1fv +GLAD_API_CALL PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i; +#define glMultiTexCoord1i glad_glMultiTexCoord1i +GLAD_API_CALL PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv; +#define glMultiTexCoord1iv glad_glMultiTexCoord1iv +GLAD_API_CALL PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s; +#define glMultiTexCoord1s glad_glMultiTexCoord1s +GLAD_API_CALL PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv; +#define glMultiTexCoord1sv glad_glMultiTexCoord1sv +GLAD_API_CALL PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d; +#define glMultiTexCoord2d glad_glMultiTexCoord2d +GLAD_API_CALL PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv; +#define glMultiTexCoord2dv glad_glMultiTexCoord2dv +GLAD_API_CALL PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f; +#define glMultiTexCoord2f glad_glMultiTexCoord2f +GLAD_API_CALL PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv; +#define glMultiTexCoord2fv glad_glMultiTexCoord2fv +GLAD_API_CALL PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i; +#define glMultiTexCoord2i glad_glMultiTexCoord2i +GLAD_API_CALL PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv; +#define glMultiTexCoord2iv glad_glMultiTexCoord2iv +GLAD_API_CALL PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s; +#define glMultiTexCoord2s glad_glMultiTexCoord2s +GLAD_API_CALL PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv; +#define glMultiTexCoord2sv glad_glMultiTexCoord2sv +GLAD_API_CALL PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d; +#define glMultiTexCoord3d glad_glMultiTexCoord3d +GLAD_API_CALL PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv; +#define glMultiTexCoord3dv glad_glMultiTexCoord3dv +GLAD_API_CALL PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f; +#define glMultiTexCoord3f glad_glMultiTexCoord3f +GLAD_API_CALL PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv; +#define glMultiTexCoord3fv glad_glMultiTexCoord3fv +GLAD_API_CALL PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i; +#define glMultiTexCoord3i glad_glMultiTexCoord3i +GLAD_API_CALL PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv; +#define glMultiTexCoord3iv glad_glMultiTexCoord3iv +GLAD_API_CALL PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s; +#define glMultiTexCoord3s glad_glMultiTexCoord3s +GLAD_API_CALL PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv; +#define glMultiTexCoord3sv glad_glMultiTexCoord3sv +GLAD_API_CALL PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d; +#define glMultiTexCoord4d glad_glMultiTexCoord4d +GLAD_API_CALL PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv; +#define glMultiTexCoord4dv glad_glMultiTexCoord4dv +GLAD_API_CALL PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f; +#define glMultiTexCoord4f glad_glMultiTexCoord4f +GLAD_API_CALL PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv; +#define glMultiTexCoord4fv glad_glMultiTexCoord4fv +GLAD_API_CALL PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i; +#define glMultiTexCoord4i glad_glMultiTexCoord4i +GLAD_API_CALL PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv; +#define glMultiTexCoord4iv glad_glMultiTexCoord4iv +GLAD_API_CALL PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s; +#define glMultiTexCoord4s glad_glMultiTexCoord4s +GLAD_API_CALL PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv; +#define glMultiTexCoord4sv glad_glMultiTexCoord4sv +GLAD_API_CALL PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui; +#define glMultiTexCoordP1ui glad_glMultiTexCoordP1ui +GLAD_API_CALL PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv; +#define glMultiTexCoordP1uiv glad_glMultiTexCoordP1uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui; +#define glMultiTexCoordP2ui glad_glMultiTexCoordP2ui +GLAD_API_CALL PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv; +#define glMultiTexCoordP2uiv glad_glMultiTexCoordP2uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui; +#define glMultiTexCoordP3ui glad_glMultiTexCoordP3ui +GLAD_API_CALL PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv; +#define glMultiTexCoordP3uiv glad_glMultiTexCoordP3uiv +GLAD_API_CALL PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui; +#define glMultiTexCoordP4ui glad_glMultiTexCoordP4ui +GLAD_API_CALL PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv; +#define glMultiTexCoordP4uiv glad_glMultiTexCoordP4uiv +GLAD_API_CALL PFNGLNEWLISTPROC glad_glNewList; +#define glNewList glad_glNewList +GLAD_API_CALL PFNGLNORMAL3BPROC glad_glNormal3b; +#define glNormal3b glad_glNormal3b +GLAD_API_CALL PFNGLNORMAL3BVPROC glad_glNormal3bv; +#define glNormal3bv glad_glNormal3bv +GLAD_API_CALL PFNGLNORMAL3DPROC glad_glNormal3d; +#define glNormal3d glad_glNormal3d +GLAD_API_CALL PFNGLNORMAL3DVPROC glad_glNormal3dv; +#define glNormal3dv glad_glNormal3dv +GLAD_API_CALL PFNGLNORMAL3FPROC glad_glNormal3f; +#define glNormal3f glad_glNormal3f +GLAD_API_CALL PFNGLNORMAL3FVPROC glad_glNormal3fv; +#define glNormal3fv glad_glNormal3fv +GLAD_API_CALL PFNGLNORMAL3IPROC glad_glNormal3i; +#define glNormal3i glad_glNormal3i +GLAD_API_CALL PFNGLNORMAL3IVPROC glad_glNormal3iv; +#define glNormal3iv glad_glNormal3iv +GLAD_API_CALL PFNGLNORMAL3SPROC glad_glNormal3s; +#define glNormal3s glad_glNormal3s +GLAD_API_CALL PFNGLNORMAL3SVPROC glad_glNormal3sv; +#define glNormal3sv glad_glNormal3sv +GLAD_API_CALL PFNGLNORMALP3UIPROC glad_glNormalP3ui; +#define glNormalP3ui glad_glNormalP3ui +GLAD_API_CALL PFNGLNORMALP3UIVPROC glad_glNormalP3uiv; +#define glNormalP3uiv glad_glNormalP3uiv +GLAD_API_CALL PFNGLNORMALPOINTERPROC glad_glNormalPointer; +#define glNormalPointer glad_glNormalPointer +GLAD_API_CALL PFNGLORTHOPROC glad_glOrtho; +#define glOrtho glad_glOrtho +GLAD_API_CALL PFNGLPASSTHROUGHPROC glad_glPassThrough; +#define glPassThrough glad_glPassThrough +GLAD_API_CALL PFNGLPIXELMAPFVPROC glad_glPixelMapfv; +#define glPixelMapfv glad_glPixelMapfv +GLAD_API_CALL PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv; +#define glPixelMapuiv glad_glPixelMapuiv +GLAD_API_CALL PFNGLPIXELMAPUSVPROC glad_glPixelMapusv; +#define glPixelMapusv glad_glPixelMapusv +GLAD_API_CALL PFNGLPIXELSTOREFPROC glad_glPixelStoref; +#define glPixelStoref glad_glPixelStoref +GLAD_API_CALL PFNGLPIXELSTOREIPROC glad_glPixelStorei; +#define glPixelStorei glad_glPixelStorei +GLAD_API_CALL PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf; +#define glPixelTransferf glad_glPixelTransferf +GLAD_API_CALL PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi; +#define glPixelTransferi glad_glPixelTransferi +GLAD_API_CALL PFNGLPIXELZOOMPROC glad_glPixelZoom; +#define glPixelZoom glad_glPixelZoom +GLAD_API_CALL PFNGLPOINTPARAMETERFPROC glad_glPointParameterf; +#define glPointParameterf glad_glPointParameterf +GLAD_API_CALL PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv; +#define glPointParameterfv glad_glPointParameterfv +GLAD_API_CALL PFNGLPOINTPARAMETERIPROC glad_glPointParameteri; +#define glPointParameteri glad_glPointParameteri +GLAD_API_CALL PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv; +#define glPointParameteriv glad_glPointParameteriv +GLAD_API_CALL PFNGLPOINTSIZEPROC glad_glPointSize; +#define glPointSize glad_glPointSize +GLAD_API_CALL PFNGLPOLYGONMODEPROC glad_glPolygonMode; +#define glPolygonMode glad_glPolygonMode +GLAD_API_CALL PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset; +#define glPolygonOffset glad_glPolygonOffset +GLAD_API_CALL PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple; +#define glPolygonStipple glad_glPolygonStipple +GLAD_API_CALL PFNGLPOPATTRIBPROC glad_glPopAttrib; +#define glPopAttrib glad_glPopAttrib +GLAD_API_CALL PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib; +#define glPopClientAttrib glad_glPopClientAttrib +GLAD_API_CALL PFNGLPOPMATRIXPROC glad_glPopMatrix; +#define glPopMatrix glad_glPopMatrix +GLAD_API_CALL PFNGLPOPNAMEPROC glad_glPopName; +#define glPopName glad_glPopName +GLAD_API_CALL PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex; +#define glPrimitiveRestartIndex glad_glPrimitiveRestartIndex +GLAD_API_CALL PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures; +#define glPrioritizeTextures glad_glPrioritizeTextures +GLAD_API_CALL PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex; +#define glProvokingVertex glad_glProvokingVertex +GLAD_API_CALL PFNGLPUSHATTRIBPROC glad_glPushAttrib; +#define glPushAttrib glad_glPushAttrib +GLAD_API_CALL PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib; +#define glPushClientAttrib glad_glPushClientAttrib +GLAD_API_CALL PFNGLPUSHMATRIXPROC glad_glPushMatrix; +#define glPushMatrix glad_glPushMatrix +GLAD_API_CALL PFNGLPUSHNAMEPROC glad_glPushName; +#define glPushName glad_glPushName +GLAD_API_CALL PFNGLQUERYCOUNTERPROC glad_glQueryCounter; +#define glQueryCounter glad_glQueryCounter +GLAD_API_CALL PFNGLRASTERPOS2DPROC glad_glRasterPos2d; +#define glRasterPos2d glad_glRasterPos2d +GLAD_API_CALL PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv; +#define glRasterPos2dv glad_glRasterPos2dv +GLAD_API_CALL PFNGLRASTERPOS2FPROC glad_glRasterPos2f; +#define glRasterPos2f glad_glRasterPos2f +GLAD_API_CALL PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv; +#define glRasterPos2fv glad_glRasterPos2fv +GLAD_API_CALL PFNGLRASTERPOS2IPROC glad_glRasterPos2i; +#define glRasterPos2i glad_glRasterPos2i +GLAD_API_CALL PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv; +#define glRasterPos2iv glad_glRasterPos2iv +GLAD_API_CALL PFNGLRASTERPOS2SPROC glad_glRasterPos2s; +#define glRasterPos2s glad_glRasterPos2s +GLAD_API_CALL PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv; +#define glRasterPos2sv glad_glRasterPos2sv +GLAD_API_CALL PFNGLRASTERPOS3DPROC glad_glRasterPos3d; +#define glRasterPos3d glad_glRasterPos3d +GLAD_API_CALL PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv; +#define glRasterPos3dv glad_glRasterPos3dv +GLAD_API_CALL PFNGLRASTERPOS3FPROC glad_glRasterPos3f; +#define glRasterPos3f glad_glRasterPos3f +GLAD_API_CALL PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv; +#define glRasterPos3fv glad_glRasterPos3fv +GLAD_API_CALL PFNGLRASTERPOS3IPROC glad_glRasterPos3i; +#define glRasterPos3i glad_glRasterPos3i +GLAD_API_CALL PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv; +#define glRasterPos3iv glad_glRasterPos3iv +GLAD_API_CALL PFNGLRASTERPOS3SPROC glad_glRasterPos3s; +#define glRasterPos3s glad_glRasterPos3s +GLAD_API_CALL PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv; +#define glRasterPos3sv glad_glRasterPos3sv +GLAD_API_CALL PFNGLRASTERPOS4DPROC glad_glRasterPos4d; +#define glRasterPos4d glad_glRasterPos4d +GLAD_API_CALL PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv; +#define glRasterPos4dv glad_glRasterPos4dv +GLAD_API_CALL PFNGLRASTERPOS4FPROC glad_glRasterPos4f; +#define glRasterPos4f glad_glRasterPos4f +GLAD_API_CALL PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv; +#define glRasterPos4fv glad_glRasterPos4fv +GLAD_API_CALL PFNGLRASTERPOS4IPROC glad_glRasterPos4i; +#define glRasterPos4i glad_glRasterPos4i +GLAD_API_CALL PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv; +#define glRasterPos4iv glad_glRasterPos4iv +GLAD_API_CALL PFNGLRASTERPOS4SPROC glad_glRasterPos4s; +#define glRasterPos4s glad_glRasterPos4s +GLAD_API_CALL PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv; +#define glRasterPos4sv glad_glRasterPos4sv +GLAD_API_CALL PFNGLREADBUFFERPROC glad_glReadBuffer; +#define glReadBuffer glad_glReadBuffer +GLAD_API_CALL PFNGLREADPIXELSPROC glad_glReadPixels; +#define glReadPixels glad_glReadPixels +GLAD_API_CALL PFNGLRECTDPROC glad_glRectd; +#define glRectd glad_glRectd +GLAD_API_CALL PFNGLRECTDVPROC glad_glRectdv; +#define glRectdv glad_glRectdv +GLAD_API_CALL PFNGLRECTFPROC glad_glRectf; +#define glRectf glad_glRectf +GLAD_API_CALL PFNGLRECTFVPROC glad_glRectfv; +#define glRectfv glad_glRectfv +GLAD_API_CALL PFNGLRECTIPROC glad_glRecti; +#define glRecti glad_glRecti +GLAD_API_CALL PFNGLRECTIVPROC glad_glRectiv; +#define glRectiv glad_glRectiv +GLAD_API_CALL PFNGLRECTSPROC glad_glRects; +#define glRects glad_glRects +GLAD_API_CALL PFNGLRECTSVPROC glad_glRectsv; +#define glRectsv glad_glRectsv +GLAD_API_CALL PFNGLRENDERMODEPROC glad_glRenderMode; +#define glRenderMode glad_glRenderMode +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage; +#define glRenderbufferStorage glad_glRenderbufferStorage +GLAD_API_CALL PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample; +#define glRenderbufferStorageMultisample glad_glRenderbufferStorageMultisample +GLAD_API_CALL PFNGLROTATEDPROC glad_glRotated; +#define glRotated glad_glRotated +GLAD_API_CALL PFNGLROTATEFPROC glad_glRotatef; +#define glRotatef glad_glRotatef +GLAD_API_CALL PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage; +#define glSampleCoverage glad_glSampleCoverage +GLAD_API_CALL PFNGLSAMPLEMASKIPROC glad_glSampleMaski; +#define glSampleMaski glad_glSampleMaski +GLAD_API_CALL PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv; +#define glSamplerParameterIiv glad_glSamplerParameterIiv +GLAD_API_CALL PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv; +#define glSamplerParameterIuiv glad_glSamplerParameterIuiv +GLAD_API_CALL PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf; +#define glSamplerParameterf glad_glSamplerParameterf +GLAD_API_CALL PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv; +#define glSamplerParameterfv glad_glSamplerParameterfv +GLAD_API_CALL PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri; +#define glSamplerParameteri glad_glSamplerParameteri +GLAD_API_CALL PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv; +#define glSamplerParameteriv glad_glSamplerParameteriv +GLAD_API_CALL PFNGLSCALEDPROC glad_glScaled; +#define glScaled glad_glScaled +GLAD_API_CALL PFNGLSCALEFPROC glad_glScalef; +#define glScalef glad_glScalef +GLAD_API_CALL PFNGLSCISSORPROC glad_glScissor; +#define glScissor glad_glScissor +GLAD_API_CALL PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b; +#define glSecondaryColor3b glad_glSecondaryColor3b +GLAD_API_CALL PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv; +#define glSecondaryColor3bv glad_glSecondaryColor3bv +GLAD_API_CALL PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d; +#define glSecondaryColor3d glad_glSecondaryColor3d +GLAD_API_CALL PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv; +#define glSecondaryColor3dv glad_glSecondaryColor3dv +GLAD_API_CALL PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f; +#define glSecondaryColor3f glad_glSecondaryColor3f +GLAD_API_CALL PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv; +#define glSecondaryColor3fv glad_glSecondaryColor3fv +GLAD_API_CALL PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i; +#define glSecondaryColor3i glad_glSecondaryColor3i +GLAD_API_CALL PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv; +#define glSecondaryColor3iv glad_glSecondaryColor3iv +GLAD_API_CALL PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s; +#define glSecondaryColor3s glad_glSecondaryColor3s +GLAD_API_CALL PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv; +#define glSecondaryColor3sv glad_glSecondaryColor3sv +GLAD_API_CALL PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub; +#define glSecondaryColor3ub glad_glSecondaryColor3ub +GLAD_API_CALL PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv; +#define glSecondaryColor3ubv glad_glSecondaryColor3ubv +GLAD_API_CALL PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui; +#define glSecondaryColor3ui glad_glSecondaryColor3ui +GLAD_API_CALL PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv; +#define glSecondaryColor3uiv glad_glSecondaryColor3uiv +GLAD_API_CALL PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us; +#define glSecondaryColor3us glad_glSecondaryColor3us +GLAD_API_CALL PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv; +#define glSecondaryColor3usv glad_glSecondaryColor3usv +GLAD_API_CALL PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui; +#define glSecondaryColorP3ui glad_glSecondaryColorP3ui +GLAD_API_CALL PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv; +#define glSecondaryColorP3uiv glad_glSecondaryColorP3uiv +GLAD_API_CALL PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer; +#define glSecondaryColorPointer glad_glSecondaryColorPointer +GLAD_API_CALL PFNGLSELECTBUFFERPROC glad_glSelectBuffer; +#define glSelectBuffer glad_glSelectBuffer +GLAD_API_CALL PFNGLSHADEMODELPROC glad_glShadeModel; +#define glShadeModel glad_glShadeModel +GLAD_API_CALL PFNGLSHADERSOURCEPROC glad_glShaderSource; +#define glShaderSource glad_glShaderSource +GLAD_API_CALL PFNGLSTENCILFUNCPROC glad_glStencilFunc; +#define glStencilFunc glad_glStencilFunc +GLAD_API_CALL PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate; +#define glStencilFuncSeparate glad_glStencilFuncSeparate +GLAD_API_CALL PFNGLSTENCILMASKPROC glad_glStencilMask; +#define glStencilMask glad_glStencilMask +GLAD_API_CALL PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate; +#define glStencilMaskSeparate glad_glStencilMaskSeparate +GLAD_API_CALL PFNGLSTENCILOPPROC glad_glStencilOp; +#define glStencilOp glad_glStencilOp +GLAD_API_CALL PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate; +#define glStencilOpSeparate glad_glStencilOpSeparate +GLAD_API_CALL PFNGLTEXBUFFERPROC glad_glTexBuffer; +#define glTexBuffer glad_glTexBuffer +GLAD_API_CALL PFNGLTEXCOORD1DPROC glad_glTexCoord1d; +#define glTexCoord1d glad_glTexCoord1d +GLAD_API_CALL PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv; +#define glTexCoord1dv glad_glTexCoord1dv +GLAD_API_CALL PFNGLTEXCOORD1FPROC glad_glTexCoord1f; +#define glTexCoord1f glad_glTexCoord1f +GLAD_API_CALL PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv; +#define glTexCoord1fv glad_glTexCoord1fv +GLAD_API_CALL PFNGLTEXCOORD1IPROC glad_glTexCoord1i; +#define glTexCoord1i glad_glTexCoord1i +GLAD_API_CALL PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv; +#define glTexCoord1iv glad_glTexCoord1iv +GLAD_API_CALL PFNGLTEXCOORD1SPROC glad_glTexCoord1s; +#define glTexCoord1s glad_glTexCoord1s +GLAD_API_CALL PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv; +#define glTexCoord1sv glad_glTexCoord1sv +GLAD_API_CALL PFNGLTEXCOORD2DPROC glad_glTexCoord2d; +#define glTexCoord2d glad_glTexCoord2d +GLAD_API_CALL PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv; +#define glTexCoord2dv glad_glTexCoord2dv +GLAD_API_CALL PFNGLTEXCOORD2FPROC glad_glTexCoord2f; +#define glTexCoord2f glad_glTexCoord2f +GLAD_API_CALL PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv; +#define glTexCoord2fv glad_glTexCoord2fv +GLAD_API_CALL PFNGLTEXCOORD2IPROC glad_glTexCoord2i; +#define glTexCoord2i glad_glTexCoord2i +GLAD_API_CALL PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv; +#define glTexCoord2iv glad_glTexCoord2iv +GLAD_API_CALL PFNGLTEXCOORD2SPROC glad_glTexCoord2s; +#define glTexCoord2s glad_glTexCoord2s +GLAD_API_CALL PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv; +#define glTexCoord2sv glad_glTexCoord2sv +GLAD_API_CALL PFNGLTEXCOORD3DPROC glad_glTexCoord3d; +#define glTexCoord3d glad_glTexCoord3d +GLAD_API_CALL PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv; +#define glTexCoord3dv glad_glTexCoord3dv +GLAD_API_CALL PFNGLTEXCOORD3FPROC glad_glTexCoord3f; +#define glTexCoord3f glad_glTexCoord3f +GLAD_API_CALL PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv; +#define glTexCoord3fv glad_glTexCoord3fv +GLAD_API_CALL PFNGLTEXCOORD3IPROC glad_glTexCoord3i; +#define glTexCoord3i glad_glTexCoord3i +GLAD_API_CALL PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv; +#define glTexCoord3iv glad_glTexCoord3iv +GLAD_API_CALL PFNGLTEXCOORD3SPROC glad_glTexCoord3s; +#define glTexCoord3s glad_glTexCoord3s +GLAD_API_CALL PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv; +#define glTexCoord3sv glad_glTexCoord3sv +GLAD_API_CALL PFNGLTEXCOORD4DPROC glad_glTexCoord4d; +#define glTexCoord4d glad_glTexCoord4d +GLAD_API_CALL PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv; +#define glTexCoord4dv glad_glTexCoord4dv +GLAD_API_CALL PFNGLTEXCOORD4FPROC glad_glTexCoord4f; +#define glTexCoord4f glad_glTexCoord4f +GLAD_API_CALL PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv; +#define glTexCoord4fv glad_glTexCoord4fv +GLAD_API_CALL PFNGLTEXCOORD4IPROC glad_glTexCoord4i; +#define glTexCoord4i glad_glTexCoord4i +GLAD_API_CALL PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv; +#define glTexCoord4iv glad_glTexCoord4iv +GLAD_API_CALL PFNGLTEXCOORD4SPROC glad_glTexCoord4s; +#define glTexCoord4s glad_glTexCoord4s +GLAD_API_CALL PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv; +#define glTexCoord4sv glad_glTexCoord4sv +GLAD_API_CALL PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui; +#define glTexCoordP1ui glad_glTexCoordP1ui +GLAD_API_CALL PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv; +#define glTexCoordP1uiv glad_glTexCoordP1uiv +GLAD_API_CALL PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui; +#define glTexCoordP2ui glad_glTexCoordP2ui +GLAD_API_CALL PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv; +#define glTexCoordP2uiv glad_glTexCoordP2uiv +GLAD_API_CALL PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui; +#define glTexCoordP3ui glad_glTexCoordP3ui +GLAD_API_CALL PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv; +#define glTexCoordP3uiv glad_glTexCoordP3uiv +GLAD_API_CALL PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui; +#define glTexCoordP4ui glad_glTexCoordP4ui +GLAD_API_CALL PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv; +#define glTexCoordP4uiv glad_glTexCoordP4uiv +GLAD_API_CALL PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer; +#define glTexCoordPointer glad_glTexCoordPointer +GLAD_API_CALL PFNGLTEXENVFPROC glad_glTexEnvf; +#define glTexEnvf glad_glTexEnvf +GLAD_API_CALL PFNGLTEXENVFVPROC glad_glTexEnvfv; +#define glTexEnvfv glad_glTexEnvfv +GLAD_API_CALL PFNGLTEXENVIPROC glad_glTexEnvi; +#define glTexEnvi glad_glTexEnvi +GLAD_API_CALL PFNGLTEXENVIVPROC glad_glTexEnviv; +#define glTexEnviv glad_glTexEnviv +GLAD_API_CALL PFNGLTEXGENDPROC glad_glTexGend; +#define glTexGend glad_glTexGend +GLAD_API_CALL PFNGLTEXGENDVPROC glad_glTexGendv; +#define glTexGendv glad_glTexGendv +GLAD_API_CALL PFNGLTEXGENFPROC glad_glTexGenf; +#define glTexGenf glad_glTexGenf +GLAD_API_CALL PFNGLTEXGENFVPROC glad_glTexGenfv; +#define glTexGenfv glad_glTexGenfv +GLAD_API_CALL PFNGLTEXGENIPROC glad_glTexGeni; +#define glTexGeni glad_glTexGeni +GLAD_API_CALL PFNGLTEXGENIVPROC glad_glTexGeniv; +#define glTexGeniv glad_glTexGeniv +GLAD_API_CALL PFNGLTEXIMAGE1DPROC glad_glTexImage1D; +#define glTexImage1D glad_glTexImage1D +GLAD_API_CALL PFNGLTEXIMAGE2DPROC glad_glTexImage2D; +#define glTexImage2D glad_glTexImage2D +GLAD_API_CALL PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample; +#define glTexImage2DMultisample glad_glTexImage2DMultisample +GLAD_API_CALL PFNGLTEXIMAGE3DPROC glad_glTexImage3D; +#define glTexImage3D glad_glTexImage3D +GLAD_API_CALL PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample; +#define glTexImage3DMultisample glad_glTexImage3DMultisample +GLAD_API_CALL PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv; +#define glTexParameterIiv glad_glTexParameterIiv +GLAD_API_CALL PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv; +#define glTexParameterIuiv glad_glTexParameterIuiv +GLAD_API_CALL PFNGLTEXPARAMETERFPROC glad_glTexParameterf; +#define glTexParameterf glad_glTexParameterf +GLAD_API_CALL PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv; +#define glTexParameterfv glad_glTexParameterfv +GLAD_API_CALL PFNGLTEXPARAMETERIPROC glad_glTexParameteri; +#define glTexParameteri glad_glTexParameteri +GLAD_API_CALL PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv; +#define glTexParameteriv glad_glTexParameteriv +GLAD_API_CALL PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D; +#define glTexSubImage1D glad_glTexSubImage1D +GLAD_API_CALL PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D; +#define glTexSubImage2D glad_glTexSubImage2D +GLAD_API_CALL PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D; +#define glTexSubImage3D glad_glTexSubImage3D +GLAD_API_CALL PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings; +#define glTransformFeedbackVaryings glad_glTransformFeedbackVaryings +GLAD_API_CALL PFNGLTRANSLATEDPROC glad_glTranslated; +#define glTranslated glad_glTranslated +GLAD_API_CALL PFNGLTRANSLATEFPROC glad_glTranslatef; +#define glTranslatef glad_glTranslatef +GLAD_API_CALL PFNGLUNIFORM1FPROC glad_glUniform1f; +#define glUniform1f glad_glUniform1f +GLAD_API_CALL PFNGLUNIFORM1FVPROC glad_glUniform1fv; +#define glUniform1fv glad_glUniform1fv +GLAD_API_CALL PFNGLUNIFORM1IPROC glad_glUniform1i; +#define glUniform1i glad_glUniform1i +GLAD_API_CALL PFNGLUNIFORM1IVPROC glad_glUniform1iv; +#define glUniform1iv glad_glUniform1iv +GLAD_API_CALL PFNGLUNIFORM1UIPROC glad_glUniform1ui; +#define glUniform1ui glad_glUniform1ui +GLAD_API_CALL PFNGLUNIFORM1UIVPROC glad_glUniform1uiv; +#define glUniform1uiv glad_glUniform1uiv +GLAD_API_CALL PFNGLUNIFORM2FPROC glad_glUniform2f; +#define glUniform2f glad_glUniform2f +GLAD_API_CALL PFNGLUNIFORM2FVPROC glad_glUniform2fv; +#define glUniform2fv glad_glUniform2fv +GLAD_API_CALL PFNGLUNIFORM2IPROC glad_glUniform2i; +#define glUniform2i glad_glUniform2i +GLAD_API_CALL PFNGLUNIFORM2IVPROC glad_glUniform2iv; +#define glUniform2iv glad_glUniform2iv +GLAD_API_CALL PFNGLUNIFORM2UIPROC glad_glUniform2ui; +#define glUniform2ui glad_glUniform2ui +GLAD_API_CALL PFNGLUNIFORM2UIVPROC glad_glUniform2uiv; +#define glUniform2uiv glad_glUniform2uiv +GLAD_API_CALL PFNGLUNIFORM3FPROC glad_glUniform3f; +#define glUniform3f glad_glUniform3f +GLAD_API_CALL PFNGLUNIFORM3FVPROC glad_glUniform3fv; +#define glUniform3fv glad_glUniform3fv +GLAD_API_CALL PFNGLUNIFORM3IPROC glad_glUniform3i; +#define glUniform3i glad_glUniform3i +GLAD_API_CALL PFNGLUNIFORM3IVPROC glad_glUniform3iv; +#define glUniform3iv glad_glUniform3iv +GLAD_API_CALL PFNGLUNIFORM3UIPROC glad_glUniform3ui; +#define glUniform3ui glad_glUniform3ui +GLAD_API_CALL PFNGLUNIFORM3UIVPROC glad_glUniform3uiv; +#define glUniform3uiv glad_glUniform3uiv +GLAD_API_CALL PFNGLUNIFORM4FPROC glad_glUniform4f; +#define glUniform4f glad_glUniform4f +GLAD_API_CALL PFNGLUNIFORM4FVPROC glad_glUniform4fv; +#define glUniform4fv glad_glUniform4fv +GLAD_API_CALL PFNGLUNIFORM4IPROC glad_glUniform4i; +#define glUniform4i glad_glUniform4i +GLAD_API_CALL PFNGLUNIFORM4IVPROC glad_glUniform4iv; +#define glUniform4iv glad_glUniform4iv +GLAD_API_CALL PFNGLUNIFORM4UIPROC glad_glUniform4ui; +#define glUniform4ui glad_glUniform4ui +GLAD_API_CALL PFNGLUNIFORM4UIVPROC glad_glUniform4uiv; +#define glUniform4uiv glad_glUniform4uiv +GLAD_API_CALL PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding; +#define glUniformBlockBinding glad_glUniformBlockBinding +GLAD_API_CALL PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv; +#define glUniformMatrix2fv glad_glUniformMatrix2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv; +#define glUniformMatrix2x3fv glad_glUniformMatrix2x3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv; +#define glUniformMatrix2x4fv glad_glUniformMatrix2x4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv; +#define glUniformMatrix3fv glad_glUniformMatrix3fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv; +#define glUniformMatrix3x2fv glad_glUniformMatrix3x2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv; +#define glUniformMatrix3x4fv glad_glUniformMatrix3x4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv; +#define glUniformMatrix4fv glad_glUniformMatrix4fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv; +#define glUniformMatrix4x2fv glad_glUniformMatrix4x2fv +GLAD_API_CALL PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv; +#define glUniformMatrix4x3fv glad_glUniformMatrix4x3fv +GLAD_API_CALL PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer; +#define glUnmapBuffer glad_glUnmapBuffer +GLAD_API_CALL PFNGLUSEPROGRAMPROC glad_glUseProgram; +#define glUseProgram glad_glUseProgram +GLAD_API_CALL PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram; +#define glValidateProgram glad_glValidateProgram +GLAD_API_CALL PFNGLVERTEX2DPROC glad_glVertex2d; +#define glVertex2d glad_glVertex2d +GLAD_API_CALL PFNGLVERTEX2DVPROC glad_glVertex2dv; +#define glVertex2dv glad_glVertex2dv +GLAD_API_CALL PFNGLVERTEX2FPROC glad_glVertex2f; +#define glVertex2f glad_glVertex2f +GLAD_API_CALL PFNGLVERTEX2FVPROC glad_glVertex2fv; +#define glVertex2fv glad_glVertex2fv +GLAD_API_CALL PFNGLVERTEX2IPROC glad_glVertex2i; +#define glVertex2i glad_glVertex2i +GLAD_API_CALL PFNGLVERTEX2IVPROC glad_glVertex2iv; +#define glVertex2iv glad_glVertex2iv +GLAD_API_CALL PFNGLVERTEX2SPROC glad_glVertex2s; +#define glVertex2s glad_glVertex2s +GLAD_API_CALL PFNGLVERTEX2SVPROC glad_glVertex2sv; +#define glVertex2sv glad_glVertex2sv +GLAD_API_CALL PFNGLVERTEX3DPROC glad_glVertex3d; +#define glVertex3d glad_glVertex3d +GLAD_API_CALL PFNGLVERTEX3DVPROC glad_glVertex3dv; +#define glVertex3dv glad_glVertex3dv +GLAD_API_CALL PFNGLVERTEX3FPROC glad_glVertex3f; +#define glVertex3f glad_glVertex3f +GLAD_API_CALL PFNGLVERTEX3FVPROC glad_glVertex3fv; +#define glVertex3fv glad_glVertex3fv +GLAD_API_CALL PFNGLVERTEX3IPROC glad_glVertex3i; +#define glVertex3i glad_glVertex3i +GLAD_API_CALL PFNGLVERTEX3IVPROC glad_glVertex3iv; +#define glVertex3iv glad_glVertex3iv +GLAD_API_CALL PFNGLVERTEX3SPROC glad_glVertex3s; +#define glVertex3s glad_glVertex3s +GLAD_API_CALL PFNGLVERTEX3SVPROC glad_glVertex3sv; +#define glVertex3sv glad_glVertex3sv +GLAD_API_CALL PFNGLVERTEX4DPROC glad_glVertex4d; +#define glVertex4d glad_glVertex4d +GLAD_API_CALL PFNGLVERTEX4DVPROC glad_glVertex4dv; +#define glVertex4dv glad_glVertex4dv +GLAD_API_CALL PFNGLVERTEX4FPROC glad_glVertex4f; +#define glVertex4f glad_glVertex4f +GLAD_API_CALL PFNGLVERTEX4FVPROC glad_glVertex4fv; +#define glVertex4fv glad_glVertex4fv +GLAD_API_CALL PFNGLVERTEX4IPROC glad_glVertex4i; +#define glVertex4i glad_glVertex4i +GLAD_API_CALL PFNGLVERTEX4IVPROC glad_glVertex4iv; +#define glVertex4iv glad_glVertex4iv +GLAD_API_CALL PFNGLVERTEX4SPROC glad_glVertex4s; +#define glVertex4s glad_glVertex4s +GLAD_API_CALL PFNGLVERTEX4SVPROC glad_glVertex4sv; +#define glVertex4sv glad_glVertex4sv +GLAD_API_CALL PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d; +#define glVertexAttrib1d glad_glVertexAttrib1d +GLAD_API_CALL PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv; +#define glVertexAttrib1dv glad_glVertexAttrib1dv +GLAD_API_CALL PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f; +#define glVertexAttrib1f glad_glVertexAttrib1f +GLAD_API_CALL PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv; +#define glVertexAttrib1fv glad_glVertexAttrib1fv +GLAD_API_CALL PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s; +#define glVertexAttrib1s glad_glVertexAttrib1s +GLAD_API_CALL PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv; +#define glVertexAttrib1sv glad_glVertexAttrib1sv +GLAD_API_CALL PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d; +#define glVertexAttrib2d glad_glVertexAttrib2d +GLAD_API_CALL PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv; +#define glVertexAttrib2dv glad_glVertexAttrib2dv +GLAD_API_CALL PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f; +#define glVertexAttrib2f glad_glVertexAttrib2f +GLAD_API_CALL PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv; +#define glVertexAttrib2fv glad_glVertexAttrib2fv +GLAD_API_CALL PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s; +#define glVertexAttrib2s glad_glVertexAttrib2s +GLAD_API_CALL PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv; +#define glVertexAttrib2sv glad_glVertexAttrib2sv +GLAD_API_CALL PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d; +#define glVertexAttrib3d glad_glVertexAttrib3d +GLAD_API_CALL PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv; +#define glVertexAttrib3dv glad_glVertexAttrib3dv +GLAD_API_CALL PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f; +#define glVertexAttrib3f glad_glVertexAttrib3f +GLAD_API_CALL PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv; +#define glVertexAttrib3fv glad_glVertexAttrib3fv +GLAD_API_CALL PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s; +#define glVertexAttrib3s glad_glVertexAttrib3s +GLAD_API_CALL PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv; +#define glVertexAttrib3sv glad_glVertexAttrib3sv +GLAD_API_CALL PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv; +#define glVertexAttrib4Nbv glad_glVertexAttrib4Nbv +GLAD_API_CALL PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv; +#define glVertexAttrib4Niv glad_glVertexAttrib4Niv +GLAD_API_CALL PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv; +#define glVertexAttrib4Nsv glad_glVertexAttrib4Nsv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub; +#define glVertexAttrib4Nub glad_glVertexAttrib4Nub +GLAD_API_CALL PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv; +#define glVertexAttrib4Nubv glad_glVertexAttrib4Nubv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv; +#define glVertexAttrib4Nuiv glad_glVertexAttrib4Nuiv +GLAD_API_CALL PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv; +#define glVertexAttrib4Nusv glad_glVertexAttrib4Nusv +GLAD_API_CALL PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv; +#define glVertexAttrib4bv glad_glVertexAttrib4bv +GLAD_API_CALL PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d; +#define glVertexAttrib4d glad_glVertexAttrib4d +GLAD_API_CALL PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv; +#define glVertexAttrib4dv glad_glVertexAttrib4dv +GLAD_API_CALL PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f; +#define glVertexAttrib4f glad_glVertexAttrib4f +GLAD_API_CALL PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv; +#define glVertexAttrib4fv glad_glVertexAttrib4fv +GLAD_API_CALL PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv; +#define glVertexAttrib4iv glad_glVertexAttrib4iv +GLAD_API_CALL PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s; +#define glVertexAttrib4s glad_glVertexAttrib4s +GLAD_API_CALL PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv; +#define glVertexAttrib4sv glad_glVertexAttrib4sv +GLAD_API_CALL PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv; +#define glVertexAttrib4ubv glad_glVertexAttrib4ubv +GLAD_API_CALL PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv; +#define glVertexAttrib4uiv glad_glVertexAttrib4uiv +GLAD_API_CALL PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv; +#define glVertexAttrib4usv glad_glVertexAttrib4usv +GLAD_API_CALL PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor; +#define glVertexAttribDivisor glad_glVertexAttribDivisor +GLAD_API_CALL PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i; +#define glVertexAttribI1i glad_glVertexAttribI1i +GLAD_API_CALL PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv; +#define glVertexAttribI1iv glad_glVertexAttribI1iv +GLAD_API_CALL PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui; +#define glVertexAttribI1ui glad_glVertexAttribI1ui +GLAD_API_CALL PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv; +#define glVertexAttribI1uiv glad_glVertexAttribI1uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i; +#define glVertexAttribI2i glad_glVertexAttribI2i +GLAD_API_CALL PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv; +#define glVertexAttribI2iv glad_glVertexAttribI2iv +GLAD_API_CALL PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui; +#define glVertexAttribI2ui glad_glVertexAttribI2ui +GLAD_API_CALL PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv; +#define glVertexAttribI2uiv glad_glVertexAttribI2uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i; +#define glVertexAttribI3i glad_glVertexAttribI3i +GLAD_API_CALL PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv; +#define glVertexAttribI3iv glad_glVertexAttribI3iv +GLAD_API_CALL PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui; +#define glVertexAttribI3ui glad_glVertexAttribI3ui +GLAD_API_CALL PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv; +#define glVertexAttribI3uiv glad_glVertexAttribI3uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv; +#define glVertexAttribI4bv glad_glVertexAttribI4bv +GLAD_API_CALL PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i; +#define glVertexAttribI4i glad_glVertexAttribI4i +GLAD_API_CALL PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv; +#define glVertexAttribI4iv glad_glVertexAttribI4iv +GLAD_API_CALL PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv; +#define glVertexAttribI4sv glad_glVertexAttribI4sv +GLAD_API_CALL PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv; +#define glVertexAttribI4ubv glad_glVertexAttribI4ubv +GLAD_API_CALL PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui; +#define glVertexAttribI4ui glad_glVertexAttribI4ui +GLAD_API_CALL PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv; +#define glVertexAttribI4uiv glad_glVertexAttribI4uiv +GLAD_API_CALL PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv; +#define glVertexAttribI4usv glad_glVertexAttribI4usv +GLAD_API_CALL PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer; +#define glVertexAttribIPointer glad_glVertexAttribIPointer +GLAD_API_CALL PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui; +#define glVertexAttribP1ui glad_glVertexAttribP1ui +GLAD_API_CALL PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv; +#define glVertexAttribP1uiv glad_glVertexAttribP1uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui; +#define glVertexAttribP2ui glad_glVertexAttribP2ui +GLAD_API_CALL PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv; +#define glVertexAttribP2uiv glad_glVertexAttribP2uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui; +#define glVertexAttribP3ui glad_glVertexAttribP3ui +GLAD_API_CALL PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv; +#define glVertexAttribP3uiv glad_glVertexAttribP3uiv +GLAD_API_CALL PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui; +#define glVertexAttribP4ui glad_glVertexAttribP4ui +GLAD_API_CALL PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv; +#define glVertexAttribP4uiv glad_glVertexAttribP4uiv +GLAD_API_CALL PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer; +#define glVertexAttribPointer glad_glVertexAttribPointer +GLAD_API_CALL PFNGLVERTEXP2UIPROC glad_glVertexP2ui; +#define glVertexP2ui glad_glVertexP2ui +GLAD_API_CALL PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv; +#define glVertexP2uiv glad_glVertexP2uiv +GLAD_API_CALL PFNGLVERTEXP3UIPROC glad_glVertexP3ui; +#define glVertexP3ui glad_glVertexP3ui +GLAD_API_CALL PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv; +#define glVertexP3uiv glad_glVertexP3uiv +GLAD_API_CALL PFNGLVERTEXP4UIPROC glad_glVertexP4ui; +#define glVertexP4ui glad_glVertexP4ui +GLAD_API_CALL PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv; +#define glVertexP4uiv glad_glVertexP4uiv +GLAD_API_CALL PFNGLVERTEXPOINTERPROC glad_glVertexPointer; +#define glVertexPointer glad_glVertexPointer +GLAD_API_CALL PFNGLVIEWPORTPROC glad_glViewport; +#define glViewport glad_glViewport +GLAD_API_CALL PFNGLWAITSYNCPROC glad_glWaitSync; +#define glWaitSync glad_glWaitSync +GLAD_API_CALL PFNGLWINDOWPOS2DPROC glad_glWindowPos2d; +#define glWindowPos2d glad_glWindowPos2d +GLAD_API_CALL PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv; +#define glWindowPos2dv glad_glWindowPos2dv +GLAD_API_CALL PFNGLWINDOWPOS2FPROC glad_glWindowPos2f; +#define glWindowPos2f glad_glWindowPos2f +GLAD_API_CALL PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv; +#define glWindowPos2fv glad_glWindowPos2fv +GLAD_API_CALL PFNGLWINDOWPOS2IPROC glad_glWindowPos2i; +#define glWindowPos2i glad_glWindowPos2i +GLAD_API_CALL PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv; +#define glWindowPos2iv glad_glWindowPos2iv +GLAD_API_CALL PFNGLWINDOWPOS2SPROC glad_glWindowPos2s; +#define glWindowPos2s glad_glWindowPos2s +GLAD_API_CALL PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv; +#define glWindowPos2sv glad_glWindowPos2sv +GLAD_API_CALL PFNGLWINDOWPOS3DPROC glad_glWindowPos3d; +#define glWindowPos3d glad_glWindowPos3d +GLAD_API_CALL PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv; +#define glWindowPos3dv glad_glWindowPos3dv +GLAD_API_CALL PFNGLWINDOWPOS3FPROC glad_glWindowPos3f; +#define glWindowPos3f glad_glWindowPos3f +GLAD_API_CALL PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv; +#define glWindowPos3fv glad_glWindowPos3fv +GLAD_API_CALL PFNGLWINDOWPOS3IPROC glad_glWindowPos3i; +#define glWindowPos3i glad_glWindowPos3i +GLAD_API_CALL PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv; +#define glWindowPos3iv glad_glWindowPos3iv +GLAD_API_CALL PFNGLWINDOWPOS3SPROC glad_glWindowPos3s; +#define glWindowPos3s glad_glWindowPos3s +GLAD_API_CALL PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv; +#define glWindowPos3sv glad_glWindowPos3sv + + + + + +GLAD_API_CALL int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGL( GLADloadfunc load); + + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/glx.h glmark2-2021.02/src/glad/include/glad/glx.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/glx.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/glad/glx.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,570 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:13 2019 + * + * Generator: C/C++ + * Specification: glx + * Extensions: 2 + * + * APIs: + * - glx=1.4 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='glx=1.4' --extensions='GLX_EXT_swap_control,GLX_MESA_swap_control' c + * + * Online: + * http://glad.sh/#api=glx%3D1.4&extensions=GLX_EXT_swap_control%2CGLX_MESA_swap_control&generator=c&options= + * + */ + +#ifndef GLAD_GLX_H_ +#define GLAD_GLX_H_ + +#ifdef GLX_H + #error GLX header already included (API: glx), remove previous include! +#endif +#define GLX_H 1 + + +#include +#include +#include + +#include + +#define GLAD_GLX + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define GLX_ACCUM_ALPHA_SIZE 17 +#define GLX_ACCUM_BLUE_SIZE 16 +#define GLX_ACCUM_BUFFER_BIT 0x00000080 +#define GLX_ACCUM_GREEN_SIZE 15 +#define GLX_ACCUM_RED_SIZE 14 +#define GLX_ALPHA_SIZE 11 +#define GLX_AUX_BUFFERS 7 +#define GLX_AUX_BUFFERS_BIT 0x00000010 +#define GLX_BACK_LEFT_BUFFER_BIT 0x00000004 +#define GLX_BACK_RIGHT_BUFFER_BIT 0x00000008 +#define GLX_BAD_ATTRIBUTE 2 +#define GLX_BAD_CONTEXT 5 +#define GLX_BAD_ENUM 7 +#define GLX_BAD_SCREEN 1 +#define GLX_BAD_VALUE 6 +#define GLX_BAD_VISUAL 4 +#define GLX_BLUE_SIZE 10 +#define GLX_BUFFER_SIZE 2 +#define GLX_BufferSwapComplete 1 +#define GLX_COLOR_INDEX_BIT 0x00000002 +#define GLX_COLOR_INDEX_TYPE 0x8015 +#define GLX_CONFIG_CAVEAT 0x20 +#define GLX_DAMAGED 0x8020 +#define GLX_DEPTH_BUFFER_BIT 0x00000020 +#define GLX_DEPTH_SIZE 12 +#define GLX_DIRECT_COLOR 0x8003 +#define GLX_DONT_CARE 0xFFFFFFFF +#define GLX_DOUBLEBUFFER 5 +#define GLX_DRAWABLE_TYPE 0x8010 +#define GLX_EVENT_MASK 0x801F +#define GLX_EXTENSIONS 0x3 +#define GLX_EXTENSION_NAME "GLX" +#define GLX_FBCONFIG_ID 0x8013 +#define GLX_FRONT_LEFT_BUFFER_BIT 0x00000001 +#define GLX_FRONT_RIGHT_BUFFER_BIT 0x00000002 +#define GLX_GRAY_SCALE 0x8006 +#define GLX_GREEN_SIZE 9 +#define GLX_HEIGHT 0x801E +#define GLX_LARGEST_PBUFFER 0x801C +#define GLX_LEVEL 3 +#define GLX_MAX_PBUFFER_HEIGHT 0x8017 +#define GLX_MAX_PBUFFER_PIXELS 0x8018 +#define GLX_MAX_PBUFFER_WIDTH 0x8016 +#define GLX_MAX_SWAP_INTERVAL_EXT 0x20F2 +#define GLX_NONE 0x8000 +#define GLX_NON_CONFORMANT_CONFIG 0x800D +#define GLX_NO_EXTENSION 3 +#define GLX_PBUFFER 0x8023 +#define GLX_PBUFFER_BIT 0x00000004 +#define GLX_PBUFFER_CLOBBER_MASK 0x08000000 +#define GLX_PBUFFER_HEIGHT 0x8040 +#define GLX_PBUFFER_WIDTH 0x8041 +#define GLX_PIXMAP_BIT 0x00000002 +#define GLX_PRESERVED_CONTENTS 0x801B +#define GLX_PSEUDO_COLOR 0x8004 +#define GLX_PbufferClobber 0 +#define GLX_RED_SIZE 8 +#define GLX_RENDER_TYPE 0x8011 +#define GLX_RGBA 4 +#define GLX_RGBA_BIT 0x00000001 +#define GLX_RGBA_TYPE 0x8014 +#define GLX_SAMPLES 100001 +#define GLX_SAMPLE_BUFFERS 100000 +#define GLX_SAVED 0x8021 +#define GLX_SCREEN 0x800C +#define GLX_SLOW_CONFIG 0x8001 +#define GLX_STATIC_COLOR 0x8005 +#define GLX_STATIC_GRAY 0x8007 +#define GLX_STENCIL_BUFFER_BIT 0x00000040 +#define GLX_STENCIL_SIZE 13 +#define GLX_STEREO 6 +#define GLX_SWAP_INTERVAL_EXT 0x20F1 +#define GLX_TRANSPARENT_ALPHA_VALUE 0x28 +#define GLX_TRANSPARENT_BLUE_VALUE 0x27 +#define GLX_TRANSPARENT_GREEN_VALUE 0x26 +#define GLX_TRANSPARENT_INDEX 0x8009 +#define GLX_TRANSPARENT_INDEX_VALUE 0x24 +#define GLX_TRANSPARENT_RED_VALUE 0x25 +#define GLX_TRANSPARENT_RGB 0x8008 +#define GLX_TRANSPARENT_TYPE 0x23 +#define GLX_TRUE_COLOR 0x8002 +#define GLX_USE_GL 1 +#define GLX_VENDOR 0x1 +#define GLX_VERSION 0x2 +#define GLX_VISUAL_ID 0x800B +#define GLX_WIDTH 0x801D +#define GLX_WINDOW 0x8022 +#define GLX_WINDOW_BIT 0x00000001 +#define GLX_X_RENDERABLE 0x8012 +#define GLX_X_VISUAL_TYPE 0x22 +#define __GLX_NUMBER_EVENTS 17 + + +#ifndef GLEXT_64_TYPES_DEFINED +/* This code block is duplicated in glext.h, so must be protected */ +#define GLEXT_64_TYPES_DEFINED +/* Define int32_t, int64_t, and uint64_t types for UST/MSC */ +/* (as used in the GLX_OML_sync_control extension). */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#include +#elif defined(__sun__) || defined(__digital__) +#include +#if defined(__STDC__) +#if defined(__arch64__) || defined(_LP64) +typedef long int int64_t; +typedef unsigned long int uint64_t; +#else +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#endif /* __arch64__ */ +#endif /* __STDC__ */ +#elif defined( __VMS ) || defined(__sgi) +#include +#elif defined(__SCO__) || defined(__USLC__) +#include +#elif defined(__UNIXOS2__) || defined(__SOL64__) +typedef long int int32_t; +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#elif defined(_WIN32) && defined(__GNUC__) +#include +#elif defined(_WIN32) +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +/* Fallback if nothing above works */ +#include +#endif +#endif + + + + + + + + + + + + + + + + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) + +#else + +#endif + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1060) + +#else + +#endif + + + + + + + +typedef XID GLXFBConfigID; +typedef struct __GLXFBConfigRec *GLXFBConfig; +typedef XID GLXContextID; +typedef struct __GLXcontextRec *GLXContext; +typedef XID GLXPixmap; +typedef XID GLXDrawable; +typedef XID GLXWindow; +typedef XID GLXPbuffer; +typedef void (GLAD_API_PTR *__GLXextFuncPtr)(void); +typedef XID GLXVideoCaptureDeviceNV; +typedef unsigned int GLXVideoDeviceNV; +typedef XID GLXVideoSourceSGIX; +typedef XID GLXFBConfigIDSGIX; +typedef struct __GLXFBConfigRec *GLXFBConfigSGIX; +typedef XID GLXPbufferSGIX; +typedef struct { + int event_type; /* GLX_DAMAGED or GLX_SAVED */ + int draw_type; /* GLX_WINDOW or GLX_PBUFFER */ + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came for SendEvent request */ + Display *display; /* display the event was read from */ + GLXDrawable drawable; /* XID of Drawable */ + unsigned int buffer_mask; /* mask indicating which buffers are affected */ + unsigned int aux_buffer; /* which aux buffer was affected */ + int x, y; + int width, height; + int count; /* if nonzero, at least this many more */ +} GLXPbufferClobberEvent; +typedef struct { + int type; + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came from a SendEvent request */ + Display *display; /* Display the event was read from */ + GLXDrawable drawable; /* drawable on which event was requested in event mask */ + int event_type; + int64_t ust; + int64_t msc; + int64_t sbc; +} GLXBufferSwapComplete; +typedef union __GLXEvent { + GLXPbufferClobberEvent glxpbufferclobber; + GLXBufferSwapComplete glxbufferswapcomplete; + long pad[24]; +} GLXEvent; +typedef struct { + int type; + unsigned long serial; + Bool send_event; + Display *display; + int extension; + int evtype; + GLXDrawable window; + Bool stereo_tree; +} GLXStereoNotifyEventEXT; +typedef struct { + int type; + unsigned long serial; /* # of last request processed by server */ + Bool send_event; /* true if this came for SendEvent request */ + Display *display; /* display the event was read from */ + GLXDrawable drawable; /* i.d. of Drawable */ + int event_type; /* GLX_DAMAGED_SGIX or GLX_SAVED_SGIX */ + int draw_type; /* GLX_WINDOW_SGIX or GLX_PBUFFER_SGIX */ + unsigned int mask; /* mask indicating which buffers are affected*/ + int x, y; + int width, height; + int count; /* if nonzero, at least this many more */ +} GLXBufferClobberEventSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int networkId; +} GLXHyperpipeNetworkSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int channel; + unsigned int participationType; + int timeSlice; +} GLXHyperpipeConfigSGIX; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int srcXOrigin, srcYOrigin, srcWidth, srcHeight; + int destXOrigin, destYOrigin, destWidth, destHeight; +} GLXPipeRect; +typedef struct { + char pipeName[80]; /* Should be [GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX] */ + int XOrigin, YOrigin, maxHeight, maxWidth; +} GLXPipeRectLimits; + + +#define GLX_VERSION_1_0 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_0; +#define GLX_VERSION_1_1 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_1; +#define GLX_VERSION_1_2 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_2; +#define GLX_VERSION_1_3 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_3; +#define GLX_VERSION_1_4 1 +GLAD_API_CALL int GLAD_GLX_VERSION_1_4; +#define GLX_EXT_swap_control 1 +GLAD_API_CALL int GLAD_GLX_EXT_swap_control; +#define GLX_MESA_swap_control 1 +GLAD_API_CALL int GLAD_GLX_MESA_swap_control; + + +typedef GLXFBConfig * (GLAD_API_PTR *PFNGLXCHOOSEFBCONFIGPROC)(Display * dpy, int screen, const int * attrib_list, int * nelements); +typedef XVisualInfo * (GLAD_API_PTR *PFNGLXCHOOSEVISUALPROC)(Display * dpy, int screen, int * attribList); +typedef void (GLAD_API_PTR *PFNGLXCOPYCONTEXTPROC)(Display * dpy, GLXContext src, GLXContext dst, unsigned long mask); +typedef GLXContext (GLAD_API_PTR *PFNGLXCREATECONTEXTPROC)(Display * dpy, XVisualInfo * vis, GLXContext shareList, Bool direct); +typedef GLXPixmap (GLAD_API_PTR *PFNGLXCREATEGLXPIXMAPPROC)(Display * dpy, XVisualInfo * visual, Pixmap pixmap); +typedef GLXContext (GLAD_API_PTR *PFNGLXCREATENEWCONTEXTPROC)(Display * dpy, GLXFBConfig config, int render_type, GLXContext share_list, Bool direct); +typedef GLXPbuffer (GLAD_API_PTR *PFNGLXCREATEPBUFFERPROC)(Display * dpy, GLXFBConfig config, const int * attrib_list); +typedef GLXPixmap (GLAD_API_PTR *PFNGLXCREATEPIXMAPPROC)(Display * dpy, GLXFBConfig config, Pixmap pixmap, const int * attrib_list); +typedef GLXWindow (GLAD_API_PTR *PFNGLXCREATEWINDOWPROC)(Display * dpy, GLXFBConfig config, Window win, const int * attrib_list); +typedef void (GLAD_API_PTR *PFNGLXDESTROYCONTEXTPROC)(Display * dpy, GLXContext ctx); +typedef void (GLAD_API_PTR *PFNGLXDESTROYGLXPIXMAPPROC)(Display * dpy, GLXPixmap pixmap); +typedef void (GLAD_API_PTR *PFNGLXDESTROYPBUFFERPROC)(Display * dpy, GLXPbuffer pbuf); +typedef void (GLAD_API_PTR *PFNGLXDESTROYPIXMAPPROC)(Display * dpy, GLXPixmap pixmap); +typedef void (GLAD_API_PTR *PFNGLXDESTROYWINDOWPROC)(Display * dpy, GLXWindow win); +typedef const char * (GLAD_API_PTR *PFNGLXGETCLIENTSTRINGPROC)(Display * dpy, int name); +typedef int (GLAD_API_PTR *PFNGLXGETCONFIGPROC)(Display * dpy, XVisualInfo * visual, int attrib, int * value); +typedef GLXContext (GLAD_API_PTR *PFNGLXGETCURRENTCONTEXTPROC)(void); +typedef Display * (GLAD_API_PTR *PFNGLXGETCURRENTDISPLAYPROC)(void); +typedef GLXDrawable (GLAD_API_PTR *PFNGLXGETCURRENTDRAWABLEPROC)(void); +typedef GLXDrawable (GLAD_API_PTR *PFNGLXGETCURRENTREADDRAWABLEPROC)(void); +typedef int (GLAD_API_PTR *PFNGLXGETFBCONFIGATTRIBPROC)(Display * dpy, GLXFBConfig config, int attribute, int * value); +typedef GLXFBConfig * (GLAD_API_PTR *PFNGLXGETFBCONFIGSPROC)(Display * dpy, int screen, int * nelements); +typedef __GLXextFuncPtr (GLAD_API_PTR *PFNGLXGETPROCADDRESSPROC)(const GLubyte * procName); +typedef void (GLAD_API_PTR *PFNGLXGETSELECTEDEVENTPROC)(Display * dpy, GLXDrawable draw, unsigned long * event_mask); +typedef int (GLAD_API_PTR *PFNGLXGETSWAPINTERVALMESAPROC)(void); +typedef XVisualInfo * (GLAD_API_PTR *PFNGLXGETVISUALFROMFBCONFIGPROC)(Display * dpy, GLXFBConfig config); +typedef Bool (GLAD_API_PTR *PFNGLXISDIRECTPROC)(Display * dpy, GLXContext ctx); +typedef Bool (GLAD_API_PTR *PFNGLXMAKECONTEXTCURRENTPROC)(Display * dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); +typedef Bool (GLAD_API_PTR *PFNGLXMAKECURRENTPROC)(Display * dpy, GLXDrawable drawable, GLXContext ctx); +typedef int (GLAD_API_PTR *PFNGLXQUERYCONTEXTPROC)(Display * dpy, GLXContext ctx, int attribute, int * value); +typedef void (GLAD_API_PTR *PFNGLXQUERYDRAWABLEPROC)(Display * dpy, GLXDrawable draw, int attribute, unsigned int * value); +typedef Bool (GLAD_API_PTR *PFNGLXQUERYEXTENSIONPROC)(Display * dpy, int * errorb, int * event); +typedef const char * (GLAD_API_PTR *PFNGLXQUERYEXTENSIONSSTRINGPROC)(Display * dpy, int screen); +typedef const char * (GLAD_API_PTR *PFNGLXQUERYSERVERSTRINGPROC)(Display * dpy, int screen, int name); +typedef Bool (GLAD_API_PTR *PFNGLXQUERYVERSIONPROC)(Display * dpy, int * maj, int * min); +typedef void (GLAD_API_PTR *PFNGLXSELECTEVENTPROC)(Display * dpy, GLXDrawable draw, unsigned long event_mask); +typedef void (GLAD_API_PTR *PFNGLXSWAPBUFFERSPROC)(Display * dpy, GLXDrawable drawable); +typedef void (GLAD_API_PTR *PFNGLXSWAPINTERVALEXTPROC)(Display * dpy, GLXDrawable drawable, int interval); +typedef int (GLAD_API_PTR *PFNGLXSWAPINTERVALMESAPROC)(unsigned int interval); +typedef void (GLAD_API_PTR *PFNGLXUSEXFONTPROC)(Font font, int first, int count, int list); +typedef void (GLAD_API_PTR *PFNGLXWAITGLPROC)(void); +typedef void (GLAD_API_PTR *PFNGLXWAITXPROC)(void); + +GLAD_API_CALL PFNGLXCHOOSEFBCONFIGPROC glad_glXChooseFBConfig; +#define glXChooseFBConfig glad_glXChooseFBConfig +GLAD_API_CALL PFNGLXCHOOSEVISUALPROC glad_glXChooseVisual; +#define glXChooseVisual glad_glXChooseVisual +GLAD_API_CALL PFNGLXCOPYCONTEXTPROC glad_glXCopyContext; +#define glXCopyContext glad_glXCopyContext +GLAD_API_CALL PFNGLXCREATECONTEXTPROC glad_glXCreateContext; +#define glXCreateContext glad_glXCreateContext +GLAD_API_CALL PFNGLXCREATEGLXPIXMAPPROC glad_glXCreateGLXPixmap; +#define glXCreateGLXPixmap glad_glXCreateGLXPixmap +GLAD_API_CALL PFNGLXCREATENEWCONTEXTPROC glad_glXCreateNewContext; +#define glXCreateNewContext glad_glXCreateNewContext +GLAD_API_CALL PFNGLXCREATEPBUFFERPROC glad_glXCreatePbuffer; +#define glXCreatePbuffer glad_glXCreatePbuffer +GLAD_API_CALL PFNGLXCREATEPIXMAPPROC glad_glXCreatePixmap; +#define glXCreatePixmap glad_glXCreatePixmap +GLAD_API_CALL PFNGLXCREATEWINDOWPROC glad_glXCreateWindow; +#define glXCreateWindow glad_glXCreateWindow +GLAD_API_CALL PFNGLXDESTROYCONTEXTPROC glad_glXDestroyContext; +#define glXDestroyContext glad_glXDestroyContext +GLAD_API_CALL PFNGLXDESTROYGLXPIXMAPPROC glad_glXDestroyGLXPixmap; +#define glXDestroyGLXPixmap glad_glXDestroyGLXPixmap +GLAD_API_CALL PFNGLXDESTROYPBUFFERPROC glad_glXDestroyPbuffer; +#define glXDestroyPbuffer glad_glXDestroyPbuffer +GLAD_API_CALL PFNGLXDESTROYPIXMAPPROC glad_glXDestroyPixmap; +#define glXDestroyPixmap glad_glXDestroyPixmap +GLAD_API_CALL PFNGLXDESTROYWINDOWPROC glad_glXDestroyWindow; +#define glXDestroyWindow glad_glXDestroyWindow +GLAD_API_CALL PFNGLXGETCLIENTSTRINGPROC glad_glXGetClientString; +#define glXGetClientString glad_glXGetClientString +GLAD_API_CALL PFNGLXGETCONFIGPROC glad_glXGetConfig; +#define glXGetConfig glad_glXGetConfig +GLAD_API_CALL PFNGLXGETCURRENTCONTEXTPROC glad_glXGetCurrentContext; +#define glXGetCurrentContext glad_glXGetCurrentContext +GLAD_API_CALL PFNGLXGETCURRENTDISPLAYPROC glad_glXGetCurrentDisplay; +#define glXGetCurrentDisplay glad_glXGetCurrentDisplay +GLAD_API_CALL PFNGLXGETCURRENTDRAWABLEPROC glad_glXGetCurrentDrawable; +#define glXGetCurrentDrawable glad_glXGetCurrentDrawable +GLAD_API_CALL PFNGLXGETCURRENTREADDRAWABLEPROC glad_glXGetCurrentReadDrawable; +#define glXGetCurrentReadDrawable glad_glXGetCurrentReadDrawable +GLAD_API_CALL PFNGLXGETFBCONFIGATTRIBPROC glad_glXGetFBConfigAttrib; +#define glXGetFBConfigAttrib glad_glXGetFBConfigAttrib +GLAD_API_CALL PFNGLXGETFBCONFIGSPROC glad_glXGetFBConfigs; +#define glXGetFBConfigs glad_glXGetFBConfigs +GLAD_API_CALL PFNGLXGETPROCADDRESSPROC glad_glXGetProcAddress; +#define glXGetProcAddress glad_glXGetProcAddress +GLAD_API_CALL PFNGLXGETSELECTEDEVENTPROC glad_glXGetSelectedEvent; +#define glXGetSelectedEvent glad_glXGetSelectedEvent +GLAD_API_CALL PFNGLXGETSWAPINTERVALMESAPROC glad_glXGetSwapIntervalMESA; +#define glXGetSwapIntervalMESA glad_glXGetSwapIntervalMESA +GLAD_API_CALL PFNGLXGETVISUALFROMFBCONFIGPROC glad_glXGetVisualFromFBConfig; +#define glXGetVisualFromFBConfig glad_glXGetVisualFromFBConfig +GLAD_API_CALL PFNGLXISDIRECTPROC glad_glXIsDirect; +#define glXIsDirect glad_glXIsDirect +GLAD_API_CALL PFNGLXMAKECONTEXTCURRENTPROC glad_glXMakeContextCurrent; +#define glXMakeContextCurrent glad_glXMakeContextCurrent +GLAD_API_CALL PFNGLXMAKECURRENTPROC glad_glXMakeCurrent; +#define glXMakeCurrent glad_glXMakeCurrent +GLAD_API_CALL PFNGLXQUERYCONTEXTPROC glad_glXQueryContext; +#define glXQueryContext glad_glXQueryContext +GLAD_API_CALL PFNGLXQUERYDRAWABLEPROC glad_glXQueryDrawable; +#define glXQueryDrawable glad_glXQueryDrawable +GLAD_API_CALL PFNGLXQUERYEXTENSIONPROC glad_glXQueryExtension; +#define glXQueryExtension glad_glXQueryExtension +GLAD_API_CALL PFNGLXQUERYEXTENSIONSSTRINGPROC glad_glXQueryExtensionsString; +#define glXQueryExtensionsString glad_glXQueryExtensionsString +GLAD_API_CALL PFNGLXQUERYSERVERSTRINGPROC glad_glXQueryServerString; +#define glXQueryServerString glad_glXQueryServerString +GLAD_API_CALL PFNGLXQUERYVERSIONPROC glad_glXQueryVersion; +#define glXQueryVersion glad_glXQueryVersion +GLAD_API_CALL PFNGLXSELECTEVENTPROC glad_glXSelectEvent; +#define glXSelectEvent glad_glXSelectEvent +GLAD_API_CALL PFNGLXSWAPBUFFERSPROC glad_glXSwapBuffers; +#define glXSwapBuffers glad_glXSwapBuffers +GLAD_API_CALL PFNGLXSWAPINTERVALEXTPROC glad_glXSwapIntervalEXT; +#define glXSwapIntervalEXT glad_glXSwapIntervalEXT +GLAD_API_CALL PFNGLXSWAPINTERVALMESAPROC glad_glXSwapIntervalMESA; +#define glXSwapIntervalMESA glad_glXSwapIntervalMESA +GLAD_API_CALL PFNGLXUSEXFONTPROC glad_glXUseXFont; +#define glXUseXFont glad_glXUseXFont +GLAD_API_CALL PFNGLXWAITGLPROC glad_glXWaitGL; +#define glXWaitGL glad_glXWaitGL +GLAD_API_CALL PFNGLXWAITXPROC glad_glXWaitX; +#define glXWaitX glad_glXWaitX + + + + + +GLAD_API_CALL int gladLoadGLXUserPtr(Display *display, int screen, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadGLX(Display *display, int screen, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/wgl.h glmark2-2021.02/src/glad/include/glad/wgl.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/glad/wgl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/glad/wgl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,298 @@ +/** + * Loader generated by glad 2.0.0-beta on Wed Dec 25 23:13:13 2019 + * + * Generator: C/C++ + * Specification: wgl + * Extensions: 3 + * + * APIs: + * - wgl=1.0 + * + * Options: + * - MX_GLOBAL = False + * - ON_DEMAND = False + * - LOADER = False + * - ALIAS = False + * - HEADER_ONLY = False + * - DEBUG = False + * - MX = False + * + * Commandline: + * --api='wgl=1.0' --extensions='WGL_ARB_extensions_string,WGL_EXT_extensions_string,WGL_EXT_swap_control' c + * + * Online: + * http://glad.sh/#api=wgl%3D1.0&extensions=WGL_ARB_extensions_string%2CWGL_EXT_extensions_string%2CWGL_EXT_swap_control&generator=c&options= + * + */ + +#ifndef GLAD_WGL_H_ +#define GLAD_WGL_H_ + +#include +#include + +#define GLAD_WGL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef GLAD_PLATFORM_H_ +#define GLAD_PLATFORM_H_ + +#ifndef GLAD_PLATFORM_WIN32 + #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__) + #define GLAD_PLATFORM_WIN32 1 + #else + #define GLAD_PLATFORM_WIN32 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_APPLE + #ifdef __APPLE__ + #define GLAD_PLATFORM_APPLE 1 + #else + #define GLAD_PLATFORM_APPLE 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_EMSCRIPTEN + #ifdef __EMSCRIPTEN__ + #define GLAD_PLATFORM_EMSCRIPTEN 1 + #else + #define GLAD_PLATFORM_EMSCRIPTEN 0 + #endif +#endif + +#ifndef GLAD_PLATFORM_UWP + #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY) + #ifdef __has_include + #if __has_include() + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1 + #endif + #endif + + #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define GLAD_PLATFORM_UWP 1 + #endif + #endif + + #ifndef GLAD_PLATFORM_UWP + #define GLAD_PLATFORM_UWP 0 + #endif +#endif + +#ifdef __GNUC__ + #define GLAD_GNUC_EXTENSION __extension__ +#else + #define GLAD_GNUC_EXTENSION +#endif + +#ifndef GLAD_API_CALL + #if defined(GLAD_API_CALL_EXPORT) + #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__) + #if defined(GLAD_API_CALL_EXPORT_BUILD) + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllexport)) extern + #else + #define GLAD_API_CALL __declspec(dllexport) extern + #endif + #else + #if defined(__GNUC__) + #define GLAD_API_CALL __attribute__ ((dllimport)) extern + #else + #define GLAD_API_CALL __declspec(dllimport) extern + #endif + #endif + #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD) + #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern + #else + #define GLAD_API_CALL extern + #endif + #else + #define GLAD_API_CALL extern + #endif +#endif + +#ifdef APIENTRY + #define GLAD_API_PTR APIENTRY +#elif GLAD_PLATFORM_WIN32 + #define GLAD_API_PTR __stdcall +#else + #define GLAD_API_PTR +#endif + +#ifndef GLAPI +#define GLAPI GLAD_API_CALL +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY GLAD_API_PTR +#endif + +#define GLAD_MAKE_VERSION(major, minor) (major * 10000 + minor) +#define GLAD_VERSION_MAJOR(version) (version / 10000) +#define GLAD_VERSION_MINOR(version) (version % 10000) + +#define GLAD_GENERATOR_VERSION "2.0.0-beta" + +typedef void (*GLADapiproc)(void); + +typedef GLADapiproc (*GLADloadfunc)(const char *name); +typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name); + +typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...); +typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...); + +#endif /* GLAD_PLATFORM_H_ */ + +#define WGL_FONT_LINES 0 +#define WGL_FONT_POLYGONS 1 +#define WGL_SWAP_MAIN_PLANE 0x00000001 +#define WGL_SWAP_OVERLAY1 0x00000002 +#define WGL_SWAP_OVERLAY10 0x00000400 +#define WGL_SWAP_OVERLAY11 0x00000800 +#define WGL_SWAP_OVERLAY12 0x00001000 +#define WGL_SWAP_OVERLAY13 0x00002000 +#define WGL_SWAP_OVERLAY14 0x00004000 +#define WGL_SWAP_OVERLAY15 0x00008000 +#define WGL_SWAP_OVERLAY2 0x00000004 +#define WGL_SWAP_OVERLAY3 0x00000008 +#define WGL_SWAP_OVERLAY4 0x00000010 +#define WGL_SWAP_OVERLAY5 0x00000020 +#define WGL_SWAP_OVERLAY6 0x00000040 +#define WGL_SWAP_OVERLAY7 0x00000080 +#define WGL_SWAP_OVERLAY8 0x00000100 +#define WGL_SWAP_OVERLAY9 0x00000200 +#define WGL_SWAP_UNDERLAY1 0x00010000 +#define WGL_SWAP_UNDERLAY10 0x02000000 +#define WGL_SWAP_UNDERLAY11 0x04000000 +#define WGL_SWAP_UNDERLAY12 0x08000000 +#define WGL_SWAP_UNDERLAY13 0x10000000 +#define WGL_SWAP_UNDERLAY14 0x20000000 +#define WGL_SWAP_UNDERLAY15 0x40000000 +#define WGL_SWAP_UNDERLAY2 0x00020000 +#define WGL_SWAP_UNDERLAY3 0x00040000 +#define WGL_SWAP_UNDERLAY4 0x00080000 +#define WGL_SWAP_UNDERLAY5 0x00100000 +#define WGL_SWAP_UNDERLAY6 0x00200000 +#define WGL_SWAP_UNDERLAY7 0x00400000 +#define WGL_SWAP_UNDERLAY8 0x00800000 +#define WGL_SWAP_UNDERLAY9 0x01000000 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +struct _GPU_DEVICE { + DWORD cb; + CHAR DeviceName[32]; + CHAR DeviceString[128]; + DWORD Flags; + RECT rcVirtualScreen; +}; +DECLARE_HANDLE(HPBUFFERARB); +DECLARE_HANDLE(HPBUFFEREXT); +DECLARE_HANDLE(HVIDEOOUTPUTDEVICENV); +DECLARE_HANDLE(HPVIDEODEV); +DECLARE_HANDLE(HPGPUNV); +DECLARE_HANDLE(HGPUNV); +DECLARE_HANDLE(HVIDEOINPUTDEVICENV); +typedef struct _GPU_DEVICE GPU_DEVICE; +typedef struct _GPU_DEVICE *PGPU_DEVICE; + + +#define WGL_VERSION_1_0 1 +GLAD_API_CALL int GLAD_WGL_VERSION_1_0; +#define WGL_ARB_extensions_string 1 +GLAD_API_CALL int GLAD_WGL_ARB_extensions_string; +#define WGL_EXT_extensions_string 1 +GLAD_API_CALL int GLAD_WGL_EXT_extensions_string; +#define WGL_EXT_swap_control 1 +GLAD_API_CALL int GLAD_WGL_EXT_swap_control; + + +typedef int (GLAD_API_PTR *PFNCHOOSEPIXELFORMATPROC)(HDC hDc, const PIXELFORMATDESCRIPTOR * pPfd); +typedef int (GLAD_API_PTR *PFNDESCRIBEPIXELFORMATPROC)(HDC hdc, int ipfd, UINT cjpfd, const PIXELFORMATDESCRIPTOR * ppfd); +typedef UINT (GLAD_API_PTR *PFNGETENHMETAFILEPIXELFORMATPROC)(HENHMETAFILE hemf, const PIXELFORMATDESCRIPTOR * ppfd); +typedef int (GLAD_API_PTR *PFNGETPIXELFORMATPROC)(HDC hdc); +typedef BOOL (GLAD_API_PTR *PFNSETPIXELFORMATPROC)(HDC hdc, int ipfd, const PIXELFORMATDESCRIPTOR * ppfd); +typedef BOOL (GLAD_API_PTR *PFNSWAPBUFFERSPROC)(HDC hdc); +typedef BOOL (GLAD_API_PTR *PFNWGLCOPYCONTEXTPROC)(HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask); +typedef HGLRC (GLAD_API_PTR *PFNWGLCREATECONTEXTPROC)(HDC hDc); +typedef HGLRC (GLAD_API_PTR *PFNWGLCREATELAYERCONTEXTPROC)(HDC hDc, int level); +typedef BOOL (GLAD_API_PTR *PFNWGLDELETECONTEXTPROC)(HGLRC oldContext); +typedef BOOL (GLAD_API_PTR *PFNWGLDESCRIBELAYERPLANEPROC)(HDC hDc, int pixelFormat, int layerPlane, UINT nBytes, const LAYERPLANEDESCRIPTOR * plpd); +typedef HGLRC (GLAD_API_PTR *PFNWGLGETCURRENTCONTEXTPROC)(void); +typedef HDC (GLAD_API_PTR *PFNWGLGETCURRENTDCPROC)(void); +typedef const char * (GLAD_API_PTR *PFNWGLGETEXTENSIONSSTRINGARBPROC)(HDC hdc); +typedef const char * (GLAD_API_PTR *PFNWGLGETEXTENSIONSSTRINGEXTPROC)(void); +typedef int (GLAD_API_PTR *PFNWGLGETLAYERPALETTEENTRIESPROC)(HDC hdc, int iLayerPlane, int iStart, int cEntries, const COLORREF * pcr); +typedef PROC (GLAD_API_PTR *PFNWGLGETPROCADDRESSPROC)(LPCSTR lpszProc); +typedef int (GLAD_API_PTR *PFNWGLGETSWAPINTERVALEXTPROC)(void); +typedef BOOL (GLAD_API_PTR *PFNWGLMAKECURRENTPROC)(HDC hDc, HGLRC newContext); +typedef BOOL (GLAD_API_PTR *PFNWGLREALIZELAYERPALETTEPROC)(HDC hdc, int iLayerPlane, BOOL bRealize); +typedef int (GLAD_API_PTR *PFNWGLSETLAYERPALETTEENTRIESPROC)(HDC hdc, int iLayerPlane, int iStart, int cEntries, const COLORREF * pcr); +typedef BOOL (GLAD_API_PTR *PFNWGLSHARELISTSPROC)(HGLRC hrcSrvShare, HGLRC hrcSrvSource); +typedef BOOL (GLAD_API_PTR *PFNWGLSWAPINTERVALEXTPROC)(int interval); +typedef BOOL (GLAD_API_PTR *PFNWGLSWAPLAYERBUFFERSPROC)(HDC hdc, UINT fuFlags); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSAPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTBITMAPSWPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESAPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); +typedef BOOL (GLAD_API_PTR *PFNWGLUSEFONTOUTLINESWPROC)(HDC hDC, DWORD first, DWORD count, DWORD listBase, FLOAT deviation, FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf); + +GLAD_API_CALL PFNWGLGETEXTENSIONSSTRINGARBPROC glad_wglGetExtensionsStringARB; +#define wglGetExtensionsStringARB glad_wglGetExtensionsStringARB +GLAD_API_CALL PFNWGLGETEXTENSIONSSTRINGEXTPROC glad_wglGetExtensionsStringEXT; +#define wglGetExtensionsStringEXT glad_wglGetExtensionsStringEXT +GLAD_API_CALL PFNWGLGETSWAPINTERVALEXTPROC glad_wglGetSwapIntervalEXT; +#define wglGetSwapIntervalEXT glad_wglGetSwapIntervalEXT +GLAD_API_CALL PFNWGLSWAPINTERVALEXTPROC glad_wglSwapIntervalEXT; +#define wglSwapIntervalEXT glad_wglSwapIntervalEXT + + + + + +GLAD_API_CALL int gladLoadWGLUserPtr(HDC hdc, GLADuserptrloadfunc load, void *userptr); +GLAD_API_CALL int gladLoadWGL(HDC hdc, GLADloadfunc load); + + +#ifdef __cplusplus +} +#endif +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/include/KHR/khrplatform.h glmark2-2021.02/src/glad/include/KHR/khrplatform.h --- glmark2-2014.03+git20150611.fa71af2d/src/glad/include/KHR/khrplatform.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/include/KHR/khrplatform.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,290 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2018 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * The master copy of khrplatform.h is maintained in the Khronos EGL + * Registry repository at https://github.com/KhronosGroup/EGL-Registry + * The last semantic modification to khrplatform.h was at commit ID: + * 67a3e0864c2d75ea5287b9f3d2eb74a745936692 + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by filing pull requests or issues on + * the EGL Registry repository linked above. + * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. + * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC) +# define KHRONOS_STATIC 1 +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(KHRONOS_STATIC) + /* If the preprocessor constant KHRONOS_STATIC is defined, make the + * header compatible with static linking. */ +# define KHRONOS_APICALL +#elif defined(_WIN32) +# define KHRONOS_APICALL __declspec(dllimport) +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#elif defined(__ANDROID__) +# define KHRONOS_APICALL __attribute__((visibility("default"))) +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. + */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. + */ +#ifdef _WIN64 +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; +#endif + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. Therefore + * comparisons should not be made against KHRONOS_TRUE. + */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/README.md glmark2-2021.02/src/glad/README.md --- glmark2-2014.03+git20150611.fa71af2d/src/glad/README.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/README.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,7 @@ +This directory contains a copy of the +[glad2](https://github.com/Dav1dde/glad/tree/glad2) library. + +The contents of this directory can be recreated (at the latest available +upstream version) with [this][permalink] permalink. + +[permalink]: http://gen.glad.sh/#profile=gl%3Dcompatibility%2Cgles1%3Dcommon&api=egl%3D1.5%2Cgl%3D3.3%2Cgles2%3D2.0%2Cglx%3D1.4%2Cwgl%3D1.0&extensions=EGL_EXT_platform_base%2CEGL_KHR_platform_gbm%2CEGL_KHR_platform_wayland%2CEGL_KHR_platform_x11%2CGLX_EXT_swap_control%2CGLX_MESA_swap_control%2CGL_OES_mapbuffer%2CGL_OES_required_internalformat%2CWGL_ARB_extensions_string%2CWGL_EXT_extensions_string%2CWGL_EXT_swap_control&generator=c diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/src/egl.c glmark2-2021.02/src/glad/src/egl.c --- glmark2-2014.03+git20150611.fa71af2d/src/glad/src/egl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/src/egl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,258 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_EGL_VERSION_1_0 = 0; +int GLAD_EGL_VERSION_1_1 = 0; +int GLAD_EGL_VERSION_1_2 = 0; +int GLAD_EGL_VERSION_1_3 = 0; +int GLAD_EGL_VERSION_1_4 = 0; +int GLAD_EGL_VERSION_1_5 = 0; +int GLAD_EGL_EXT_platform_base = 0; +int GLAD_EGL_KHR_platform_gbm = 0; +int GLAD_EGL_KHR_platform_wayland = 0; +int GLAD_EGL_KHR_platform_x11 = 0; + + + +PFNEGLBINDAPIPROC glad_eglBindAPI = NULL; +PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage = NULL; +PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig = NULL; +PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync = NULL; +PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers = NULL; +PFNEGLCREATECONTEXTPROC glad_eglCreateContext = NULL; +PFNEGLCREATEIMAGEPROC glad_eglCreateImage = NULL; +PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer = NULL; +PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface = NULL; +PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface = NULL; +PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface = NULL; +PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC glad_eglCreatePlatformPixmapSurfaceEXT = NULL; +PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface = NULL; +PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC glad_eglCreatePlatformWindowSurfaceEXT = NULL; +PFNEGLCREATESYNCPROC glad_eglCreateSync = NULL; +PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface = NULL; +PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext = NULL; +PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage = NULL; +PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface = NULL; +PFNEGLDESTROYSYNCPROC glad_eglDestroySync = NULL; +PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib = NULL; +PFNEGLGETCONFIGSPROC glad_eglGetConfigs = NULL; +PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext = NULL; +PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay = NULL; +PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface = NULL; +PFNEGLGETDISPLAYPROC glad_eglGetDisplay = NULL; +PFNEGLGETERRORPROC glad_eglGetError = NULL; +PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay = NULL; +PFNEGLGETPLATFORMDISPLAYEXTPROC glad_eglGetPlatformDisplayEXT = NULL; +PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress = NULL; +PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib = NULL; +PFNEGLINITIALIZEPROC glad_eglInitialize = NULL; +PFNEGLMAKECURRENTPROC glad_eglMakeCurrent = NULL; +PFNEGLQUERYAPIPROC glad_eglQueryAPI = NULL; +PFNEGLQUERYCONTEXTPROC glad_eglQueryContext = NULL; +PFNEGLQUERYSTRINGPROC glad_eglQueryString = NULL; +PFNEGLQUERYSURFACEPROC glad_eglQuerySurface = NULL; +PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage = NULL; +PFNEGLRELEASETHREADPROC glad_eglReleaseThread = NULL; +PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib = NULL; +PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers = NULL; +PFNEGLSWAPINTERVALPROC glad_eglSwapInterval = NULL; +PFNEGLTERMINATEPROC glad_eglTerminate = NULL; +PFNEGLWAITCLIENTPROC glad_eglWaitClient = NULL; +PFNEGLWAITGLPROC glad_eglWaitGL = NULL; +PFNEGLWAITNATIVEPROC glad_eglWaitNative = NULL; +PFNEGLWAITSYNCPROC glad_eglWaitSync = NULL; + + +static void glad_egl_load_EGL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_0) return; + glad_eglChooseConfig = (PFNEGLCHOOSECONFIGPROC) load(userptr, "eglChooseConfig"); + glad_eglCopyBuffers = (PFNEGLCOPYBUFFERSPROC) load(userptr, "eglCopyBuffers"); + glad_eglCreateContext = (PFNEGLCREATECONTEXTPROC) load(userptr, "eglCreateContext"); + glad_eglCreatePbufferSurface = (PFNEGLCREATEPBUFFERSURFACEPROC) load(userptr, "eglCreatePbufferSurface"); + glad_eglCreatePixmapSurface = (PFNEGLCREATEPIXMAPSURFACEPROC) load(userptr, "eglCreatePixmapSurface"); + glad_eglCreateWindowSurface = (PFNEGLCREATEWINDOWSURFACEPROC) load(userptr, "eglCreateWindowSurface"); + glad_eglDestroyContext = (PFNEGLDESTROYCONTEXTPROC) load(userptr, "eglDestroyContext"); + glad_eglDestroySurface = (PFNEGLDESTROYSURFACEPROC) load(userptr, "eglDestroySurface"); + glad_eglGetConfigAttrib = (PFNEGLGETCONFIGATTRIBPROC) load(userptr, "eglGetConfigAttrib"); + glad_eglGetConfigs = (PFNEGLGETCONFIGSPROC) load(userptr, "eglGetConfigs"); + glad_eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay"); + glad_eglGetCurrentSurface = (PFNEGLGETCURRENTSURFACEPROC) load(userptr, "eglGetCurrentSurface"); + glad_eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay"); + glad_eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError"); + glad_eglGetProcAddress = (PFNEGLGETPROCADDRESSPROC) load(userptr, "eglGetProcAddress"); + glad_eglInitialize = (PFNEGLINITIALIZEPROC) load(userptr, "eglInitialize"); + glad_eglMakeCurrent = (PFNEGLMAKECURRENTPROC) load(userptr, "eglMakeCurrent"); + glad_eglQueryContext = (PFNEGLQUERYCONTEXTPROC) load(userptr, "eglQueryContext"); + glad_eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString"); + glad_eglQuerySurface = (PFNEGLQUERYSURFACEPROC) load(userptr, "eglQuerySurface"); + glad_eglSwapBuffers = (PFNEGLSWAPBUFFERSPROC) load(userptr, "eglSwapBuffers"); + glad_eglTerminate = (PFNEGLTERMINATEPROC) load(userptr, "eglTerminate"); + glad_eglWaitGL = (PFNEGLWAITGLPROC) load(userptr, "eglWaitGL"); + glad_eglWaitNative = (PFNEGLWAITNATIVEPROC) load(userptr, "eglWaitNative"); +} +static void glad_egl_load_EGL_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_1) return; + glad_eglBindTexImage = (PFNEGLBINDTEXIMAGEPROC) load(userptr, "eglBindTexImage"); + glad_eglReleaseTexImage = (PFNEGLRELEASETEXIMAGEPROC) load(userptr, "eglReleaseTexImage"); + glad_eglSurfaceAttrib = (PFNEGLSURFACEATTRIBPROC) load(userptr, "eglSurfaceAttrib"); + glad_eglSwapInterval = (PFNEGLSWAPINTERVALPROC) load(userptr, "eglSwapInterval"); +} +static void glad_egl_load_EGL_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_2) return; + glad_eglBindAPI = (PFNEGLBINDAPIPROC) load(userptr, "eglBindAPI"); + glad_eglCreatePbufferFromClientBuffer = (PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC) load(userptr, "eglCreatePbufferFromClientBuffer"); + glad_eglQueryAPI = (PFNEGLQUERYAPIPROC) load(userptr, "eglQueryAPI"); + glad_eglReleaseThread = (PFNEGLRELEASETHREADPROC) load(userptr, "eglReleaseThread"); + glad_eglWaitClient = (PFNEGLWAITCLIENTPROC) load(userptr, "eglWaitClient"); +} +static void glad_egl_load_EGL_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_4) return; + glad_eglGetCurrentContext = (PFNEGLGETCURRENTCONTEXTPROC) load(userptr, "eglGetCurrentContext"); +} +static void glad_egl_load_EGL_VERSION_1_5( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_VERSION_1_5) return; + glad_eglClientWaitSync = (PFNEGLCLIENTWAITSYNCPROC) load(userptr, "eglClientWaitSync"); + glad_eglCreateImage = (PFNEGLCREATEIMAGEPROC) load(userptr, "eglCreateImage"); + glad_eglCreatePlatformPixmapSurface = (PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC) load(userptr, "eglCreatePlatformPixmapSurface"); + glad_eglCreatePlatformWindowSurface = (PFNEGLCREATEPLATFORMWINDOWSURFACEPROC) load(userptr, "eglCreatePlatformWindowSurface"); + glad_eglCreateSync = (PFNEGLCREATESYNCPROC) load(userptr, "eglCreateSync"); + glad_eglDestroyImage = (PFNEGLDESTROYIMAGEPROC) load(userptr, "eglDestroyImage"); + glad_eglDestroySync = (PFNEGLDESTROYSYNCPROC) load(userptr, "eglDestroySync"); + glad_eglGetPlatformDisplay = (PFNEGLGETPLATFORMDISPLAYPROC) load(userptr, "eglGetPlatformDisplay"); + glad_eglGetSyncAttrib = (PFNEGLGETSYNCATTRIBPROC) load(userptr, "eglGetSyncAttrib"); + glad_eglWaitSync = (PFNEGLWAITSYNCPROC) load(userptr, "eglWaitSync"); +} +static void glad_egl_load_EGL_EXT_platform_base( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_EGL_EXT_platform_base) return; + glad_eglCreatePlatformPixmapSurfaceEXT = (PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) load(userptr, "eglCreatePlatformPixmapSurfaceEXT"); + glad_eglCreatePlatformWindowSurfaceEXT = (PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) load(userptr, "eglCreatePlatformWindowSurfaceEXT"); + glad_eglGetPlatformDisplayEXT = (PFNEGLGETPLATFORMDISPLAYEXTPROC) load(userptr, "eglGetPlatformDisplayEXT"); +} + + + +static int glad_egl_get_extensions(EGLDisplay display, const char **extensions) { + *extensions = eglQueryString(display, EGL_EXTENSIONS); + + return extensions != NULL; +} + +static int glad_egl_has_extension(const char *extensions, const char *ext) { + const char *loc; + const char *terminator; + if(extensions == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } +} + +static GLADapiproc glad_egl_get_proc_from_userptr(void *userptr, const char *name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_egl_find_extensions_egl(EGLDisplay display) { + const char *extensions; + if (!glad_egl_get_extensions(display, &extensions)) return 0; + + GLAD_EGL_EXT_platform_base = glad_egl_has_extension(extensions, "EGL_EXT_platform_base"); + GLAD_EGL_KHR_platform_gbm = glad_egl_has_extension(extensions, "EGL_KHR_platform_gbm"); + GLAD_EGL_KHR_platform_wayland = glad_egl_has_extension(extensions, "EGL_KHR_platform_wayland"); + GLAD_EGL_KHR_platform_x11 = glad_egl_has_extension(extensions, "EGL_KHR_platform_x11"); + + return 1; +} + +static int glad_egl_find_core_egl(EGLDisplay display) { + int major, minor; + const char *version; + + if (display == NULL) { + display = EGL_NO_DISPLAY; /* this is usually NULL, better safe than sorry */ + } + if (display == EGL_NO_DISPLAY) { + display = eglGetCurrentDisplay(); + } +#ifdef EGL_VERSION_1_4 + if (display == EGL_NO_DISPLAY) { + display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + } +#endif +#ifndef EGL_VERSION_1_5 + if (display == EGL_NO_DISPLAY) { + return 0; + } +#endif + + version = eglQueryString(display, EGL_VERSION); + (void) eglGetError(); + + if (version == NULL) { + major = 1; + minor = 0; + } else { + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + } + + GLAD_EGL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_EGL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_EGL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_EGL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_EGL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + GLAD_EGL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void* userptr) { + int version; + eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay"); + eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay"); + eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString"); + eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError"); + if (eglGetDisplay == NULL || eglGetCurrentDisplay == NULL || eglQueryString == NULL || eglGetError == NULL) return 0; + + version = glad_egl_find_core_egl(display); + if (!version) return 0; + glad_egl_load_EGL_VERSION_1_0(load, userptr); + glad_egl_load_EGL_VERSION_1_1(load, userptr); + glad_egl_load_EGL_VERSION_1_2(load, userptr); + glad_egl_load_EGL_VERSION_1_4(load, userptr); + glad_egl_load_EGL_VERSION_1_5(load, userptr); + + if (!glad_egl_find_extensions_egl(display)) return 0; + glad_egl_load_EGL_EXT_platform_base(load, userptr); + + return version; +} + +int gladLoadEGL(EGLDisplay display, GLADloadfunc load) { + return gladLoadEGLUserPtr(display, glad_egl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/src/gl.c glmark2-2021.02/src/glad/src/gl.c --- glmark2-2014.03+git20150611.fa71af2d/src/glad/src/gl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/src/gl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1709 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GL_VERSION_1_0 = 0; +int GLAD_GL_VERSION_1_1 = 0; +int GLAD_GL_VERSION_1_2 = 0; +int GLAD_GL_VERSION_1_3 = 0; +int GLAD_GL_VERSION_1_4 = 0; +int GLAD_GL_VERSION_1_5 = 0; +int GLAD_GL_VERSION_2_0 = 0; +int GLAD_GL_VERSION_2_1 = 0; +int GLAD_GL_VERSION_3_0 = 0; +int GLAD_GL_VERSION_3_1 = 0; +int GLAD_GL_VERSION_3_2 = 0; +int GLAD_GL_VERSION_3_3 = 0; + + + +PFNGLACCUMPROC glad_glAccum = NULL; +PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL; +PFNGLALPHAFUNCPROC glad_glAlphaFunc = NULL; +PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident = NULL; +PFNGLARRAYELEMENTPROC glad_glArrayElement = NULL; +PFNGLATTACHSHADERPROC glad_glAttachShader = NULL; +PFNGLBEGINPROC glad_glBegin = NULL; +PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender = NULL; +PFNGLBEGINQUERYPROC glad_glBeginQuery = NULL; +PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback = NULL; +PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL; +PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL; +PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase = NULL; +PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange = NULL; +PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation = NULL; +PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed = NULL; +PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL; +PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; +PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL; +PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL; +PFNGLBITMAPPROC glad_glBitmap = NULL; +PFNGLBLENDCOLORPROC glad_glBlendColor = NULL; +PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL; +PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL; +PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL; +PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; +PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL; +PFNGLBUFFERDATAPROC glad_glBufferData = NULL; +PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; +PFNGLCALLLISTPROC glad_glCallList = NULL; +PFNGLCALLLISTSPROC glad_glCallLists = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCLAMPCOLORPROC glad_glClampColor = NULL; +PFNGLCLEARPROC glad_glClear = NULL; +PFNGLCLEARACCUMPROC glad_glClearAccum = NULL; +PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi = NULL; +PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv = NULL; +PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv = NULL; +PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL; +PFNGLCLEARCOLORPROC glad_glClearColor = NULL; +PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL; +PFNGLCLEARINDEXPROC glad_glClearIndex = NULL; +PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; +PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture = NULL; +PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL; +PFNGLCLIPPLANEPROC glad_glClipPlane = NULL; +PFNGLCOLOR3BPROC glad_glColor3b = NULL; +PFNGLCOLOR3BVPROC glad_glColor3bv = NULL; +PFNGLCOLOR3DPROC glad_glColor3d = NULL; +PFNGLCOLOR3DVPROC glad_glColor3dv = NULL; +PFNGLCOLOR3FPROC glad_glColor3f = NULL; +PFNGLCOLOR3FVPROC glad_glColor3fv = NULL; +PFNGLCOLOR3IPROC glad_glColor3i = NULL; +PFNGLCOLOR3IVPROC glad_glColor3iv = NULL; +PFNGLCOLOR3SPROC glad_glColor3s = NULL; +PFNGLCOLOR3SVPROC glad_glColor3sv = NULL; +PFNGLCOLOR3UBPROC glad_glColor3ub = NULL; +PFNGLCOLOR3UBVPROC glad_glColor3ubv = NULL; +PFNGLCOLOR3UIPROC glad_glColor3ui = NULL; +PFNGLCOLOR3UIVPROC glad_glColor3uiv = NULL; +PFNGLCOLOR3USPROC glad_glColor3us = NULL; +PFNGLCOLOR3USVPROC glad_glColor3usv = NULL; +PFNGLCOLOR4BPROC glad_glColor4b = NULL; +PFNGLCOLOR4BVPROC glad_glColor4bv = NULL; +PFNGLCOLOR4DPROC glad_glColor4d = NULL; +PFNGLCOLOR4DVPROC glad_glColor4dv = NULL; +PFNGLCOLOR4FPROC glad_glColor4f = NULL; +PFNGLCOLOR4FVPROC glad_glColor4fv = NULL; +PFNGLCOLOR4IPROC glad_glColor4i = NULL; +PFNGLCOLOR4IVPROC glad_glColor4iv = NULL; +PFNGLCOLOR4SPROC glad_glColor4s = NULL; +PFNGLCOLOR4SVPROC glad_glColor4sv = NULL; +PFNGLCOLOR4UBPROC glad_glColor4ub = NULL; +PFNGLCOLOR4UBVPROC glad_glColor4ubv = NULL; +PFNGLCOLOR4UIPROC glad_glColor4ui = NULL; +PFNGLCOLOR4UIVPROC glad_glColor4uiv = NULL; +PFNGLCOLOR4USPROC glad_glColor4us = NULL; +PFNGLCOLOR4USVPROC glad_glColor4usv = NULL; +PFNGLCOLORMASKPROC glad_glColorMask = NULL; +PFNGLCOLORMASKIPROC glad_glColorMaski = NULL; +PFNGLCOLORMATERIALPROC glad_glColorMaterial = NULL; +PFNGLCOLORP3UIPROC glad_glColorP3ui = NULL; +PFNGLCOLORP3UIVPROC glad_glColorP3uiv = NULL; +PFNGLCOLORP4UIPROC glad_glColorP4ui = NULL; +PFNGLCOLORP4UIVPROC glad_glColorP4uiv = NULL; +PFNGLCOLORPOINTERPROC glad_glColorPointer = NULL; +PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL; +PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL; +PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL; +PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL; +PFNGLCOPYPIXELSPROC glad_glCopyPixels = NULL; +PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL; +PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL; +PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL; +PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATESHADERPROC glad_glCreateShader = NULL; +PFNGLCULLFACEPROC glad_glCullFace = NULL; +PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL; +PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL; +PFNGLDELETELISTSPROC glad_glDeleteLists = NULL; +PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL; +PFNGLDELETEQUERIESPROC glad_glDeleteQueries = NULL; +PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL; +PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers = NULL; +PFNGLDELETESHADERPROC glad_glDeleteShader = NULL; +PFNGLDELETESYNCPROC glad_glDeleteSync = NULL; +PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL; +PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays = NULL; +PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL; +PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; +PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL; +PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; +PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; +PFNGLDISABLEIPROC glad_glDisablei = NULL; +PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL; +PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced = NULL; +PFNGLDRAWBUFFERPROC glad_glDrawBuffer = NULL; +PFNGLDRAWBUFFERSPROC glad_glDrawBuffers = NULL; +PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL; +PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex = NULL; +PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex = NULL; +PFNGLDRAWPIXELSPROC glad_glDrawPixels = NULL; +PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements = NULL; +PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex = NULL; +PFNGLEDGEFLAGPROC glad_glEdgeFlag = NULL; +PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer = NULL; +PFNGLEDGEFLAGVPROC glad_glEdgeFlagv = NULL; +PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState = NULL; +PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL; +PFNGLENABLEIPROC glad_glEnablei = NULL; +PFNGLENDPROC glad_glEnd = NULL; +PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL; +PFNGLENDLISTPROC glad_glEndList = NULL; +PFNGLENDQUERYPROC glad_glEndQuery = NULL; +PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback = NULL; +PFNGLEVALCOORD1DPROC glad_glEvalCoord1d = NULL; +PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv = NULL; +PFNGLEVALCOORD1FPROC glad_glEvalCoord1f = NULL; +PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv = NULL; +PFNGLEVALCOORD2DPROC glad_glEvalCoord2d = NULL; +PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv = NULL; +PFNGLEVALCOORD2FPROC glad_glEvalCoord2f = NULL; +PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv = NULL; +PFNGLEVALMESH1PROC glad_glEvalMesh1 = NULL; +PFNGLEVALMESH2PROC glad_glEvalMesh2 = NULL; +PFNGLEVALPOINT1PROC glad_glEvalPoint1 = NULL; +PFNGLEVALPOINT2PROC glad_glEvalPoint2 = NULL; +PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer = NULL; +PFNGLFENCESYNCPROC glad_glFenceSync = NULL; +PFNGLFINISHPROC glad_glFinish = NULL; +PFNGLFLUSHPROC glad_glFlush = NULL; +PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL; +PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer = NULL; +PFNGLFOGCOORDDPROC glad_glFogCoordd = NULL; +PFNGLFOGCOORDDVPROC glad_glFogCoorddv = NULL; +PFNGLFOGCOORDFPROC glad_glFogCoordf = NULL; +PFNGLFOGCOORDFVPROC glad_glFogCoordfv = NULL; +PFNGLFOGFPROC glad_glFogf = NULL; +PFNGLFOGFVPROC glad_glFogfv = NULL; +PFNGLFOGIPROC glad_glFogi = NULL; +PFNGLFOGIVPROC glad_glFogiv = NULL; +PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; +PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL; +PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D = NULL; +PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL; +PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D = NULL; +PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer = NULL; +PFNGLFRONTFACEPROC glad_glFrontFace = NULL; +PFNGLFRUSTUMPROC glad_glFrustum = NULL; +PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL; +PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL; +PFNGLGENLISTSPROC glad_glGenLists = NULL; +PFNGLGENQUERIESPROC glad_glGenQueries = NULL; +PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL; +PFNGLGENSAMPLERSPROC glad_glGenSamplers = NULL; +PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; +PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL; +PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; +PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL; +PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName = NULL; +PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv = NULL; +PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName = NULL; +PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv = NULL; +PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL; +PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL; +PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v = NULL; +PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; +PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v = NULL; +PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; +PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; +PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL; +PFNGLGETCLIPPLANEPROC glad_glGetClipPlane = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL; +PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL; +PFNGLGETERRORPROC glad_glGetError = NULL; +PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; +PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex = NULL; +PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL; +PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v = NULL; +PFNGLGETINTEGER64VPROC glad_glGetInteger64v = NULL; +PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v = NULL; +PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; +PFNGLGETLIGHTFVPROC glad_glGetLightfv = NULL; +PFNGLGETLIGHTIVPROC glad_glGetLightiv = NULL; +PFNGLGETMAPDVPROC glad_glGetMapdv = NULL; +PFNGLGETMAPFVPROC glad_glGetMapfv = NULL; +PFNGLGETMAPIVPROC glad_glGetMapiv = NULL; +PFNGLGETMATERIALFVPROC glad_glGetMaterialfv = NULL; +PFNGLGETMATERIALIVPROC glad_glGetMaterialiv = NULL; +PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL; +PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv = NULL; +PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv = NULL; +PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv = NULL; +PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL; +PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple = NULL; +PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL; +PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL; +PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL; +PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v = NULL; +PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv = NULL; +PFNGLGETQUERYIVPROC glad_glGetQueryiv = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL; +PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv = NULL; +PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv = NULL; +PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv = NULL; +PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv = NULL; +PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL; +PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL; +PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL; +PFNGLGETSTRINGPROC glad_glGetString = NULL; +PFNGLGETSTRINGIPROC glad_glGetStringi = NULL; +PFNGLGETSYNCIVPROC glad_glGetSynciv = NULL; +PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv = NULL; +PFNGLGETTEXENVIVPROC glad_glGetTexEnviv = NULL; +PFNGLGETTEXGENDVPROC glad_glGetTexGendv = NULL; +PFNGLGETTEXGENFVPROC glad_glGetTexGenfv = NULL; +PFNGLGETTEXGENIVPROC glad_glGetTexGeniv = NULL; +PFNGLGETTEXIMAGEPROC glad_glGetTexImage = NULL; +PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv = NULL; +PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv = NULL; +PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL; +PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL; +PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; +PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL; +PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL; +PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL; +PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; +PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; +PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; +PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL; +PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL; +PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL; +PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL; +PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; +PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLHINTPROC glad_glHint = NULL; +PFNGLINDEXMASKPROC glad_glIndexMask = NULL; +PFNGLINDEXPOINTERPROC glad_glIndexPointer = NULL; +PFNGLINDEXDPROC glad_glIndexd = NULL; +PFNGLINDEXDVPROC glad_glIndexdv = NULL; +PFNGLINDEXFPROC glad_glIndexf = NULL; +PFNGLINDEXFVPROC glad_glIndexfv = NULL; +PFNGLINDEXIPROC glad_glIndexi = NULL; +PFNGLINDEXIVPROC glad_glIndexiv = NULL; +PFNGLINDEXSPROC glad_glIndexs = NULL; +PFNGLINDEXSVPROC glad_glIndexsv = NULL; +PFNGLINDEXUBPROC glad_glIndexub = NULL; +PFNGLINDEXUBVPROC glad_glIndexubv = NULL; +PFNGLINITNAMESPROC glad_glInitNames = NULL; +PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays = NULL; +PFNGLISBUFFERPROC glad_glIsBuffer = NULL; +PFNGLISENABLEDPROC glad_glIsEnabled = NULL; +PFNGLISENABLEDIPROC glad_glIsEnabledi = NULL; +PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL; +PFNGLISLISTPROC glad_glIsList = NULL; +PFNGLISPROGRAMPROC glad_glIsProgram = NULL; +PFNGLISQUERYPROC glad_glIsQuery = NULL; +PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL; +PFNGLISSAMPLERPROC glad_glIsSampler = NULL; +PFNGLISSHADERPROC glad_glIsShader = NULL; +PFNGLISSYNCPROC glad_glIsSync = NULL; +PFNGLISTEXTUREPROC glad_glIsTexture = NULL; +PFNGLISVERTEXARRAYPROC glad_glIsVertexArray = NULL; +PFNGLLIGHTMODELFPROC glad_glLightModelf = NULL; +PFNGLLIGHTMODELFVPROC glad_glLightModelfv = NULL; +PFNGLLIGHTMODELIPROC glad_glLightModeli = NULL; +PFNGLLIGHTMODELIVPROC glad_glLightModeliv = NULL; +PFNGLLIGHTFPROC glad_glLightf = NULL; +PFNGLLIGHTFVPROC glad_glLightfv = NULL; +PFNGLLIGHTIPROC glad_glLighti = NULL; +PFNGLLIGHTIVPROC glad_glLightiv = NULL; +PFNGLLINESTIPPLEPROC glad_glLineStipple = NULL; +PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; +PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; +PFNGLLISTBASEPROC glad_glListBase = NULL; +PFNGLLOADIDENTITYPROC glad_glLoadIdentity = NULL; +PFNGLLOADMATRIXDPROC glad_glLoadMatrixd = NULL; +PFNGLLOADMATRIXFPROC glad_glLoadMatrixf = NULL; +PFNGLLOADNAMEPROC glad_glLoadName = NULL; +PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd = NULL; +PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf = NULL; +PFNGLLOGICOPPROC glad_glLogicOp = NULL; +PFNGLMAP1DPROC glad_glMap1d = NULL; +PFNGLMAP1FPROC glad_glMap1f = NULL; +PFNGLMAP2DPROC glad_glMap2d = NULL; +PFNGLMAP2FPROC glad_glMap2f = NULL; +PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; +PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL; +PFNGLMAPGRID1DPROC glad_glMapGrid1d = NULL; +PFNGLMAPGRID1FPROC glad_glMapGrid1f = NULL; +PFNGLMAPGRID2DPROC glad_glMapGrid2d = NULL; +PFNGLMAPGRID2FPROC glad_glMapGrid2f = NULL; +PFNGLMATERIALFPROC glad_glMaterialf = NULL; +PFNGLMATERIALFVPROC glad_glMaterialfv = NULL; +PFNGLMATERIALIPROC glad_glMateriali = NULL; +PFNGLMATERIALIVPROC glad_glMaterialiv = NULL; +PFNGLMATRIXMODEPROC glad_glMatrixMode = NULL; +PFNGLMULTMATRIXDPROC glad_glMultMatrixd = NULL; +PFNGLMULTMATRIXFPROC glad_glMultMatrixf = NULL; +PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd = NULL; +PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf = NULL; +PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL; +PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL; +PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL; +PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d = NULL; +PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv = NULL; +PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f = NULL; +PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv = NULL; +PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i = NULL; +PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv = NULL; +PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s = NULL; +PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv = NULL; +PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d = NULL; +PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv = NULL; +PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f = NULL; +PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv = NULL; +PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i = NULL; +PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv = NULL; +PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s = NULL; +PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv = NULL; +PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d = NULL; +PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv = NULL; +PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f = NULL; +PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv = NULL; +PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i = NULL; +PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv = NULL; +PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s = NULL; +PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv = NULL; +PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d = NULL; +PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv = NULL; +PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f = NULL; +PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv = NULL; +PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i = NULL; +PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv = NULL; +PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s = NULL; +PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv = NULL; +PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL; +PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL; +PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL; +PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv = NULL; +PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL; +PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL; +PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL; +PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL; +PFNGLNEWLISTPROC glad_glNewList = NULL; +PFNGLNORMAL3BPROC glad_glNormal3b = NULL; +PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL; +PFNGLNORMAL3DPROC glad_glNormal3d = NULL; +PFNGLNORMAL3DVPROC glad_glNormal3dv = NULL; +PFNGLNORMAL3FPROC glad_glNormal3f = NULL; +PFNGLNORMAL3FVPROC glad_glNormal3fv = NULL; +PFNGLNORMAL3IPROC glad_glNormal3i = NULL; +PFNGLNORMAL3IVPROC glad_glNormal3iv = NULL; +PFNGLNORMAL3SPROC glad_glNormal3s = NULL; +PFNGLNORMAL3SVPROC glad_glNormal3sv = NULL; +PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL; +PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL; +PFNGLNORMALPOINTERPROC glad_glNormalPointer = NULL; +PFNGLORTHOPROC glad_glOrtho = NULL; +PFNGLPASSTHROUGHPROC glad_glPassThrough = NULL; +PFNGLPIXELMAPFVPROC glad_glPixelMapfv = NULL; +PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv = NULL; +PFNGLPIXELMAPUSVPROC glad_glPixelMapusv = NULL; +PFNGLPIXELSTOREFPROC glad_glPixelStoref = NULL; +PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; +PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf = NULL; +PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi = NULL; +PFNGLPIXELZOOMPROC glad_glPixelZoom = NULL; +PFNGLPOINTPARAMETERFPROC glad_glPointParameterf = NULL; +PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv = NULL; +PFNGLPOINTPARAMETERIPROC glad_glPointParameteri = NULL; +PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL; +PFNGLPOINTSIZEPROC glad_glPointSize = NULL; +PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL; +PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple = NULL; +PFNGLPOPATTRIBPROC glad_glPopAttrib = NULL; +PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib = NULL; +PFNGLPOPMATRIXPROC glad_glPopMatrix = NULL; +PFNGLPOPNAMEPROC glad_glPopName = NULL; +PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL; +PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures = NULL; +PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex = NULL; +PFNGLPUSHATTRIBPROC glad_glPushAttrib = NULL; +PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib = NULL; +PFNGLPUSHMATRIXPROC glad_glPushMatrix = NULL; +PFNGLPUSHNAMEPROC glad_glPushName = NULL; +PFNGLQUERYCOUNTERPROC glad_glQueryCounter = NULL; +PFNGLRASTERPOS2DPROC glad_glRasterPos2d = NULL; +PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv = NULL; +PFNGLRASTERPOS2FPROC glad_glRasterPos2f = NULL; +PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv = NULL; +PFNGLRASTERPOS2IPROC glad_glRasterPos2i = NULL; +PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv = NULL; +PFNGLRASTERPOS2SPROC glad_glRasterPos2s = NULL; +PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv = NULL; +PFNGLRASTERPOS3DPROC glad_glRasterPos3d = NULL; +PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv = NULL; +PFNGLRASTERPOS3FPROC glad_glRasterPos3f = NULL; +PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv = NULL; +PFNGLRASTERPOS3IPROC glad_glRasterPos3i = NULL; +PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv = NULL; +PFNGLRASTERPOS3SPROC glad_glRasterPos3s = NULL; +PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv = NULL; +PFNGLRASTERPOS4DPROC glad_glRasterPos4d = NULL; +PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv = NULL; +PFNGLRASTERPOS4FPROC glad_glRasterPos4f = NULL; +PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv = NULL; +PFNGLRASTERPOS4IPROC glad_glRasterPos4i = NULL; +PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv = NULL; +PFNGLRASTERPOS4SPROC glad_glRasterPos4s = NULL; +PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv = NULL; +PFNGLREADBUFFERPROC glad_glReadBuffer = NULL; +PFNGLREADPIXELSPROC glad_glReadPixels = NULL; +PFNGLRECTDPROC glad_glRectd = NULL; +PFNGLRECTDVPROC glad_glRectdv = NULL; +PFNGLRECTFPROC glad_glRectf = NULL; +PFNGLRECTFVPROC glad_glRectfv = NULL; +PFNGLRECTIPROC glad_glRecti = NULL; +PFNGLRECTIVPROC glad_glRectiv = NULL; +PFNGLRECTSPROC glad_glRects = NULL; +PFNGLRECTSVPROC glad_glRectsv = NULL; +PFNGLRENDERMODEPROC glad_glRenderMode = NULL; +PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample = NULL; +PFNGLROTATEDPROC glad_glRotated = NULL; +PFNGLROTATEFPROC glad_glRotatef = NULL; +PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL; +PFNGLSAMPLEMASKIPROC glad_glSampleMaski = NULL; +PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv = NULL; +PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv = NULL; +PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf = NULL; +PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv = NULL; +PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri = NULL; +PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv = NULL; +PFNGLSCALEDPROC glad_glScaled = NULL; +PFNGLSCALEFPROC glad_glScalef = NULL; +PFNGLSCISSORPROC glad_glScissor = NULL; +PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b = NULL; +PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv = NULL; +PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d = NULL; +PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv = NULL; +PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f = NULL; +PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv = NULL; +PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i = NULL; +PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv = NULL; +PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s = NULL; +PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv = NULL; +PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub = NULL; +PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv = NULL; +PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui = NULL; +PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv = NULL; +PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us = NULL; +PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv = NULL; +PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui = NULL; +PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL; +PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer = NULL; +PFNGLSELECTBUFFERPROC glad_glSelectBuffer = NULL; +PFNGLSHADEMODELPROC glad_glShadeModel = NULL; +PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; +PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; +PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; +PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; +PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL; +PFNGLSTENCILOPPROC glad_glStencilOp = NULL; +PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL; +PFNGLTEXBUFFERPROC glad_glTexBuffer = NULL; +PFNGLTEXCOORD1DPROC glad_glTexCoord1d = NULL; +PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv = NULL; +PFNGLTEXCOORD1FPROC glad_glTexCoord1f = NULL; +PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv = NULL; +PFNGLTEXCOORD1IPROC glad_glTexCoord1i = NULL; +PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv = NULL; +PFNGLTEXCOORD1SPROC glad_glTexCoord1s = NULL; +PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv = NULL; +PFNGLTEXCOORD2DPROC glad_glTexCoord2d = NULL; +PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv = NULL; +PFNGLTEXCOORD2FPROC glad_glTexCoord2f = NULL; +PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv = NULL; +PFNGLTEXCOORD2IPROC glad_glTexCoord2i = NULL; +PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv = NULL; +PFNGLTEXCOORD2SPROC glad_glTexCoord2s = NULL; +PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv = NULL; +PFNGLTEXCOORD3DPROC glad_glTexCoord3d = NULL; +PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv = NULL; +PFNGLTEXCOORD3FPROC glad_glTexCoord3f = NULL; +PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv = NULL; +PFNGLTEXCOORD3IPROC glad_glTexCoord3i = NULL; +PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv = NULL; +PFNGLTEXCOORD3SPROC glad_glTexCoord3s = NULL; +PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv = NULL; +PFNGLTEXCOORD4DPROC glad_glTexCoord4d = NULL; +PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv = NULL; +PFNGLTEXCOORD4FPROC glad_glTexCoord4f = NULL; +PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv = NULL; +PFNGLTEXCOORD4IPROC glad_glTexCoord4i = NULL; +PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv = NULL; +PFNGLTEXCOORD4SPROC glad_glTexCoord4s = NULL; +PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv = NULL; +PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui = NULL; +PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv = NULL; +PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui = NULL; +PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv = NULL; +PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui = NULL; +PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv = NULL; +PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui = NULL; +PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv = NULL; +PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer = NULL; +PFNGLTEXENVFPROC glad_glTexEnvf = NULL; +PFNGLTEXENVFVPROC glad_glTexEnvfv = NULL; +PFNGLTEXENVIPROC glad_glTexEnvi = NULL; +PFNGLTEXENVIVPROC glad_glTexEnviv = NULL; +PFNGLTEXGENDPROC glad_glTexGend = NULL; +PFNGLTEXGENDVPROC glad_glTexGendv = NULL; +PFNGLTEXGENFPROC glad_glTexGenf = NULL; +PFNGLTEXGENFVPROC glad_glTexGenfv = NULL; +PFNGLTEXGENIPROC glad_glTexGeni = NULL; +PFNGLTEXGENIVPROC glad_glTexGeniv = NULL; +PFNGLTEXIMAGE1DPROC glad_glTexImage1D = NULL; +PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; +PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample = NULL; +PFNGLTEXIMAGE3DPROC glad_glTexImage3D = NULL; +PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample = NULL; +PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv = NULL; +PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv = NULL; +PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; +PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; +PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; +PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; +PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL; +PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; +PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL; +PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL; +PFNGLTRANSLATEDPROC glad_glTranslated = NULL; +PFNGLTRANSLATEFPROC glad_glTranslatef = NULL; +PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; +PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL; +PFNGLUNIFORM1IPROC glad_glUniform1i = NULL; +PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL; +PFNGLUNIFORM1UIPROC glad_glUniform1ui = NULL; +PFNGLUNIFORM1UIVPROC glad_glUniform1uiv = NULL; +PFNGLUNIFORM2FPROC glad_glUniform2f = NULL; +PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL; +PFNGLUNIFORM2IPROC glad_glUniform2i = NULL; +PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL; +PFNGLUNIFORM2UIPROC glad_glUniform2ui = NULL; +PFNGLUNIFORM2UIVPROC glad_glUniform2uiv = NULL; +PFNGLUNIFORM3FPROC glad_glUniform3f = NULL; +PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL; +PFNGLUNIFORM3IPROC glad_glUniform3i = NULL; +PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL; +PFNGLUNIFORM3UIPROC glad_glUniform3ui = NULL; +PFNGLUNIFORM3UIVPROC glad_glUniform3uiv = NULL; +PFNGLUNIFORM4FPROC glad_glUniform4f = NULL; +PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL; +PFNGLUNIFORM4IPROC glad_glUniform4i = NULL; +PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL; +PFNGLUNIFORM4UIPROC glad_glUniform4ui = NULL; +PFNGLUNIFORM4UIVPROC glad_glUniform4uiv = NULL; +PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding = NULL; +PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv = NULL; +PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv = NULL; +PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv = NULL; +PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv = NULL; +PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL; +PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv = NULL; +PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL; +PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; +PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; +PFNGLVERTEX2DPROC glad_glVertex2d = NULL; +PFNGLVERTEX2DVPROC glad_glVertex2dv = NULL; +PFNGLVERTEX2FPROC glad_glVertex2f = NULL; +PFNGLVERTEX2FVPROC glad_glVertex2fv = NULL; +PFNGLVERTEX2IPROC glad_glVertex2i = NULL; +PFNGLVERTEX2IVPROC glad_glVertex2iv = NULL; +PFNGLVERTEX2SPROC glad_glVertex2s = NULL; +PFNGLVERTEX2SVPROC glad_glVertex2sv = NULL; +PFNGLVERTEX3DPROC glad_glVertex3d = NULL; +PFNGLVERTEX3DVPROC glad_glVertex3dv = NULL; +PFNGLVERTEX3FPROC glad_glVertex3f = NULL; +PFNGLVERTEX3FVPROC glad_glVertex3fv = NULL; +PFNGLVERTEX3IPROC glad_glVertex3i = NULL; +PFNGLVERTEX3IVPROC glad_glVertex3iv = NULL; +PFNGLVERTEX3SPROC glad_glVertex3s = NULL; +PFNGLVERTEX3SVPROC glad_glVertex3sv = NULL; +PFNGLVERTEX4DPROC glad_glVertex4d = NULL; +PFNGLVERTEX4DVPROC glad_glVertex4dv = NULL; +PFNGLVERTEX4FPROC glad_glVertex4f = NULL; +PFNGLVERTEX4FVPROC glad_glVertex4fv = NULL; +PFNGLVERTEX4IPROC glad_glVertex4i = NULL; +PFNGLVERTEX4IVPROC glad_glVertex4iv = NULL; +PFNGLVERTEX4SPROC glad_glVertex4s = NULL; +PFNGLVERTEX4SVPROC glad_glVertex4sv = NULL; +PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL; +PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL; +PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; +PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s = NULL; +PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv = NULL; +PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d = NULL; +PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv = NULL; +PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL; +PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s = NULL; +PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv = NULL; +PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d = NULL; +PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv = NULL; +PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL; +PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s = NULL; +PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv = NULL; +PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv = NULL; +PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv = NULL; +PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv = NULL; +PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub = NULL; +PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv = NULL; +PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv = NULL; +PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv = NULL; +PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv = NULL; +PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d = NULL; +PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv = NULL; +PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL; +PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv = NULL; +PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s = NULL; +PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv = NULL; +PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv = NULL; +PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv = NULL; +PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv = NULL; +PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor = NULL; +PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i = NULL; +PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv = NULL; +PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui = NULL; +PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv = NULL; +PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i = NULL; +PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv = NULL; +PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui = NULL; +PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv = NULL; +PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i = NULL; +PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv = NULL; +PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui = NULL; +PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv = NULL; +PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv = NULL; +PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i = NULL; +PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv = NULL; +PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv = NULL; +PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv = NULL; +PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui = NULL; +PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv = NULL; +PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv = NULL; +PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer = NULL; +PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui = NULL; +PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv = NULL; +PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui = NULL; +PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv = NULL; +PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui = NULL; +PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv = NULL; +PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui = NULL; +PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv = NULL; +PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL; +PFNGLVERTEXP2UIPROC glad_glVertexP2ui = NULL; +PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv = NULL; +PFNGLVERTEXP3UIPROC glad_glVertexP3ui = NULL; +PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv = NULL; +PFNGLVERTEXP4UIPROC glad_glVertexP4ui = NULL; +PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv = NULL; +PFNGLVERTEXPOINTERPROC glad_glVertexPointer = NULL; +PFNGLVIEWPORTPROC glad_glViewport = NULL; +PFNGLWAITSYNCPROC glad_glWaitSync = NULL; +PFNGLWINDOWPOS2DPROC glad_glWindowPos2d = NULL; +PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv = NULL; +PFNGLWINDOWPOS2FPROC glad_glWindowPos2f = NULL; +PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv = NULL; +PFNGLWINDOWPOS2IPROC glad_glWindowPos2i = NULL; +PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv = NULL; +PFNGLWINDOWPOS2SPROC glad_glWindowPos2s = NULL; +PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv = NULL; +PFNGLWINDOWPOS3DPROC glad_glWindowPos3d = NULL; +PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv = NULL; +PFNGLWINDOWPOS3FPROC glad_glWindowPos3f = NULL; +PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv = NULL; +PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL; +PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL; +PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL; +PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL; + + +static void glad_gl_load_GL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_0) return; + glad_glAccum = (PFNGLACCUMPROC) load(userptr, "glAccum"); + glad_glAlphaFunc = (PFNGLALPHAFUNCPROC) load(userptr, "glAlphaFunc"); + glad_glBegin = (PFNGLBEGINPROC) load(userptr, "glBegin"); + glad_glBitmap = (PFNGLBITMAPPROC) load(userptr, "glBitmap"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC) load(userptr, "glBlendFunc"); + glad_glCallList = (PFNGLCALLLISTPROC) load(userptr, "glCallList"); + glad_glCallLists = (PFNGLCALLLISTSPROC) load(userptr, "glCallLists"); + glad_glClear = (PFNGLCLEARPROC) load(userptr, "glClear"); + glad_glClearAccum = (PFNGLCLEARACCUMPROC) load(userptr, "glClearAccum"); + glad_glClearColor = (PFNGLCLEARCOLORPROC) load(userptr, "glClearColor"); + glad_glClearDepth = (PFNGLCLEARDEPTHPROC) load(userptr, "glClearDepth"); + glad_glClearIndex = (PFNGLCLEARINDEXPROC) load(userptr, "glClearIndex"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC) load(userptr, "glClearStencil"); + glad_glClipPlane = (PFNGLCLIPPLANEPROC) load(userptr, "glClipPlane"); + glad_glColor3b = (PFNGLCOLOR3BPROC) load(userptr, "glColor3b"); + glad_glColor3bv = (PFNGLCOLOR3BVPROC) load(userptr, "glColor3bv"); + glad_glColor3d = (PFNGLCOLOR3DPROC) load(userptr, "glColor3d"); + glad_glColor3dv = (PFNGLCOLOR3DVPROC) load(userptr, "glColor3dv"); + glad_glColor3f = (PFNGLCOLOR3FPROC) load(userptr, "glColor3f"); + glad_glColor3fv = (PFNGLCOLOR3FVPROC) load(userptr, "glColor3fv"); + glad_glColor3i = (PFNGLCOLOR3IPROC) load(userptr, "glColor3i"); + glad_glColor3iv = (PFNGLCOLOR3IVPROC) load(userptr, "glColor3iv"); + glad_glColor3s = (PFNGLCOLOR3SPROC) load(userptr, "glColor3s"); + glad_glColor3sv = (PFNGLCOLOR3SVPROC) load(userptr, "glColor3sv"); + glad_glColor3ub = (PFNGLCOLOR3UBPROC) load(userptr, "glColor3ub"); + glad_glColor3ubv = (PFNGLCOLOR3UBVPROC) load(userptr, "glColor3ubv"); + glad_glColor3ui = (PFNGLCOLOR3UIPROC) load(userptr, "glColor3ui"); + glad_glColor3uiv = (PFNGLCOLOR3UIVPROC) load(userptr, "glColor3uiv"); + glad_glColor3us = (PFNGLCOLOR3USPROC) load(userptr, "glColor3us"); + glad_glColor3usv = (PFNGLCOLOR3USVPROC) load(userptr, "glColor3usv"); + glad_glColor4b = (PFNGLCOLOR4BPROC) load(userptr, "glColor4b"); + glad_glColor4bv = (PFNGLCOLOR4BVPROC) load(userptr, "glColor4bv"); + glad_glColor4d = (PFNGLCOLOR4DPROC) load(userptr, "glColor4d"); + glad_glColor4dv = (PFNGLCOLOR4DVPROC) load(userptr, "glColor4dv"); + glad_glColor4f = (PFNGLCOLOR4FPROC) load(userptr, "glColor4f"); + glad_glColor4fv = (PFNGLCOLOR4FVPROC) load(userptr, "glColor4fv"); + glad_glColor4i = (PFNGLCOLOR4IPROC) load(userptr, "glColor4i"); + glad_glColor4iv = (PFNGLCOLOR4IVPROC) load(userptr, "glColor4iv"); + glad_glColor4s = (PFNGLCOLOR4SPROC) load(userptr, "glColor4s"); + glad_glColor4sv = (PFNGLCOLOR4SVPROC) load(userptr, "glColor4sv"); + glad_glColor4ub = (PFNGLCOLOR4UBPROC) load(userptr, "glColor4ub"); + glad_glColor4ubv = (PFNGLCOLOR4UBVPROC) load(userptr, "glColor4ubv"); + glad_glColor4ui = (PFNGLCOLOR4UIPROC) load(userptr, "glColor4ui"); + glad_glColor4uiv = (PFNGLCOLOR4UIVPROC) load(userptr, "glColor4uiv"); + glad_glColor4us = (PFNGLCOLOR4USPROC) load(userptr, "glColor4us"); + glad_glColor4usv = (PFNGLCOLOR4USVPROC) load(userptr, "glColor4usv"); + glad_glColorMask = (PFNGLCOLORMASKPROC) load(userptr, "glColorMask"); + glad_glColorMaterial = (PFNGLCOLORMATERIALPROC) load(userptr, "glColorMaterial"); + glad_glCopyPixels = (PFNGLCOPYPIXELSPROC) load(userptr, "glCopyPixels"); + glad_glCullFace = (PFNGLCULLFACEPROC) load(userptr, "glCullFace"); + glad_glDeleteLists = (PFNGLDELETELISTSPROC) load(userptr, "glDeleteLists"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC) load(userptr, "glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC) load(userptr, "glDepthMask"); + glad_glDepthRange = (PFNGLDEPTHRANGEPROC) load(userptr, "glDepthRange"); + glad_glDisable = (PFNGLDISABLEPROC) load(userptr, "glDisable"); + glad_glDrawBuffer = (PFNGLDRAWBUFFERPROC) load(userptr, "glDrawBuffer"); + glad_glDrawPixels = (PFNGLDRAWPIXELSPROC) load(userptr, "glDrawPixels"); + glad_glEdgeFlag = (PFNGLEDGEFLAGPROC) load(userptr, "glEdgeFlag"); + glad_glEdgeFlagv = (PFNGLEDGEFLAGVPROC) load(userptr, "glEdgeFlagv"); + glad_glEnable = (PFNGLENABLEPROC) load(userptr, "glEnable"); + glad_glEnd = (PFNGLENDPROC) load(userptr, "glEnd"); + glad_glEndList = (PFNGLENDLISTPROC) load(userptr, "glEndList"); + glad_glEvalCoord1d = (PFNGLEVALCOORD1DPROC) load(userptr, "glEvalCoord1d"); + glad_glEvalCoord1dv = (PFNGLEVALCOORD1DVPROC) load(userptr, "glEvalCoord1dv"); + glad_glEvalCoord1f = (PFNGLEVALCOORD1FPROC) load(userptr, "glEvalCoord1f"); + glad_glEvalCoord1fv = (PFNGLEVALCOORD1FVPROC) load(userptr, "glEvalCoord1fv"); + glad_glEvalCoord2d = (PFNGLEVALCOORD2DPROC) load(userptr, "glEvalCoord2d"); + glad_glEvalCoord2dv = (PFNGLEVALCOORD2DVPROC) load(userptr, "glEvalCoord2dv"); + glad_glEvalCoord2f = (PFNGLEVALCOORD2FPROC) load(userptr, "glEvalCoord2f"); + glad_glEvalCoord2fv = (PFNGLEVALCOORD2FVPROC) load(userptr, "glEvalCoord2fv"); + glad_glEvalMesh1 = (PFNGLEVALMESH1PROC) load(userptr, "glEvalMesh1"); + glad_glEvalMesh2 = (PFNGLEVALMESH2PROC) load(userptr, "glEvalMesh2"); + glad_glEvalPoint1 = (PFNGLEVALPOINT1PROC) load(userptr, "glEvalPoint1"); + glad_glEvalPoint2 = (PFNGLEVALPOINT2PROC) load(userptr, "glEvalPoint2"); + glad_glFeedbackBuffer = (PFNGLFEEDBACKBUFFERPROC) load(userptr, "glFeedbackBuffer"); + glad_glFinish = (PFNGLFINISHPROC) load(userptr, "glFinish"); + glad_glFlush = (PFNGLFLUSHPROC) load(userptr, "glFlush"); + glad_glFogf = (PFNGLFOGFPROC) load(userptr, "glFogf"); + glad_glFogfv = (PFNGLFOGFVPROC) load(userptr, "glFogfv"); + glad_glFogi = (PFNGLFOGIPROC) load(userptr, "glFogi"); + glad_glFogiv = (PFNGLFOGIVPROC) load(userptr, "glFogiv"); + glad_glFrontFace = (PFNGLFRONTFACEPROC) load(userptr, "glFrontFace"); + glad_glFrustum = (PFNGLFRUSTUMPROC) load(userptr, "glFrustum"); + glad_glGenLists = (PFNGLGENLISTSPROC) load(userptr, "glGenLists"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC) load(userptr, "glGetBooleanv"); + glad_glGetClipPlane = (PFNGLGETCLIPPLANEPROC) load(userptr, "glGetClipPlane"); + glad_glGetDoublev = (PFNGLGETDOUBLEVPROC) load(userptr, "glGetDoublev"); + glad_glGetError = (PFNGLGETERRORPROC) load(userptr, "glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC) load(userptr, "glGetFloatv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC) load(userptr, "glGetIntegerv"); + glad_glGetLightfv = (PFNGLGETLIGHTFVPROC) load(userptr, "glGetLightfv"); + glad_glGetLightiv = (PFNGLGETLIGHTIVPROC) load(userptr, "glGetLightiv"); + glad_glGetMapdv = (PFNGLGETMAPDVPROC) load(userptr, "glGetMapdv"); + glad_glGetMapfv = (PFNGLGETMAPFVPROC) load(userptr, "glGetMapfv"); + glad_glGetMapiv = (PFNGLGETMAPIVPROC) load(userptr, "glGetMapiv"); + glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC) load(userptr, "glGetMaterialfv"); + glad_glGetMaterialiv = (PFNGLGETMATERIALIVPROC) load(userptr, "glGetMaterialiv"); + glad_glGetPixelMapfv = (PFNGLGETPIXELMAPFVPROC) load(userptr, "glGetPixelMapfv"); + glad_glGetPixelMapuiv = (PFNGLGETPIXELMAPUIVPROC) load(userptr, "glGetPixelMapuiv"); + glad_glGetPixelMapusv = (PFNGLGETPIXELMAPUSVPROC) load(userptr, "glGetPixelMapusv"); + glad_glGetPolygonStipple = (PFNGLGETPOLYGONSTIPPLEPROC) load(userptr, "glGetPolygonStipple"); + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC) load(userptr, "glGetTexEnvfv"); + glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC) load(userptr, "glGetTexEnviv"); + glad_glGetTexGendv = (PFNGLGETTEXGENDVPROC) load(userptr, "glGetTexGendv"); + glad_glGetTexGenfv = (PFNGLGETTEXGENFVPROC) load(userptr, "glGetTexGenfv"); + glad_glGetTexGeniv = (PFNGLGETTEXGENIVPROC) load(userptr, "glGetTexGeniv"); + glad_glGetTexImage = (PFNGLGETTEXIMAGEPROC) load(userptr, "glGetTexImage"); + glad_glGetTexLevelParameterfv = (PFNGLGETTEXLEVELPARAMETERFVPROC) load(userptr, "glGetTexLevelParameterfv"); + glad_glGetTexLevelParameteriv = (PFNGLGETTEXLEVELPARAMETERIVPROC) load(userptr, "glGetTexLevelParameteriv"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC) load(userptr, "glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC) load(userptr, "glGetTexParameteriv"); + glad_glHint = (PFNGLHINTPROC) load(userptr, "glHint"); + glad_glIndexMask = (PFNGLINDEXMASKPROC) load(userptr, "glIndexMask"); + glad_glIndexd = (PFNGLINDEXDPROC) load(userptr, "glIndexd"); + glad_glIndexdv = (PFNGLINDEXDVPROC) load(userptr, "glIndexdv"); + glad_glIndexf = (PFNGLINDEXFPROC) load(userptr, "glIndexf"); + glad_glIndexfv = (PFNGLINDEXFVPROC) load(userptr, "glIndexfv"); + glad_glIndexi = (PFNGLINDEXIPROC) load(userptr, "glIndexi"); + glad_glIndexiv = (PFNGLINDEXIVPROC) load(userptr, "glIndexiv"); + glad_glIndexs = (PFNGLINDEXSPROC) load(userptr, "glIndexs"); + glad_glIndexsv = (PFNGLINDEXSVPROC) load(userptr, "glIndexsv"); + glad_glInitNames = (PFNGLINITNAMESPROC) load(userptr, "glInitNames"); + glad_glIsEnabled = (PFNGLISENABLEDPROC) load(userptr, "glIsEnabled"); + glad_glIsList = (PFNGLISLISTPROC) load(userptr, "glIsList"); + glad_glLightModelf = (PFNGLLIGHTMODELFPROC) load(userptr, "glLightModelf"); + glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC) load(userptr, "glLightModelfv"); + glad_glLightModeli = (PFNGLLIGHTMODELIPROC) load(userptr, "glLightModeli"); + glad_glLightModeliv = (PFNGLLIGHTMODELIVPROC) load(userptr, "glLightModeliv"); + glad_glLightf = (PFNGLLIGHTFPROC) load(userptr, "glLightf"); + glad_glLightfv = (PFNGLLIGHTFVPROC) load(userptr, "glLightfv"); + glad_glLighti = (PFNGLLIGHTIPROC) load(userptr, "glLighti"); + glad_glLightiv = (PFNGLLIGHTIVPROC) load(userptr, "glLightiv"); + glad_glLineStipple = (PFNGLLINESTIPPLEPROC) load(userptr, "glLineStipple"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC) load(userptr, "glLineWidth"); + glad_glListBase = (PFNGLLISTBASEPROC) load(userptr, "glListBase"); + glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC) load(userptr, "glLoadIdentity"); + glad_glLoadMatrixd = (PFNGLLOADMATRIXDPROC) load(userptr, "glLoadMatrixd"); + glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC) load(userptr, "glLoadMatrixf"); + glad_glLoadName = (PFNGLLOADNAMEPROC) load(userptr, "glLoadName"); + glad_glLogicOp = (PFNGLLOGICOPPROC) load(userptr, "glLogicOp"); + glad_glMap1d = (PFNGLMAP1DPROC) load(userptr, "glMap1d"); + glad_glMap1f = (PFNGLMAP1FPROC) load(userptr, "glMap1f"); + glad_glMap2d = (PFNGLMAP2DPROC) load(userptr, "glMap2d"); + glad_glMap2f = (PFNGLMAP2FPROC) load(userptr, "glMap2f"); + glad_glMapGrid1d = (PFNGLMAPGRID1DPROC) load(userptr, "glMapGrid1d"); + glad_glMapGrid1f = (PFNGLMAPGRID1FPROC) load(userptr, "glMapGrid1f"); + glad_glMapGrid2d = (PFNGLMAPGRID2DPROC) load(userptr, "glMapGrid2d"); + glad_glMapGrid2f = (PFNGLMAPGRID2FPROC) load(userptr, "glMapGrid2f"); + glad_glMaterialf = (PFNGLMATERIALFPROC) load(userptr, "glMaterialf"); + glad_glMaterialfv = (PFNGLMATERIALFVPROC) load(userptr, "glMaterialfv"); + glad_glMateriali = (PFNGLMATERIALIPROC) load(userptr, "glMateriali"); + glad_glMaterialiv = (PFNGLMATERIALIVPROC) load(userptr, "glMaterialiv"); + glad_glMatrixMode = (PFNGLMATRIXMODEPROC) load(userptr, "glMatrixMode"); + glad_glMultMatrixd = (PFNGLMULTMATRIXDPROC) load(userptr, "glMultMatrixd"); + glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC) load(userptr, "glMultMatrixf"); + glad_glNewList = (PFNGLNEWLISTPROC) load(userptr, "glNewList"); + glad_glNormal3b = (PFNGLNORMAL3BPROC) load(userptr, "glNormal3b"); + glad_glNormal3bv = (PFNGLNORMAL3BVPROC) load(userptr, "glNormal3bv"); + glad_glNormal3d = (PFNGLNORMAL3DPROC) load(userptr, "glNormal3d"); + glad_glNormal3dv = (PFNGLNORMAL3DVPROC) load(userptr, "glNormal3dv"); + glad_glNormal3f = (PFNGLNORMAL3FPROC) load(userptr, "glNormal3f"); + glad_glNormal3fv = (PFNGLNORMAL3FVPROC) load(userptr, "glNormal3fv"); + glad_glNormal3i = (PFNGLNORMAL3IPROC) load(userptr, "glNormal3i"); + glad_glNormal3iv = (PFNGLNORMAL3IVPROC) load(userptr, "glNormal3iv"); + glad_glNormal3s = (PFNGLNORMAL3SPROC) load(userptr, "glNormal3s"); + glad_glNormal3sv = (PFNGLNORMAL3SVPROC) load(userptr, "glNormal3sv"); + glad_glOrtho = (PFNGLORTHOPROC) load(userptr, "glOrtho"); + glad_glPassThrough = (PFNGLPASSTHROUGHPROC) load(userptr, "glPassThrough"); + glad_glPixelMapfv = (PFNGLPIXELMAPFVPROC) load(userptr, "glPixelMapfv"); + glad_glPixelMapuiv = (PFNGLPIXELMAPUIVPROC) load(userptr, "glPixelMapuiv"); + glad_glPixelMapusv = (PFNGLPIXELMAPUSVPROC) load(userptr, "glPixelMapusv"); + glad_glPixelStoref = (PFNGLPIXELSTOREFPROC) load(userptr, "glPixelStoref"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC) load(userptr, "glPixelStorei"); + glad_glPixelTransferf = (PFNGLPIXELTRANSFERFPROC) load(userptr, "glPixelTransferf"); + glad_glPixelTransferi = (PFNGLPIXELTRANSFERIPROC) load(userptr, "glPixelTransferi"); + glad_glPixelZoom = (PFNGLPIXELZOOMPROC) load(userptr, "glPixelZoom"); + glad_glPointSize = (PFNGLPOINTSIZEPROC) load(userptr, "glPointSize"); + glad_glPolygonMode = (PFNGLPOLYGONMODEPROC) load(userptr, "glPolygonMode"); + glad_glPolygonStipple = (PFNGLPOLYGONSTIPPLEPROC) load(userptr, "glPolygonStipple"); + glad_glPopAttrib = (PFNGLPOPATTRIBPROC) load(userptr, "glPopAttrib"); + glad_glPopMatrix = (PFNGLPOPMATRIXPROC) load(userptr, "glPopMatrix"); + glad_glPopName = (PFNGLPOPNAMEPROC) load(userptr, "glPopName"); + glad_glPushAttrib = (PFNGLPUSHATTRIBPROC) load(userptr, "glPushAttrib"); + glad_glPushMatrix = (PFNGLPUSHMATRIXPROC) load(userptr, "glPushMatrix"); + glad_glPushName = (PFNGLPUSHNAMEPROC) load(userptr, "glPushName"); + glad_glRasterPos2d = (PFNGLRASTERPOS2DPROC) load(userptr, "glRasterPos2d"); + glad_glRasterPos2dv = (PFNGLRASTERPOS2DVPROC) load(userptr, "glRasterPos2dv"); + glad_glRasterPos2f = (PFNGLRASTERPOS2FPROC) load(userptr, "glRasterPos2f"); + glad_glRasterPos2fv = (PFNGLRASTERPOS2FVPROC) load(userptr, "glRasterPos2fv"); + glad_glRasterPos2i = (PFNGLRASTERPOS2IPROC) load(userptr, "glRasterPos2i"); + glad_glRasterPos2iv = (PFNGLRASTERPOS2IVPROC) load(userptr, "glRasterPos2iv"); + glad_glRasterPos2s = (PFNGLRASTERPOS2SPROC) load(userptr, "glRasterPos2s"); + glad_glRasterPos2sv = (PFNGLRASTERPOS2SVPROC) load(userptr, "glRasterPos2sv"); + glad_glRasterPos3d = (PFNGLRASTERPOS3DPROC) load(userptr, "glRasterPos3d"); + glad_glRasterPos3dv = (PFNGLRASTERPOS3DVPROC) load(userptr, "glRasterPos3dv"); + glad_glRasterPos3f = (PFNGLRASTERPOS3FPROC) load(userptr, "glRasterPos3f"); + glad_glRasterPos3fv = (PFNGLRASTERPOS3FVPROC) load(userptr, "glRasterPos3fv"); + glad_glRasterPos3i = (PFNGLRASTERPOS3IPROC) load(userptr, "glRasterPos3i"); + glad_glRasterPos3iv = (PFNGLRASTERPOS3IVPROC) load(userptr, "glRasterPos3iv"); + glad_glRasterPos3s = (PFNGLRASTERPOS3SPROC) load(userptr, "glRasterPos3s"); + glad_glRasterPos3sv = (PFNGLRASTERPOS3SVPROC) load(userptr, "glRasterPos3sv"); + glad_glRasterPos4d = (PFNGLRASTERPOS4DPROC) load(userptr, "glRasterPos4d"); + glad_glRasterPos4dv = (PFNGLRASTERPOS4DVPROC) load(userptr, "glRasterPos4dv"); + glad_glRasterPos4f = (PFNGLRASTERPOS4FPROC) load(userptr, "glRasterPos4f"); + glad_glRasterPos4fv = (PFNGLRASTERPOS4FVPROC) load(userptr, "glRasterPos4fv"); + glad_glRasterPos4i = (PFNGLRASTERPOS4IPROC) load(userptr, "glRasterPos4i"); + glad_glRasterPos4iv = (PFNGLRASTERPOS4IVPROC) load(userptr, "glRasterPos4iv"); + glad_glRasterPos4s = (PFNGLRASTERPOS4SPROC) load(userptr, "glRasterPos4s"); + glad_glRasterPos4sv = (PFNGLRASTERPOS4SVPROC) load(userptr, "glRasterPos4sv"); + glad_glReadBuffer = (PFNGLREADBUFFERPROC) load(userptr, "glReadBuffer"); + glad_glReadPixels = (PFNGLREADPIXELSPROC) load(userptr, "glReadPixels"); + glad_glRectd = (PFNGLRECTDPROC) load(userptr, "glRectd"); + glad_glRectdv = (PFNGLRECTDVPROC) load(userptr, "glRectdv"); + glad_glRectf = (PFNGLRECTFPROC) load(userptr, "glRectf"); + glad_glRectfv = (PFNGLRECTFVPROC) load(userptr, "glRectfv"); + glad_glRecti = (PFNGLRECTIPROC) load(userptr, "glRecti"); + glad_glRectiv = (PFNGLRECTIVPROC) load(userptr, "glRectiv"); + glad_glRects = (PFNGLRECTSPROC) load(userptr, "glRects"); + glad_glRectsv = (PFNGLRECTSVPROC) load(userptr, "glRectsv"); + glad_glRenderMode = (PFNGLRENDERMODEPROC) load(userptr, "glRenderMode"); + glad_glRotated = (PFNGLROTATEDPROC) load(userptr, "glRotated"); + glad_glRotatef = (PFNGLROTATEFPROC) load(userptr, "glRotatef"); + glad_glScaled = (PFNGLSCALEDPROC) load(userptr, "glScaled"); + glad_glScalef = (PFNGLSCALEFPROC) load(userptr, "glScalef"); + glad_glScissor = (PFNGLSCISSORPROC) load(userptr, "glScissor"); + glad_glSelectBuffer = (PFNGLSELECTBUFFERPROC) load(userptr, "glSelectBuffer"); + glad_glShadeModel = (PFNGLSHADEMODELPROC) load(userptr, "glShadeModel"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC) load(userptr, "glStencilFunc"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC) load(userptr, "glStencilMask"); + glad_glStencilOp = (PFNGLSTENCILOPPROC) load(userptr, "glStencilOp"); + glad_glTexCoord1d = (PFNGLTEXCOORD1DPROC) load(userptr, "glTexCoord1d"); + glad_glTexCoord1dv = (PFNGLTEXCOORD1DVPROC) load(userptr, "glTexCoord1dv"); + glad_glTexCoord1f = (PFNGLTEXCOORD1FPROC) load(userptr, "glTexCoord1f"); + glad_glTexCoord1fv = (PFNGLTEXCOORD1FVPROC) load(userptr, "glTexCoord1fv"); + glad_glTexCoord1i = (PFNGLTEXCOORD1IPROC) load(userptr, "glTexCoord1i"); + glad_glTexCoord1iv = (PFNGLTEXCOORD1IVPROC) load(userptr, "glTexCoord1iv"); + glad_glTexCoord1s = (PFNGLTEXCOORD1SPROC) load(userptr, "glTexCoord1s"); + glad_glTexCoord1sv = (PFNGLTEXCOORD1SVPROC) load(userptr, "glTexCoord1sv"); + glad_glTexCoord2d = (PFNGLTEXCOORD2DPROC) load(userptr, "glTexCoord2d"); + glad_glTexCoord2dv = (PFNGLTEXCOORD2DVPROC) load(userptr, "glTexCoord2dv"); + glad_glTexCoord2f = (PFNGLTEXCOORD2FPROC) load(userptr, "glTexCoord2f"); + glad_glTexCoord2fv = (PFNGLTEXCOORD2FVPROC) load(userptr, "glTexCoord2fv"); + glad_glTexCoord2i = (PFNGLTEXCOORD2IPROC) load(userptr, "glTexCoord2i"); + glad_glTexCoord2iv = (PFNGLTEXCOORD2IVPROC) load(userptr, "glTexCoord2iv"); + glad_glTexCoord2s = (PFNGLTEXCOORD2SPROC) load(userptr, "glTexCoord2s"); + glad_glTexCoord2sv = (PFNGLTEXCOORD2SVPROC) load(userptr, "glTexCoord2sv"); + glad_glTexCoord3d = (PFNGLTEXCOORD3DPROC) load(userptr, "glTexCoord3d"); + glad_glTexCoord3dv = (PFNGLTEXCOORD3DVPROC) load(userptr, "glTexCoord3dv"); + glad_glTexCoord3f = (PFNGLTEXCOORD3FPROC) load(userptr, "glTexCoord3f"); + glad_glTexCoord3fv = (PFNGLTEXCOORD3FVPROC) load(userptr, "glTexCoord3fv"); + glad_glTexCoord3i = (PFNGLTEXCOORD3IPROC) load(userptr, "glTexCoord3i"); + glad_glTexCoord3iv = (PFNGLTEXCOORD3IVPROC) load(userptr, "glTexCoord3iv"); + glad_glTexCoord3s = (PFNGLTEXCOORD3SPROC) load(userptr, "glTexCoord3s"); + glad_glTexCoord3sv = (PFNGLTEXCOORD3SVPROC) load(userptr, "glTexCoord3sv"); + glad_glTexCoord4d = (PFNGLTEXCOORD4DPROC) load(userptr, "glTexCoord4d"); + glad_glTexCoord4dv = (PFNGLTEXCOORD4DVPROC) load(userptr, "glTexCoord4dv"); + glad_glTexCoord4f = (PFNGLTEXCOORD4FPROC) load(userptr, "glTexCoord4f"); + glad_glTexCoord4fv = (PFNGLTEXCOORD4FVPROC) load(userptr, "glTexCoord4fv"); + glad_glTexCoord4i = (PFNGLTEXCOORD4IPROC) load(userptr, "glTexCoord4i"); + glad_glTexCoord4iv = (PFNGLTEXCOORD4IVPROC) load(userptr, "glTexCoord4iv"); + glad_glTexCoord4s = (PFNGLTEXCOORD4SPROC) load(userptr, "glTexCoord4s"); + glad_glTexCoord4sv = (PFNGLTEXCOORD4SVPROC) load(userptr, "glTexCoord4sv"); + glad_glTexEnvf = (PFNGLTEXENVFPROC) load(userptr, "glTexEnvf"); + glad_glTexEnvfv = (PFNGLTEXENVFVPROC) load(userptr, "glTexEnvfv"); + glad_glTexEnvi = (PFNGLTEXENVIPROC) load(userptr, "glTexEnvi"); + glad_glTexEnviv = (PFNGLTEXENVIVPROC) load(userptr, "glTexEnviv"); + glad_glTexGend = (PFNGLTEXGENDPROC) load(userptr, "glTexGend"); + glad_glTexGendv = (PFNGLTEXGENDVPROC) load(userptr, "glTexGendv"); + glad_glTexGenf = (PFNGLTEXGENFPROC) load(userptr, "glTexGenf"); + glad_glTexGenfv = (PFNGLTEXGENFVPROC) load(userptr, "glTexGenfv"); + glad_glTexGeni = (PFNGLTEXGENIPROC) load(userptr, "glTexGeni"); + glad_glTexGeniv = (PFNGLTEXGENIVPROC) load(userptr, "glTexGeniv"); + glad_glTexImage1D = (PFNGLTEXIMAGE1DPROC) load(userptr, "glTexImage1D"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC) load(userptr, "glTexImage2D"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC) load(userptr, "glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC) load(userptr, "glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC) load(userptr, "glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC) load(userptr, "glTexParameteriv"); + glad_glTranslated = (PFNGLTRANSLATEDPROC) load(userptr, "glTranslated"); + glad_glTranslatef = (PFNGLTRANSLATEFPROC) load(userptr, "glTranslatef"); + glad_glVertex2d = (PFNGLVERTEX2DPROC) load(userptr, "glVertex2d"); + glad_glVertex2dv = (PFNGLVERTEX2DVPROC) load(userptr, "glVertex2dv"); + glad_glVertex2f = (PFNGLVERTEX2FPROC) load(userptr, "glVertex2f"); + glad_glVertex2fv = (PFNGLVERTEX2FVPROC) load(userptr, "glVertex2fv"); + glad_glVertex2i = (PFNGLVERTEX2IPROC) load(userptr, "glVertex2i"); + glad_glVertex2iv = (PFNGLVERTEX2IVPROC) load(userptr, "glVertex2iv"); + glad_glVertex2s = (PFNGLVERTEX2SPROC) load(userptr, "glVertex2s"); + glad_glVertex2sv = (PFNGLVERTEX2SVPROC) load(userptr, "glVertex2sv"); + glad_glVertex3d = (PFNGLVERTEX3DPROC) load(userptr, "glVertex3d"); + glad_glVertex3dv = (PFNGLVERTEX3DVPROC) load(userptr, "glVertex3dv"); + glad_glVertex3f = (PFNGLVERTEX3FPROC) load(userptr, "glVertex3f"); + glad_glVertex3fv = (PFNGLVERTEX3FVPROC) load(userptr, "glVertex3fv"); + glad_glVertex3i = (PFNGLVERTEX3IPROC) load(userptr, "glVertex3i"); + glad_glVertex3iv = (PFNGLVERTEX3IVPROC) load(userptr, "glVertex3iv"); + glad_glVertex3s = (PFNGLVERTEX3SPROC) load(userptr, "glVertex3s"); + glad_glVertex3sv = (PFNGLVERTEX3SVPROC) load(userptr, "glVertex3sv"); + glad_glVertex4d = (PFNGLVERTEX4DPROC) load(userptr, "glVertex4d"); + glad_glVertex4dv = (PFNGLVERTEX4DVPROC) load(userptr, "glVertex4dv"); + glad_glVertex4f = (PFNGLVERTEX4FPROC) load(userptr, "glVertex4f"); + glad_glVertex4fv = (PFNGLVERTEX4FVPROC) load(userptr, "glVertex4fv"); + glad_glVertex4i = (PFNGLVERTEX4IPROC) load(userptr, "glVertex4i"); + glad_glVertex4iv = (PFNGLVERTEX4IVPROC) load(userptr, "glVertex4iv"); + glad_glVertex4s = (PFNGLVERTEX4SPROC) load(userptr, "glVertex4s"); + glad_glVertex4sv = (PFNGLVERTEX4SVPROC) load(userptr, "glVertex4sv"); + glad_glViewport = (PFNGLVIEWPORTPROC) load(userptr, "glViewport"); +} +static void glad_gl_load_GL_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_1) return; + glad_glAreTexturesResident = (PFNGLARETEXTURESRESIDENTPROC) load(userptr, "glAreTexturesResident"); + glad_glArrayElement = (PFNGLARRAYELEMENTPROC) load(userptr, "glArrayElement"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC) load(userptr, "glBindTexture"); + glad_glColorPointer = (PFNGLCOLORPOINTERPROC) load(userptr, "glColorPointer"); + glad_glCopyTexImage1D = (PFNGLCOPYTEXIMAGE1DPROC) load(userptr, "glCopyTexImage1D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC) load(userptr, "glCopyTexImage2D"); + glad_glCopyTexSubImage1D = (PFNGLCOPYTEXSUBIMAGE1DPROC) load(userptr, "glCopyTexSubImage1D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC) load(userptr, "glCopyTexSubImage2D"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC) load(userptr, "glDeleteTextures"); + glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC) load(userptr, "glDisableClientState"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC) load(userptr, "glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC) load(userptr, "glDrawElements"); + glad_glEdgeFlagPointer = (PFNGLEDGEFLAGPOINTERPROC) load(userptr, "glEdgeFlagPointer"); + glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC) load(userptr, "glEnableClientState"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC) load(userptr, "glGenTextures"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC) load(userptr, "glGetPointerv"); + glad_glIndexPointer = (PFNGLINDEXPOINTERPROC) load(userptr, "glIndexPointer"); + glad_glIndexub = (PFNGLINDEXUBPROC) load(userptr, "glIndexub"); + glad_glIndexubv = (PFNGLINDEXUBVPROC) load(userptr, "glIndexubv"); + glad_glInterleavedArrays = (PFNGLINTERLEAVEDARRAYSPROC) load(userptr, "glInterleavedArrays"); + glad_glIsTexture = (PFNGLISTEXTUREPROC) load(userptr, "glIsTexture"); + glad_glNormalPointer = (PFNGLNORMALPOINTERPROC) load(userptr, "glNormalPointer"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC) load(userptr, "glPolygonOffset"); + glad_glPopClientAttrib = (PFNGLPOPCLIENTATTRIBPROC) load(userptr, "glPopClientAttrib"); + glad_glPrioritizeTextures = (PFNGLPRIORITIZETEXTURESPROC) load(userptr, "glPrioritizeTextures"); + glad_glPushClientAttrib = (PFNGLPUSHCLIENTATTRIBPROC) load(userptr, "glPushClientAttrib"); + glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC) load(userptr, "glTexCoordPointer"); + glad_glTexSubImage1D = (PFNGLTEXSUBIMAGE1DPROC) load(userptr, "glTexSubImage1D"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC) load(userptr, "glTexSubImage2D"); + glad_glVertexPointer = (PFNGLVERTEXPOINTERPROC) load(userptr, "glVertexPointer"); +} +static void glad_gl_load_GL_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_2) return; + glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC) load(userptr, "glCopyTexSubImage3D"); + glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC) load(userptr, "glDrawRangeElements"); + glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC) load(userptr, "glTexImage3D"); + glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC) load(userptr, "glTexSubImage3D"); +} +static void glad_gl_load_GL_VERSION_1_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_3) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC) load(userptr, "glActiveTexture"); + glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC) load(userptr, "glClientActiveTexture"); + glad_glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC) load(userptr, "glCompressedTexImage1D"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC) load(userptr, "glCompressedTexImage2D"); + glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC) load(userptr, "glCompressedTexImage3D"); + glad_glCompressedTexSubImage1D = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) load(userptr, "glCompressedTexSubImage1D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) load(userptr, "glCompressedTexSubImage2D"); + glad_glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) load(userptr, "glCompressedTexSubImage3D"); + glad_glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC) load(userptr, "glGetCompressedTexImage"); + glad_glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC) load(userptr, "glLoadTransposeMatrixd"); + glad_glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC) load(userptr, "glLoadTransposeMatrixf"); + glad_glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC) load(userptr, "glMultTransposeMatrixd"); + glad_glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC) load(userptr, "glMultTransposeMatrixf"); + glad_glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC) load(userptr, "glMultiTexCoord1d"); + glad_glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC) load(userptr, "glMultiTexCoord1dv"); + glad_glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC) load(userptr, "glMultiTexCoord1f"); + glad_glMultiTexCoord1fv = (PFNGLMULTITEXCOORD1FVPROC) load(userptr, "glMultiTexCoord1fv"); + glad_glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC) load(userptr, "glMultiTexCoord1i"); + glad_glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC) load(userptr, "glMultiTexCoord1iv"); + glad_glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC) load(userptr, "glMultiTexCoord1s"); + glad_glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC) load(userptr, "glMultiTexCoord1sv"); + glad_glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC) load(userptr, "glMultiTexCoord2d"); + glad_glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC) load(userptr, "glMultiTexCoord2dv"); + glad_glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC) load(userptr, "glMultiTexCoord2f"); + glad_glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC) load(userptr, "glMultiTexCoord2fv"); + glad_glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC) load(userptr, "glMultiTexCoord2i"); + glad_glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC) load(userptr, "glMultiTexCoord2iv"); + glad_glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC) load(userptr, "glMultiTexCoord2s"); + glad_glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC) load(userptr, "glMultiTexCoord2sv"); + glad_glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC) load(userptr, "glMultiTexCoord3d"); + glad_glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC) load(userptr, "glMultiTexCoord3dv"); + glad_glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC) load(userptr, "glMultiTexCoord3f"); + glad_glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC) load(userptr, "glMultiTexCoord3fv"); + glad_glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC) load(userptr, "glMultiTexCoord3i"); + glad_glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC) load(userptr, "glMultiTexCoord3iv"); + glad_glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC) load(userptr, "glMultiTexCoord3s"); + glad_glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC) load(userptr, "glMultiTexCoord3sv"); + glad_glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC) load(userptr, "glMultiTexCoord4d"); + glad_glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC) load(userptr, "glMultiTexCoord4dv"); + glad_glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC) load(userptr, "glMultiTexCoord4f"); + glad_glMultiTexCoord4fv = (PFNGLMULTITEXCOORD4FVPROC) load(userptr, "glMultiTexCoord4fv"); + glad_glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC) load(userptr, "glMultiTexCoord4i"); + glad_glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC) load(userptr, "glMultiTexCoord4iv"); + glad_glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC) load(userptr, "glMultiTexCoord4s"); + glad_glMultiTexCoord4sv = (PFNGLMULTITEXCOORD4SVPROC) load(userptr, "glMultiTexCoord4sv"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC) load(userptr, "glSampleCoverage"); +} +static void glad_gl_load_GL_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_4) return; + glad_glBlendColor = (PFNGLBLENDCOLORPROC) load(userptr, "glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC) load(userptr, "glBlendEquation"); + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC) load(userptr, "glBlendFuncSeparate"); + glad_glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC) load(userptr, "glFogCoordPointer"); + glad_glFogCoordd = (PFNGLFOGCOORDDPROC) load(userptr, "glFogCoordd"); + glad_glFogCoorddv = (PFNGLFOGCOORDDVPROC) load(userptr, "glFogCoorddv"); + glad_glFogCoordf = (PFNGLFOGCOORDFPROC) load(userptr, "glFogCoordf"); + glad_glFogCoordfv = (PFNGLFOGCOORDFVPROC) load(userptr, "glFogCoordfv"); + glad_glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC) load(userptr, "glMultiDrawArrays"); + glad_glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC) load(userptr, "glMultiDrawElements"); + glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC) load(userptr, "glPointParameterf"); + glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC) load(userptr, "glPointParameterfv"); + glad_glPointParameteri = (PFNGLPOINTPARAMETERIPROC) load(userptr, "glPointParameteri"); + glad_glPointParameteriv = (PFNGLPOINTPARAMETERIVPROC) load(userptr, "glPointParameteriv"); + glad_glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC) load(userptr, "glSecondaryColor3b"); + glad_glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC) load(userptr, "glSecondaryColor3bv"); + glad_glSecondaryColor3d = (PFNGLSECONDARYCOLOR3DPROC) load(userptr, "glSecondaryColor3d"); + glad_glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC) load(userptr, "glSecondaryColor3dv"); + glad_glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC) load(userptr, "glSecondaryColor3f"); + glad_glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC) load(userptr, "glSecondaryColor3fv"); + glad_glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC) load(userptr, "glSecondaryColor3i"); + glad_glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC) load(userptr, "glSecondaryColor3iv"); + glad_glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC) load(userptr, "glSecondaryColor3s"); + glad_glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC) load(userptr, "glSecondaryColor3sv"); + glad_glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC) load(userptr, "glSecondaryColor3ub"); + glad_glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC) load(userptr, "glSecondaryColor3ubv"); + glad_glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC) load(userptr, "glSecondaryColor3ui"); + glad_glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC) load(userptr, "glSecondaryColor3uiv"); + glad_glSecondaryColor3us = (PFNGLSECONDARYCOLOR3USPROC) load(userptr, "glSecondaryColor3us"); + glad_glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC) load(userptr, "glSecondaryColor3usv"); + glad_glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC) load(userptr, "glSecondaryColorPointer"); + glad_glWindowPos2d = (PFNGLWINDOWPOS2DPROC) load(userptr, "glWindowPos2d"); + glad_glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC) load(userptr, "glWindowPos2dv"); + glad_glWindowPos2f = (PFNGLWINDOWPOS2FPROC) load(userptr, "glWindowPos2f"); + glad_glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC) load(userptr, "glWindowPos2fv"); + glad_glWindowPos2i = (PFNGLWINDOWPOS2IPROC) load(userptr, "glWindowPos2i"); + glad_glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC) load(userptr, "glWindowPos2iv"); + glad_glWindowPos2s = (PFNGLWINDOWPOS2SPROC) load(userptr, "glWindowPos2s"); + glad_glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC) load(userptr, "glWindowPos2sv"); + glad_glWindowPos3d = (PFNGLWINDOWPOS3DPROC) load(userptr, "glWindowPos3d"); + glad_glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC) load(userptr, "glWindowPos3dv"); + glad_glWindowPos3f = (PFNGLWINDOWPOS3FPROC) load(userptr, "glWindowPos3f"); + glad_glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC) load(userptr, "glWindowPos3fv"); + glad_glWindowPos3i = (PFNGLWINDOWPOS3IPROC) load(userptr, "glWindowPos3i"); + glad_glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC) load(userptr, "glWindowPos3iv"); + glad_glWindowPos3s = (PFNGLWINDOWPOS3SPROC) load(userptr, "glWindowPos3s"); + glad_glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC) load(userptr, "glWindowPos3sv"); +} +static void glad_gl_load_GL_VERSION_1_5( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_1_5) return; + glad_glBeginQuery = (PFNGLBEGINQUERYPROC) load(userptr, "glBeginQuery"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC) load(userptr, "glBindBuffer"); + glad_glBufferData = (PFNGLBUFFERDATAPROC) load(userptr, "glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC) load(userptr, "glBufferSubData"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) load(userptr, "glDeleteBuffers"); + glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC) load(userptr, "glDeleteQueries"); + glad_glEndQuery = (PFNGLENDQUERYPROC) load(userptr, "glEndQuery"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC) load(userptr, "glGenBuffers"); + glad_glGenQueries = (PFNGLGENQUERIESPROC) load(userptr, "glGenQueries"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC) load(userptr, "glGetBufferParameteriv"); + glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC) load(userptr, "glGetBufferPointerv"); + glad_glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC) load(userptr, "glGetBufferSubData"); + glad_glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC) load(userptr, "glGetQueryObjectiv"); + glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC) load(userptr, "glGetQueryObjectuiv"); + glad_glGetQueryiv = (PFNGLGETQUERYIVPROC) load(userptr, "glGetQueryiv"); + glad_glIsBuffer = (PFNGLISBUFFERPROC) load(userptr, "glIsBuffer"); + glad_glIsQuery = (PFNGLISQUERYPROC) load(userptr, "glIsQuery"); + glad_glMapBuffer = (PFNGLMAPBUFFERPROC) load(userptr, "glMapBuffer"); + glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) load(userptr, "glUnmapBuffer"); +} +static void glad_gl_load_GL_VERSION_2_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_2_0) return; + glad_glAttachShader = (PFNGLATTACHSHADERPROC) load(userptr, "glAttachShader"); + glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC) load(userptr, "glBindAttribLocation"); + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC) load(userptr, "glBlendEquationSeparate"); + glad_glCompileShader = (PFNGLCOMPILESHADERPROC) load(userptr, "glCompileShader"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC) load(userptr, "glCreateProgram"); + glad_glCreateShader = (PFNGLCREATESHADERPROC) load(userptr, "glCreateShader"); + glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC) load(userptr, "glDeleteProgram"); + glad_glDeleteShader = (PFNGLDELETESHADERPROC) load(userptr, "glDeleteShader"); + glad_glDetachShader = (PFNGLDETACHSHADERPROC) load(userptr, "glDetachShader"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) load(userptr, "glDisableVertexAttribArray"); + glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC) load(userptr, "glDrawBuffers"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC) load(userptr, "glEnableVertexAttribArray"); + glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC) load(userptr, "glGetActiveAttrib"); + glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC) load(userptr, "glGetActiveUniform"); + glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC) load(userptr, "glGetAttachedShaders"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC) load(userptr, "glGetAttribLocation"); + glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC) load(userptr, "glGetProgramInfoLog"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC) load(userptr, "glGetProgramiv"); + glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC) load(userptr, "glGetShaderInfoLog"); + glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC) load(userptr, "glGetShaderSource"); + glad_glGetShaderiv = (PFNGLGETSHADERIVPROC) load(userptr, "glGetShaderiv"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC) load(userptr, "glGetUniformLocation"); + glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC) load(userptr, "glGetUniformfv"); + glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC) load(userptr, "glGetUniformiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC) load(userptr, "glGetVertexAttribPointerv"); + glad_glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC) load(userptr, "glGetVertexAttribdv"); + glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC) load(userptr, "glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC) load(userptr, "glGetVertexAttribiv"); + glad_glIsProgram = (PFNGLISPROGRAMPROC) load(userptr, "glIsProgram"); + glad_glIsShader = (PFNGLISSHADERPROC) load(userptr, "glIsShader"); + glad_glLinkProgram = (PFNGLLINKPROGRAMPROC) load(userptr, "glLinkProgram"); + glad_glShaderSource = (PFNGLSHADERSOURCEPROC) load(userptr, "glShaderSource"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC) load(userptr, "glStencilFuncSeparate"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC) load(userptr, "glStencilMaskSeparate"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC) load(userptr, "glStencilOpSeparate"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC) load(userptr, "glUniform1f"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC) load(userptr, "glUniform1fv"); + glad_glUniform1i = (PFNGLUNIFORM1IPROC) load(userptr, "glUniform1i"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC) load(userptr, "glUniform1iv"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC) load(userptr, "glUniform2f"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC) load(userptr, "glUniform2fv"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC) load(userptr, "glUniform2i"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC) load(userptr, "glUniform2iv"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC) load(userptr, "glUniform3f"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC) load(userptr, "glUniform3fv"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC) load(userptr, "glUniform3i"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC) load(userptr, "glUniform3iv"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC) load(userptr, "glUniform4f"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC) load(userptr, "glUniform4fv"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC) load(userptr, "glUniform4i"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC) load(userptr, "glUniform4iv"); + glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC) load(userptr, "glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC) load(userptr, "glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC) load(userptr, "glUniformMatrix4fv"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC) load(userptr, "glUseProgram"); + glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC) load(userptr, "glValidateProgram"); + glad_glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC) load(userptr, "glVertexAttrib1d"); + glad_glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC) load(userptr, "glVertexAttrib1dv"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC) load(userptr, "glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC) load(userptr, "glVertexAttrib1fv"); + glad_glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC) load(userptr, "glVertexAttrib1s"); + glad_glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC) load(userptr, "glVertexAttrib1sv"); + glad_glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC) load(userptr, "glVertexAttrib2d"); + glad_glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC) load(userptr, "glVertexAttrib2dv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC) load(userptr, "glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC) load(userptr, "glVertexAttrib2fv"); + glad_glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC) load(userptr, "glVertexAttrib2s"); + glad_glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC) load(userptr, "glVertexAttrib2sv"); + glad_glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC) load(userptr, "glVertexAttrib3d"); + glad_glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC) load(userptr, "glVertexAttrib3dv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC) load(userptr, "glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC) load(userptr, "glVertexAttrib3fv"); + glad_glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC) load(userptr, "glVertexAttrib3s"); + glad_glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC) load(userptr, "glVertexAttrib3sv"); + glad_glVertexAttrib4Nbv = (PFNGLVERTEXATTRIB4NBVPROC) load(userptr, "glVertexAttrib4Nbv"); + glad_glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC) load(userptr, "glVertexAttrib4Niv"); + glad_glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC) load(userptr, "glVertexAttrib4Nsv"); + glad_glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC) load(userptr, "glVertexAttrib4Nub"); + glad_glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC) load(userptr, "glVertexAttrib4Nubv"); + glad_glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC) load(userptr, "glVertexAttrib4Nuiv"); + glad_glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC) load(userptr, "glVertexAttrib4Nusv"); + glad_glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC) load(userptr, "glVertexAttrib4bv"); + glad_glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC) load(userptr, "glVertexAttrib4d"); + glad_glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC) load(userptr, "glVertexAttrib4dv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC) load(userptr, "glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC) load(userptr, "glVertexAttrib4fv"); + glad_glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC) load(userptr, "glVertexAttrib4iv"); + glad_glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC) load(userptr, "glVertexAttrib4s"); + glad_glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC) load(userptr, "glVertexAttrib4sv"); + glad_glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC) load(userptr, "glVertexAttrib4ubv"); + glad_glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC) load(userptr, "glVertexAttrib4uiv"); + glad_glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC) load(userptr, "glVertexAttrib4usv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC) load(userptr, "glVertexAttribPointer"); +} +static void glad_gl_load_GL_VERSION_2_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_2_1) return; + glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC) load(userptr, "glUniformMatrix2x3fv"); + glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC) load(userptr, "glUniformMatrix2x4fv"); + glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC) load(userptr, "glUniformMatrix3x2fv"); + glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC) load(userptr, "glUniformMatrix3x4fv"); + glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC) load(userptr, "glUniformMatrix4x2fv"); + glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC) load(userptr, "glUniformMatrix4x3fv"); +} +static void glad_gl_load_GL_VERSION_3_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_0) return; + glad_glBeginConditionalRender = (PFNGLBEGINCONDITIONALRENDERPROC) load(userptr, "glBeginConditionalRender"); + glad_glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC) load(userptr, "glBeginTransformFeedback"); + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) load(userptr, "glBindBufferBase"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) load(userptr, "glBindBufferRange"); + glad_glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC) load(userptr, "glBindFragDataLocation"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC) load(userptr, "glBindFramebuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC) load(userptr, "glBindRenderbuffer"); + glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC) load(userptr, "glBindVertexArray"); + glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC) load(userptr, "glBlitFramebuffer"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC) load(userptr, "glCheckFramebufferStatus"); + glad_glClampColor = (PFNGLCLAMPCOLORPROC) load(userptr, "glClampColor"); + glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC) load(userptr, "glClearBufferfi"); + glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC) load(userptr, "glClearBufferfv"); + glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC) load(userptr, "glClearBufferiv"); + glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC) load(userptr, "glClearBufferuiv"); + glad_glColorMaski = (PFNGLCOLORMASKIPROC) load(userptr, "glColorMaski"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC) load(userptr, "glDeleteFramebuffers"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC) load(userptr, "glDeleteRenderbuffers"); + glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC) load(userptr, "glDeleteVertexArrays"); + glad_glDisablei = (PFNGLDISABLEIPROC) load(userptr, "glDisablei"); + glad_glEnablei = (PFNGLENABLEIPROC) load(userptr, "glEnablei"); + glad_glEndConditionalRender = (PFNGLENDCONDITIONALRENDERPROC) load(userptr, "glEndConditionalRender"); + glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC) load(userptr, "glEndTransformFeedback"); + glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC) load(userptr, "glFlushMappedBufferRange"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC) load(userptr, "glFramebufferRenderbuffer"); + glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC) load(userptr, "glFramebufferTexture1D"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC) load(userptr, "glFramebufferTexture2D"); + glad_glFramebufferTexture3D = (PFNGLFRAMEBUFFERTEXTURE3DPROC) load(userptr, "glFramebufferTexture3D"); + glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC) load(userptr, "glFramebufferTextureLayer"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC) load(userptr, "glGenFramebuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC) load(userptr, "glGenRenderbuffers"); + glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC) load(userptr, "glGenVertexArrays"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC) load(userptr, "glGenerateMipmap"); + glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC) load(userptr, "glGetBooleani_v"); + glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC) load(userptr, "glGetFragDataLocation"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) load(userptr, "glGetFramebufferAttachmentParameteriv"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC) load(userptr, "glGetIntegeri_v"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC) load(userptr, "glGetRenderbufferParameteriv"); + glad_glGetStringi = (PFNGLGETSTRINGIPROC) load(userptr, "glGetStringi"); + glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC) load(userptr, "glGetTexParameterIiv"); + glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC) load(userptr, "glGetTexParameterIuiv"); + glad_glGetTransformFeedbackVarying = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) load(userptr, "glGetTransformFeedbackVarying"); + glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC) load(userptr, "glGetUniformuiv"); + glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC) load(userptr, "glGetVertexAttribIiv"); + glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC) load(userptr, "glGetVertexAttribIuiv"); + glad_glIsEnabledi = (PFNGLISENABLEDIPROC) load(userptr, "glIsEnabledi"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC) load(userptr, "glIsFramebuffer"); + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC) load(userptr, "glIsRenderbuffer"); + glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC) load(userptr, "glIsVertexArray"); + glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC) load(userptr, "glMapBufferRange"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC) load(userptr, "glRenderbufferStorage"); + glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) load(userptr, "glRenderbufferStorageMultisample"); + glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC) load(userptr, "glTexParameterIiv"); + glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC) load(userptr, "glTexParameterIuiv"); + glad_glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC) load(userptr, "glTransformFeedbackVaryings"); + glad_glUniform1ui = (PFNGLUNIFORM1UIPROC) load(userptr, "glUniform1ui"); + glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC) load(userptr, "glUniform1uiv"); + glad_glUniform2ui = (PFNGLUNIFORM2UIPROC) load(userptr, "glUniform2ui"); + glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC) load(userptr, "glUniform2uiv"); + glad_glUniform3ui = (PFNGLUNIFORM3UIPROC) load(userptr, "glUniform3ui"); + glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC) load(userptr, "glUniform3uiv"); + glad_glUniform4ui = (PFNGLUNIFORM4UIPROC) load(userptr, "glUniform4ui"); + glad_glUniform4uiv = (PFNGLUNIFORM4UIVPROC) load(userptr, "glUniform4uiv"); + glad_glVertexAttribI1i = (PFNGLVERTEXATTRIBI1IPROC) load(userptr, "glVertexAttribI1i"); + glad_glVertexAttribI1iv = (PFNGLVERTEXATTRIBI1IVPROC) load(userptr, "glVertexAttribI1iv"); + glad_glVertexAttribI1ui = (PFNGLVERTEXATTRIBI1UIPROC) load(userptr, "glVertexAttribI1ui"); + glad_glVertexAttribI1uiv = (PFNGLVERTEXATTRIBI1UIVPROC) load(userptr, "glVertexAttribI1uiv"); + glad_glVertexAttribI2i = (PFNGLVERTEXATTRIBI2IPROC) load(userptr, "glVertexAttribI2i"); + glad_glVertexAttribI2iv = (PFNGLVERTEXATTRIBI2IVPROC) load(userptr, "glVertexAttribI2iv"); + glad_glVertexAttribI2ui = (PFNGLVERTEXATTRIBI2UIPROC) load(userptr, "glVertexAttribI2ui"); + glad_glVertexAttribI2uiv = (PFNGLVERTEXATTRIBI2UIVPROC) load(userptr, "glVertexAttribI2uiv"); + glad_glVertexAttribI3i = (PFNGLVERTEXATTRIBI3IPROC) load(userptr, "glVertexAttribI3i"); + glad_glVertexAttribI3iv = (PFNGLVERTEXATTRIBI3IVPROC) load(userptr, "glVertexAttribI3iv"); + glad_glVertexAttribI3ui = (PFNGLVERTEXATTRIBI3UIPROC) load(userptr, "glVertexAttribI3ui"); + glad_glVertexAttribI3uiv = (PFNGLVERTEXATTRIBI3UIVPROC) load(userptr, "glVertexAttribI3uiv"); + glad_glVertexAttribI4bv = (PFNGLVERTEXATTRIBI4BVPROC) load(userptr, "glVertexAttribI4bv"); + glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC) load(userptr, "glVertexAttribI4i"); + glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC) load(userptr, "glVertexAttribI4iv"); + glad_glVertexAttribI4sv = (PFNGLVERTEXATTRIBI4SVPROC) load(userptr, "glVertexAttribI4sv"); + glad_glVertexAttribI4ubv = (PFNGLVERTEXATTRIBI4UBVPROC) load(userptr, "glVertexAttribI4ubv"); + glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC) load(userptr, "glVertexAttribI4ui"); + glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC) load(userptr, "glVertexAttribI4uiv"); + glad_glVertexAttribI4usv = (PFNGLVERTEXATTRIBI4USVPROC) load(userptr, "glVertexAttribI4usv"); + glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC) load(userptr, "glVertexAttribIPointer"); +} +static void glad_gl_load_GL_VERSION_3_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_1) return; + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) load(userptr, "glBindBufferBase"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) load(userptr, "glBindBufferRange"); + glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC) load(userptr, "glCopyBufferSubData"); + glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC) load(userptr, "glDrawArraysInstanced"); + glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC) load(userptr, "glDrawElementsInstanced"); + glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC) load(userptr, "glGetActiveUniformBlockName"); + glad_glGetActiveUniformBlockiv = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC) load(userptr, "glGetActiveUniformBlockiv"); + glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC) load(userptr, "glGetActiveUniformName"); + glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC) load(userptr, "glGetActiveUniformsiv"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC) load(userptr, "glGetIntegeri_v"); + glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC) load(userptr, "glGetUniformBlockIndex"); + glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC) load(userptr, "glGetUniformIndices"); + glad_glPrimitiveRestartIndex = (PFNGLPRIMITIVERESTARTINDEXPROC) load(userptr, "glPrimitiveRestartIndex"); + glad_glTexBuffer = (PFNGLTEXBUFFERPROC) load(userptr, "glTexBuffer"); + glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC) load(userptr, "glUniformBlockBinding"); +} +static void glad_gl_load_GL_VERSION_3_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_2) return; + glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC) load(userptr, "glClientWaitSync"); + glad_glDeleteSync = (PFNGLDELETESYNCPROC) load(userptr, "glDeleteSync"); + glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC) load(userptr, "glDrawElementsBaseVertex"); + glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC) load(userptr, "glDrawElementsInstancedBaseVertex"); + glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC) load(userptr, "glDrawRangeElementsBaseVertex"); + glad_glFenceSync = (PFNGLFENCESYNCPROC) load(userptr, "glFenceSync"); + glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC) load(userptr, "glFramebufferTexture"); + glad_glGetBufferParameteri64v = (PFNGLGETBUFFERPARAMETERI64VPROC) load(userptr, "glGetBufferParameteri64v"); + glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC) load(userptr, "glGetInteger64i_v"); + glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC) load(userptr, "glGetInteger64v"); + glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC) load(userptr, "glGetMultisamplefv"); + glad_glGetSynciv = (PFNGLGETSYNCIVPROC) load(userptr, "glGetSynciv"); + glad_glIsSync = (PFNGLISSYNCPROC) load(userptr, "glIsSync"); + glad_glMultiDrawElementsBaseVertex = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC) load(userptr, "glMultiDrawElementsBaseVertex"); + glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC) load(userptr, "glProvokingVertex"); + glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC) load(userptr, "glSampleMaski"); + glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC) load(userptr, "glTexImage2DMultisample"); + glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC) load(userptr, "glTexImage3DMultisample"); + glad_glWaitSync = (PFNGLWAITSYNCPROC) load(userptr, "glWaitSync"); +} +static void glad_gl_load_GL_VERSION_3_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_VERSION_3_3) return; + glad_glBindFragDataLocationIndexed = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC) load(userptr, "glBindFragDataLocationIndexed"); + glad_glBindSampler = (PFNGLBINDSAMPLERPROC) load(userptr, "glBindSampler"); + glad_glColorP3ui = (PFNGLCOLORP3UIPROC) load(userptr, "glColorP3ui"); + glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC) load(userptr, "glColorP3uiv"); + glad_glColorP4ui = (PFNGLCOLORP4UIPROC) load(userptr, "glColorP4ui"); + glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC) load(userptr, "glColorP4uiv"); + glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC) load(userptr, "glDeleteSamplers"); + glad_glGenSamplers = (PFNGLGENSAMPLERSPROC) load(userptr, "glGenSamplers"); + glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC) load(userptr, "glGetFragDataIndex"); + glad_glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC) load(userptr, "glGetQueryObjecti64v"); + glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC) load(userptr, "glGetQueryObjectui64v"); + glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC) load(userptr, "glGetSamplerParameterIiv"); + glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC) load(userptr, "glGetSamplerParameterIuiv"); + glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC) load(userptr, "glGetSamplerParameterfv"); + glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC) load(userptr, "glGetSamplerParameteriv"); + glad_glIsSampler = (PFNGLISSAMPLERPROC) load(userptr, "glIsSampler"); + glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC) load(userptr, "glMultiTexCoordP1ui"); + glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC) load(userptr, "glMultiTexCoordP1uiv"); + glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC) load(userptr, "glMultiTexCoordP2ui"); + glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC) load(userptr, "glMultiTexCoordP2uiv"); + glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC) load(userptr, "glMultiTexCoordP3ui"); + glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC) load(userptr, "glMultiTexCoordP3uiv"); + glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC) load(userptr, "glMultiTexCoordP4ui"); + glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC) load(userptr, "glMultiTexCoordP4uiv"); + glad_glNormalP3ui = (PFNGLNORMALP3UIPROC) load(userptr, "glNormalP3ui"); + glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC) load(userptr, "glNormalP3uiv"); + glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC) load(userptr, "glQueryCounter"); + glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC) load(userptr, "glSamplerParameterIiv"); + glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC) load(userptr, "glSamplerParameterIuiv"); + glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC) load(userptr, "glSamplerParameterf"); + glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC) load(userptr, "glSamplerParameterfv"); + glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC) load(userptr, "glSamplerParameteri"); + glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC) load(userptr, "glSamplerParameteriv"); + glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC) load(userptr, "glSecondaryColorP3ui"); + glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC) load(userptr, "glSecondaryColorP3uiv"); + glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC) load(userptr, "glTexCoordP1ui"); + glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC) load(userptr, "glTexCoordP1uiv"); + glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC) load(userptr, "glTexCoordP2ui"); + glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC) load(userptr, "glTexCoordP2uiv"); + glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC) load(userptr, "glTexCoordP3ui"); + glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC) load(userptr, "glTexCoordP3uiv"); + glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC) load(userptr, "glTexCoordP4ui"); + glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC) load(userptr, "glTexCoordP4uiv"); + glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC) load(userptr, "glVertexAttribDivisor"); + glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC) load(userptr, "glVertexAttribP1ui"); + glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC) load(userptr, "glVertexAttribP1uiv"); + glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC) load(userptr, "glVertexAttribP2ui"); + glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC) load(userptr, "glVertexAttribP2uiv"); + glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC) load(userptr, "glVertexAttribP3ui"); + glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC) load(userptr, "glVertexAttribP3uiv"); + glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC) load(userptr, "glVertexAttribP4ui"); + glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC) load(userptr, "glVertexAttribP4uiv"); + glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC) load(userptr, "glVertexP2ui"); + glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC) load(userptr, "glVertexP2uiv"); + glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC) load(userptr, "glVertexP3ui"); + glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC) load(userptr, "glVertexP3uiv"); + glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC) load(userptr, "glVertexP4ui"); + glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC) load(userptr, "glVertexP4uiv"); +} + + + +#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0) +#define GLAD_GL_IS_SOME_NEW_VERSION 1 +#else +#define GLAD_GL_IS_SOME_NEW_VERSION 0 +#endif + +static int glad_gl_get_extensions( int version, const char **out_exts, unsigned int *out_num_exts_i, char ***out_exts_i) { +#if GLAD_GL_IS_SOME_NEW_VERSION + if(GLAD_VERSION_MAJOR(version) < 3) { +#else + (void) version; + (void) out_num_exts_i; + (void) out_exts_i; +#endif + if (glad_glGetString == NULL) { + return 0; + } + *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS); +#if GLAD_GL_IS_SOME_NEW_VERSION + } else { + unsigned int index = 0; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (glad_glGetStringi == NULL || glad_glGetIntegerv == NULL) { + return 0; + } + glad_glGetIntegerv(GL_NUM_EXTENSIONS, (int*) &num_exts_i); + if (num_exts_i > 0) { + exts_i = (char **) malloc(num_exts_i * (sizeof *exts_i)); + } + if (exts_i == NULL) { + return 0; + } + for(index = 0; index < num_exts_i; index++) { + const char *gl_str_tmp = (const char*) glad_glGetStringi(GL_EXTENSIONS, index); + size_t len = strlen(gl_str_tmp) + 1; + + char *local_str = (char*) malloc(len * sizeof(char)); + if(local_str != NULL) { + memcpy(local_str, gl_str_tmp, len * sizeof(char)); + } + + exts_i[index] = local_str; + } + + *out_num_exts_i = num_exts_i; + *out_exts_i = exts_i; + } +#endif + return 1; +} +static void glad_gl_free_extensions(char **exts_i, unsigned int num_exts_i) { + if (exts_i != NULL) { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + free((void *) (exts_i[index])); + } + free((void *)exts_i); + exts_i = NULL; + } +} +static int glad_gl_has_extension(int version, const char *exts, unsigned int num_exts_i, char **exts_i, const char *ext) { + if(GLAD_VERSION_MAJOR(version) < 3 || !GLAD_GL_IS_SOME_NEW_VERSION) { + const char *extensions; + const char *loc; + const char *terminator; + extensions = exts; + if(extensions == NULL || ext == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } + } else { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + const char *e = exts_i[index]; + if(strcmp(e, ext) == 0) { + return 1; + } + } + } + return 0; +} + +static GLADapiproc glad_gl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_gl_find_extensions_gl( int version) { + const char *exts = NULL; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0; + + (void) glad_gl_has_extension; + + glad_gl_free_extensions(exts_i, num_exts_i); + + return 1; +} + +static int glad_gl_find_core_gl(void) { + int i, major, minor; + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + version = (const char*) glad_glGetString(GL_VERSION); + if (!version) return 0; + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + + GLAD_GL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_GL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_GL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_GL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_GL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + GLAD_GL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1; + GLAD_GL_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2; + GLAD_GL_VERSION_2_1 = (major == 2 && minor >= 1) || major > 2; + GLAD_GL_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3; + GLAD_GL_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3; + GLAD_GL_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3; + GLAD_GL_VERSION_3_3 = (major == 3 && minor >= 3) || major > 3; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr) { + int version; + + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + if(glad_glGetString == NULL) return 0; + if(glad_glGetString(GL_VERSION) == NULL) return 0; + version = glad_gl_find_core_gl(); + + glad_gl_load_GL_VERSION_1_0(load, userptr); + glad_gl_load_GL_VERSION_1_1(load, userptr); + glad_gl_load_GL_VERSION_1_2(load, userptr); + glad_gl_load_GL_VERSION_1_3(load, userptr); + glad_gl_load_GL_VERSION_1_4(load, userptr); + glad_gl_load_GL_VERSION_1_5(load, userptr); + glad_gl_load_GL_VERSION_2_0(load, userptr); + glad_gl_load_GL_VERSION_2_1(load, userptr); + glad_gl_load_GL_VERSION_3_0(load, userptr); + glad_gl_load_GL_VERSION_3_1(load, userptr); + glad_gl_load_GL_VERSION_3_2(load, userptr); + glad_gl_load_GL_VERSION_3_3(load, userptr); + + if (!glad_gl_find_extensions_gl(version)) return 0; + + + + return version; +} + + +int gladLoadGL( GLADloadfunc load) { + return gladLoadGLUserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/src/gles2.c glmark2-2021.02/src/glad/src/gles2.c --- glmark2-2014.03+git20150611.fa71af2d/src/glad/src/gles2.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/src/gles2.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,495 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GL_ES_VERSION_2_0 = 0; +int GLAD_GL_OES_mapbuffer = 0; +int GLAD_GL_OES_required_internalformat = 0; + + + +PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL; +PFNGLATTACHSHADERPROC glad_glAttachShader = NULL; +PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL; +PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL; +PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL; +PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; +PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBLENDCOLORPROC glad_glBlendColor = NULL; +PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL; +PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL; +PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL; +PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; +PFNGLBUFFERDATAPROC glad_glBufferData = NULL; +PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCLEARPROC glad_glClear = NULL; +PFNGLCLEARCOLORPROC glad_glClearColor = NULL; +PFNGLCLEARDEPTHFPROC glad_glClearDepthf = NULL; +PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; +PFNGLCOLORMASKPROC glad_glColorMask = NULL; +PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; +PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; +PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATESHADERPROC glad_glCreateShader = NULL; +PFNGLCULLFACEPROC glad_glCullFace = NULL; +PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL; +PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL; +PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL; +PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL; +PFNGLDELETESHADERPROC glad_glDeleteShader = NULL; +PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL; +PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL; +PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; +PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL; +PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; +PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; +PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL; +PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL; +PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL; +PFNGLFINISHPROC glad_glFinish = NULL; +PFNGLFLUSHPROC glad_glFlush = NULL; +PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; +PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL; +PFNGLFRONTFACEPROC glad_glFrontFace = NULL; +PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL; +PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL; +PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL; +PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; +PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; +PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL; +PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL; +PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL; +PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; +PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; +PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; +PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES = NULL; +PFNGLGETERRORPROC glad_glGetError = NULL; +PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL; +PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; +PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL; +PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL; +PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL; +PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat = NULL; +PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL; +PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL; +PFNGLGETSTRINGPROC glad_glGetString = NULL; +PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; +PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; +PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; +PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; +PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; +PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLHINTPROC glad_glHint = NULL; +PFNGLISBUFFERPROC glad_glIsBuffer = NULL; +PFNGLISENABLEDPROC glad_glIsEnabled = NULL; +PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL; +PFNGLISPROGRAMPROC glad_glIsProgram = NULL; +PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL; +PFNGLISSHADERPROC glad_glIsShader = NULL; +PFNGLISTEXTUREPROC glad_glIsTexture = NULL; +PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; +PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; +PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; +PFNGLMAPBUFFEROESPROC glad_glMapBufferOES = NULL; +PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; +PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLREADPIXELSPROC glad_glReadPixels = NULL; +PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler = NULL; +PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL; +PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL; +PFNGLSCISSORPROC glad_glScissor = NULL; +PFNGLSHADERBINARYPROC glad_glShaderBinary = NULL; +PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; +PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; +PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; +PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; +PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL; +PFNGLSTENCILOPPROC glad_glStencilOp = NULL; +PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL; +PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; +PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; +PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; +PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; +PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; +PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; +PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; +PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL; +PFNGLUNIFORM1IPROC glad_glUniform1i = NULL; +PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL; +PFNGLUNIFORM2FPROC glad_glUniform2f = NULL; +PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL; +PFNGLUNIFORM2IPROC glad_glUniform2i = NULL; +PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL; +PFNGLUNIFORM3FPROC glad_glUniform3f = NULL; +PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL; +PFNGLUNIFORM3IPROC glad_glUniform3i = NULL; +PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL; +PFNGLUNIFORM4FPROC glad_glUniform4f = NULL; +PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL; +PFNGLUNIFORM4IPROC glad_glUniform4i = NULL; +PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL; +PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL; +PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES = NULL; +PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; +PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; +PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; +PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL; +PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL; +PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL; +PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL; +PFNGLVIEWPORTPROC glad_glViewport = NULL; + + +static void glad_gl_load_GL_ES_VERSION_2_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_ES_VERSION_2_0) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC) load(userptr, "glActiveTexture"); + glad_glAttachShader = (PFNGLATTACHSHADERPROC) load(userptr, "glAttachShader"); + glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC) load(userptr, "glBindAttribLocation"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC) load(userptr, "glBindBuffer"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC) load(userptr, "glBindFramebuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC) load(userptr, "glBindRenderbuffer"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC) load(userptr, "glBindTexture"); + glad_glBlendColor = (PFNGLBLENDCOLORPROC) load(userptr, "glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC) load(userptr, "glBlendEquation"); + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC) load(userptr, "glBlendEquationSeparate"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC) load(userptr, "glBlendFunc"); + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC) load(userptr, "glBlendFuncSeparate"); + glad_glBufferData = (PFNGLBUFFERDATAPROC) load(userptr, "glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC) load(userptr, "glBufferSubData"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC) load(userptr, "glCheckFramebufferStatus"); + glad_glClear = (PFNGLCLEARPROC) load(userptr, "glClear"); + glad_glClearColor = (PFNGLCLEARCOLORPROC) load(userptr, "glClearColor"); + glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC) load(userptr, "glClearDepthf"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC) load(userptr, "glClearStencil"); + glad_glColorMask = (PFNGLCOLORMASKPROC) load(userptr, "glColorMask"); + glad_glCompileShader = (PFNGLCOMPILESHADERPROC) load(userptr, "glCompileShader"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC) load(userptr, "glCompressedTexImage2D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) load(userptr, "glCompressedTexSubImage2D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC) load(userptr, "glCopyTexImage2D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC) load(userptr, "glCopyTexSubImage2D"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC) load(userptr, "glCreateProgram"); + glad_glCreateShader = (PFNGLCREATESHADERPROC) load(userptr, "glCreateShader"); + glad_glCullFace = (PFNGLCULLFACEPROC) load(userptr, "glCullFace"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) load(userptr, "glDeleteBuffers"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC) load(userptr, "glDeleteFramebuffers"); + glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC) load(userptr, "glDeleteProgram"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC) load(userptr, "glDeleteRenderbuffers"); + glad_glDeleteShader = (PFNGLDELETESHADERPROC) load(userptr, "glDeleteShader"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC) load(userptr, "glDeleteTextures"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC) load(userptr, "glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC) load(userptr, "glDepthMask"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC) load(userptr, "glDepthRangef"); + glad_glDetachShader = (PFNGLDETACHSHADERPROC) load(userptr, "glDetachShader"); + glad_glDisable = (PFNGLDISABLEPROC) load(userptr, "glDisable"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) load(userptr, "glDisableVertexAttribArray"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC) load(userptr, "glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC) load(userptr, "glDrawElements"); + glad_glEnable = (PFNGLENABLEPROC) load(userptr, "glEnable"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC) load(userptr, "glEnableVertexAttribArray"); + glad_glFinish = (PFNGLFINISHPROC) load(userptr, "glFinish"); + glad_glFlush = (PFNGLFLUSHPROC) load(userptr, "glFlush"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC) load(userptr, "glFramebufferRenderbuffer"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC) load(userptr, "glFramebufferTexture2D"); + glad_glFrontFace = (PFNGLFRONTFACEPROC) load(userptr, "glFrontFace"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC) load(userptr, "glGenBuffers"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC) load(userptr, "glGenFramebuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC) load(userptr, "glGenRenderbuffers"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC) load(userptr, "glGenTextures"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC) load(userptr, "glGenerateMipmap"); + glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC) load(userptr, "glGetActiveAttrib"); + glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC) load(userptr, "glGetActiveUniform"); + glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC) load(userptr, "glGetAttachedShaders"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC) load(userptr, "glGetAttribLocation"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC) load(userptr, "glGetBooleanv"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC) load(userptr, "glGetBufferParameteriv"); + glad_glGetError = (PFNGLGETERRORPROC) load(userptr, "glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC) load(userptr, "glGetFloatv"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) load(userptr, "glGetFramebufferAttachmentParameteriv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC) load(userptr, "glGetIntegerv"); + glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC) load(userptr, "glGetProgramInfoLog"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC) load(userptr, "glGetProgramiv"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC) load(userptr, "glGetRenderbufferParameteriv"); + glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC) load(userptr, "glGetShaderInfoLog"); + glad_glGetShaderPrecisionFormat = (PFNGLGETSHADERPRECISIONFORMATPROC) load(userptr, "glGetShaderPrecisionFormat"); + glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC) load(userptr, "glGetShaderSource"); + glad_glGetShaderiv = (PFNGLGETSHADERIVPROC) load(userptr, "glGetShaderiv"); + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC) load(userptr, "glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC) load(userptr, "glGetTexParameteriv"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC) load(userptr, "glGetUniformLocation"); + glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC) load(userptr, "glGetUniformfv"); + glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC) load(userptr, "glGetUniformiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC) load(userptr, "glGetVertexAttribPointerv"); + glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC) load(userptr, "glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC) load(userptr, "glGetVertexAttribiv"); + glad_glHint = (PFNGLHINTPROC) load(userptr, "glHint"); + glad_glIsBuffer = (PFNGLISBUFFERPROC) load(userptr, "glIsBuffer"); + glad_glIsEnabled = (PFNGLISENABLEDPROC) load(userptr, "glIsEnabled"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC) load(userptr, "glIsFramebuffer"); + glad_glIsProgram = (PFNGLISPROGRAMPROC) load(userptr, "glIsProgram"); + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC) load(userptr, "glIsRenderbuffer"); + glad_glIsShader = (PFNGLISSHADERPROC) load(userptr, "glIsShader"); + glad_glIsTexture = (PFNGLISTEXTUREPROC) load(userptr, "glIsTexture"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC) load(userptr, "glLineWidth"); + glad_glLinkProgram = (PFNGLLINKPROGRAMPROC) load(userptr, "glLinkProgram"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC) load(userptr, "glPixelStorei"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC) load(userptr, "glPolygonOffset"); + glad_glReadPixels = (PFNGLREADPIXELSPROC) load(userptr, "glReadPixels"); + glad_glReleaseShaderCompiler = (PFNGLRELEASESHADERCOMPILERPROC) load(userptr, "glReleaseShaderCompiler"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC) load(userptr, "glRenderbufferStorage"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC) load(userptr, "glSampleCoverage"); + glad_glScissor = (PFNGLSCISSORPROC) load(userptr, "glScissor"); + glad_glShaderBinary = (PFNGLSHADERBINARYPROC) load(userptr, "glShaderBinary"); + glad_glShaderSource = (PFNGLSHADERSOURCEPROC) load(userptr, "glShaderSource"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC) load(userptr, "glStencilFunc"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC) load(userptr, "glStencilFuncSeparate"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC) load(userptr, "glStencilMask"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC) load(userptr, "glStencilMaskSeparate"); + glad_glStencilOp = (PFNGLSTENCILOPPROC) load(userptr, "glStencilOp"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC) load(userptr, "glStencilOpSeparate"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC) load(userptr, "glTexImage2D"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC) load(userptr, "glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC) load(userptr, "glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC) load(userptr, "glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC) load(userptr, "glTexParameteriv"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC) load(userptr, "glTexSubImage2D"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC) load(userptr, "glUniform1f"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC) load(userptr, "glUniform1fv"); + glad_glUniform1i = (PFNGLUNIFORM1IPROC) load(userptr, "glUniform1i"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC) load(userptr, "glUniform1iv"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC) load(userptr, "glUniform2f"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC) load(userptr, "glUniform2fv"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC) load(userptr, "glUniform2i"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC) load(userptr, "glUniform2iv"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC) load(userptr, "glUniform3f"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC) load(userptr, "glUniform3fv"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC) load(userptr, "glUniform3i"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC) load(userptr, "glUniform3iv"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC) load(userptr, "glUniform4f"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC) load(userptr, "glUniform4fv"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC) load(userptr, "glUniform4i"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC) load(userptr, "glUniform4iv"); + glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC) load(userptr, "glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC) load(userptr, "glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC) load(userptr, "glUniformMatrix4fv"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC) load(userptr, "glUseProgram"); + glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC) load(userptr, "glValidateProgram"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC) load(userptr, "glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC) load(userptr, "glVertexAttrib1fv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC) load(userptr, "glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC) load(userptr, "glVertexAttrib2fv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC) load(userptr, "glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC) load(userptr, "glVertexAttrib3fv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC) load(userptr, "glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC) load(userptr, "glVertexAttrib4fv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC) load(userptr, "glVertexAttribPointer"); + glad_glViewport = (PFNGLVIEWPORTPROC) load(userptr, "glViewport"); +} +static void glad_gl_load_GL_OES_mapbuffer( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GL_OES_mapbuffer) return; + glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC) load(userptr, "glGetBufferPointerv"); + glad_glGetBufferPointervOES = (PFNGLGETBUFFERPOINTERVOESPROC) load(userptr, "glGetBufferPointervOES"); + glad_glMapBuffer = (PFNGLMAPBUFFERPROC) load(userptr, "glMapBuffer"); + glad_glMapBufferOES = (PFNGLMAPBUFFEROESPROC) load(userptr, "glMapBufferOES"); + glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) load(userptr, "glUnmapBuffer"); + glad_glUnmapBufferOES = (PFNGLUNMAPBUFFEROESPROC) load(userptr, "glUnmapBufferOES"); +} + + + +#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0) +#define GLAD_GL_IS_SOME_NEW_VERSION 1 +#else +#define GLAD_GL_IS_SOME_NEW_VERSION 0 +#endif + +static int glad_gl_get_extensions( int version, const char **out_exts, unsigned int *out_num_exts_i, char ***out_exts_i) { +#if GLAD_GL_IS_SOME_NEW_VERSION + if(GLAD_VERSION_MAJOR(version) < 3) { +#else + (void) version; + (void) out_num_exts_i; + (void) out_exts_i; +#endif + if (glad_glGetString == NULL) { + return 0; + } + *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS); +#if GLAD_GL_IS_SOME_NEW_VERSION + } else { + unsigned int index = 0; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (glad_glGetStringi == NULL || glad_glGetIntegerv == NULL) { + return 0; + } + glad_glGetIntegerv(GL_NUM_EXTENSIONS, (int*) &num_exts_i); + if (num_exts_i > 0) { + exts_i = (char **) malloc(num_exts_i * (sizeof *exts_i)); + } + if (exts_i == NULL) { + return 0; + } + for(index = 0; index < num_exts_i; index++) { + const char *gl_str_tmp = (const char*) glad_glGetStringi(GL_EXTENSIONS, index); + size_t len = strlen(gl_str_tmp) + 1; + + char *local_str = (char*) malloc(len * sizeof(char)); + if(local_str != NULL) { + memcpy(local_str, gl_str_tmp, len * sizeof(char)); + } + + exts_i[index] = local_str; + } + + *out_num_exts_i = num_exts_i; + *out_exts_i = exts_i; + } +#endif + return 1; +} +static void glad_gl_free_extensions(char **exts_i, unsigned int num_exts_i) { + if (exts_i != NULL) { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + free((void *) (exts_i[index])); + } + free((void *)exts_i); + exts_i = NULL; + } +} +static int glad_gl_has_extension(int version, const char *exts, unsigned int num_exts_i, char **exts_i, const char *ext) { + if(GLAD_VERSION_MAJOR(version) < 3 || !GLAD_GL_IS_SOME_NEW_VERSION) { + const char *extensions; + const char *loc; + const char *terminator; + extensions = exts; + if(extensions == NULL || ext == NULL) { + return 0; + } + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) { + return 0; + } + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } + } else { + unsigned int index; + for(index = 0; index < num_exts_i; index++) { + const char *e = exts_i[index]; + if(strcmp(e, ext) == 0) { + return 1; + } + } + } + return 0; +} + +static GLADapiproc glad_gl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_gl_find_extensions_gles2( int version) { + const char *exts = NULL; + unsigned int num_exts_i = 0; + char **exts_i = NULL; + if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0; + + GLAD_GL_OES_mapbuffer = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OES_mapbuffer"); + GLAD_GL_OES_required_internalformat = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OES_required_internalformat"); + + glad_gl_free_extensions(exts_i, num_exts_i); + + return 1; +} + +static int glad_gl_find_core_gles2(void) { + int i, major, minor; + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + version = (const char*) glad_glGetString(GL_VERSION); + if (!version) return 0; + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + + GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor); + + GLAD_GL_ES_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2; + + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr) { + int version; + + glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString"); + if(glad_glGetString == NULL) return 0; + if(glad_glGetString(GL_VERSION) == NULL) return 0; + version = glad_gl_find_core_gles2(); + + glad_gl_load_GL_ES_VERSION_2_0(load, userptr); + + if (!glad_gl_find_extensions_gles2(version)) return 0; + glad_gl_load_GL_OES_mapbuffer(load, userptr); + + + + return version; +} + + +int gladLoadGLES2( GLADloadfunc load) { + return gladLoadGLES2UserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/src/glx.c glmark2-2021.02/src/glad/src/glx.c --- glmark2-2014.03+git20150611.fa71af2d/src/glad/src/glx.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/src/glx.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,232 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_GLX_VERSION_1_0 = 0; +int GLAD_GLX_VERSION_1_1 = 0; +int GLAD_GLX_VERSION_1_2 = 0; +int GLAD_GLX_VERSION_1_3 = 0; +int GLAD_GLX_VERSION_1_4 = 0; +int GLAD_GLX_EXT_swap_control = 0; +int GLAD_GLX_MESA_swap_control = 0; + + + +PFNGLXCHOOSEFBCONFIGPROC glad_glXChooseFBConfig = NULL; +PFNGLXCHOOSEVISUALPROC glad_glXChooseVisual = NULL; +PFNGLXCOPYCONTEXTPROC glad_glXCopyContext = NULL; +PFNGLXCREATECONTEXTPROC glad_glXCreateContext = NULL; +PFNGLXCREATEGLXPIXMAPPROC glad_glXCreateGLXPixmap = NULL; +PFNGLXCREATENEWCONTEXTPROC glad_glXCreateNewContext = NULL; +PFNGLXCREATEPBUFFERPROC glad_glXCreatePbuffer = NULL; +PFNGLXCREATEPIXMAPPROC glad_glXCreatePixmap = NULL; +PFNGLXCREATEWINDOWPROC glad_glXCreateWindow = NULL; +PFNGLXDESTROYCONTEXTPROC glad_glXDestroyContext = NULL; +PFNGLXDESTROYGLXPIXMAPPROC glad_glXDestroyGLXPixmap = NULL; +PFNGLXDESTROYPBUFFERPROC glad_glXDestroyPbuffer = NULL; +PFNGLXDESTROYPIXMAPPROC glad_glXDestroyPixmap = NULL; +PFNGLXDESTROYWINDOWPROC glad_glXDestroyWindow = NULL; +PFNGLXGETCLIENTSTRINGPROC glad_glXGetClientString = NULL; +PFNGLXGETCONFIGPROC glad_glXGetConfig = NULL; +PFNGLXGETCURRENTCONTEXTPROC glad_glXGetCurrentContext = NULL; +PFNGLXGETCURRENTDISPLAYPROC glad_glXGetCurrentDisplay = NULL; +PFNGLXGETCURRENTDRAWABLEPROC glad_glXGetCurrentDrawable = NULL; +PFNGLXGETCURRENTREADDRAWABLEPROC glad_glXGetCurrentReadDrawable = NULL; +PFNGLXGETFBCONFIGATTRIBPROC glad_glXGetFBConfigAttrib = NULL; +PFNGLXGETFBCONFIGSPROC glad_glXGetFBConfigs = NULL; +PFNGLXGETPROCADDRESSPROC glad_glXGetProcAddress = NULL; +PFNGLXGETSELECTEDEVENTPROC glad_glXGetSelectedEvent = NULL; +PFNGLXGETSWAPINTERVALMESAPROC glad_glXGetSwapIntervalMESA = NULL; +PFNGLXGETVISUALFROMFBCONFIGPROC glad_glXGetVisualFromFBConfig = NULL; +PFNGLXISDIRECTPROC glad_glXIsDirect = NULL; +PFNGLXMAKECONTEXTCURRENTPROC glad_glXMakeContextCurrent = NULL; +PFNGLXMAKECURRENTPROC glad_glXMakeCurrent = NULL; +PFNGLXQUERYCONTEXTPROC glad_glXQueryContext = NULL; +PFNGLXQUERYDRAWABLEPROC glad_glXQueryDrawable = NULL; +PFNGLXQUERYEXTENSIONPROC glad_glXQueryExtension = NULL; +PFNGLXQUERYEXTENSIONSSTRINGPROC glad_glXQueryExtensionsString = NULL; +PFNGLXQUERYSERVERSTRINGPROC glad_glXQueryServerString = NULL; +PFNGLXQUERYVERSIONPROC glad_glXQueryVersion = NULL; +PFNGLXSELECTEVENTPROC glad_glXSelectEvent = NULL; +PFNGLXSWAPBUFFERSPROC glad_glXSwapBuffers = NULL; +PFNGLXSWAPINTERVALEXTPROC glad_glXSwapIntervalEXT = NULL; +PFNGLXSWAPINTERVALMESAPROC glad_glXSwapIntervalMESA = NULL; +PFNGLXUSEXFONTPROC glad_glXUseXFont = NULL; +PFNGLXWAITGLPROC glad_glXWaitGL = NULL; +PFNGLXWAITXPROC glad_glXWaitX = NULL; + + +static void glad_glx_load_GLX_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_0) return; + glad_glXChooseVisual = (PFNGLXCHOOSEVISUALPROC) load(userptr, "glXChooseVisual"); + glad_glXCopyContext = (PFNGLXCOPYCONTEXTPROC) load(userptr, "glXCopyContext"); + glad_glXCreateContext = (PFNGLXCREATECONTEXTPROC) load(userptr, "glXCreateContext"); + glad_glXCreateGLXPixmap = (PFNGLXCREATEGLXPIXMAPPROC) load(userptr, "glXCreateGLXPixmap"); + glad_glXDestroyContext = (PFNGLXDESTROYCONTEXTPROC) load(userptr, "glXDestroyContext"); + glad_glXDestroyGLXPixmap = (PFNGLXDESTROYGLXPIXMAPPROC) load(userptr, "glXDestroyGLXPixmap"); + glad_glXGetConfig = (PFNGLXGETCONFIGPROC) load(userptr, "glXGetConfig"); + glad_glXGetCurrentContext = (PFNGLXGETCURRENTCONTEXTPROC) load(userptr, "glXGetCurrentContext"); + glad_glXGetCurrentDrawable = (PFNGLXGETCURRENTDRAWABLEPROC) load(userptr, "glXGetCurrentDrawable"); + glad_glXIsDirect = (PFNGLXISDIRECTPROC) load(userptr, "glXIsDirect"); + glad_glXMakeCurrent = (PFNGLXMAKECURRENTPROC) load(userptr, "glXMakeCurrent"); + glad_glXQueryExtension = (PFNGLXQUERYEXTENSIONPROC) load(userptr, "glXQueryExtension"); + glad_glXQueryVersion = (PFNGLXQUERYVERSIONPROC) load(userptr, "glXQueryVersion"); + glad_glXSwapBuffers = (PFNGLXSWAPBUFFERSPROC) load(userptr, "glXSwapBuffers"); + glad_glXUseXFont = (PFNGLXUSEXFONTPROC) load(userptr, "glXUseXFont"); + glad_glXWaitGL = (PFNGLXWAITGLPROC) load(userptr, "glXWaitGL"); + glad_glXWaitX = (PFNGLXWAITXPROC) load(userptr, "glXWaitX"); +} +static void glad_glx_load_GLX_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_1) return; + glad_glXGetClientString = (PFNGLXGETCLIENTSTRINGPROC) load(userptr, "glXGetClientString"); + glad_glXQueryExtensionsString = (PFNGLXQUERYEXTENSIONSSTRINGPROC) load(userptr, "glXQueryExtensionsString"); + glad_glXQueryServerString = (PFNGLXQUERYSERVERSTRINGPROC) load(userptr, "glXQueryServerString"); +} +static void glad_glx_load_GLX_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_2) return; + glad_glXGetCurrentDisplay = (PFNGLXGETCURRENTDISPLAYPROC) load(userptr, "glXGetCurrentDisplay"); +} +static void glad_glx_load_GLX_VERSION_1_3( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_3) return; + glad_glXChooseFBConfig = (PFNGLXCHOOSEFBCONFIGPROC) load(userptr, "glXChooseFBConfig"); + glad_glXCreateNewContext = (PFNGLXCREATENEWCONTEXTPROC) load(userptr, "glXCreateNewContext"); + glad_glXCreatePbuffer = (PFNGLXCREATEPBUFFERPROC) load(userptr, "glXCreatePbuffer"); + glad_glXCreatePixmap = (PFNGLXCREATEPIXMAPPROC) load(userptr, "glXCreatePixmap"); + glad_glXCreateWindow = (PFNGLXCREATEWINDOWPROC) load(userptr, "glXCreateWindow"); + glad_glXDestroyPbuffer = (PFNGLXDESTROYPBUFFERPROC) load(userptr, "glXDestroyPbuffer"); + glad_glXDestroyPixmap = (PFNGLXDESTROYPIXMAPPROC) load(userptr, "glXDestroyPixmap"); + glad_glXDestroyWindow = (PFNGLXDESTROYWINDOWPROC) load(userptr, "glXDestroyWindow"); + glad_glXGetCurrentReadDrawable = (PFNGLXGETCURRENTREADDRAWABLEPROC) load(userptr, "glXGetCurrentReadDrawable"); + glad_glXGetFBConfigAttrib = (PFNGLXGETFBCONFIGATTRIBPROC) load(userptr, "glXGetFBConfigAttrib"); + glad_glXGetFBConfigs = (PFNGLXGETFBCONFIGSPROC) load(userptr, "glXGetFBConfigs"); + glad_glXGetSelectedEvent = (PFNGLXGETSELECTEDEVENTPROC) load(userptr, "glXGetSelectedEvent"); + glad_glXGetVisualFromFBConfig = (PFNGLXGETVISUALFROMFBCONFIGPROC) load(userptr, "glXGetVisualFromFBConfig"); + glad_glXMakeContextCurrent = (PFNGLXMAKECONTEXTCURRENTPROC) load(userptr, "glXMakeContextCurrent"); + glad_glXQueryContext = (PFNGLXQUERYCONTEXTPROC) load(userptr, "glXQueryContext"); + glad_glXQueryDrawable = (PFNGLXQUERYDRAWABLEPROC) load(userptr, "glXQueryDrawable"); + glad_glXSelectEvent = (PFNGLXSELECTEVENTPROC) load(userptr, "glXSelectEvent"); +} +static void glad_glx_load_GLX_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_VERSION_1_4) return; + glad_glXGetProcAddress = (PFNGLXGETPROCADDRESSPROC) load(userptr, "glXGetProcAddress"); +} +static void glad_glx_load_GLX_EXT_swap_control( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_EXT_swap_control) return; + glad_glXSwapIntervalEXT = (PFNGLXSWAPINTERVALEXTPROC) load(userptr, "glXSwapIntervalEXT"); +} +static void glad_glx_load_GLX_MESA_swap_control( GLADuserptrloadfunc load, void* userptr) { + if(!GLAD_GLX_MESA_swap_control) return; + glad_glXGetSwapIntervalMESA = (PFNGLXGETSWAPINTERVALMESAPROC) load(userptr, "glXGetSwapIntervalMESA"); + glad_glXSwapIntervalMESA = (PFNGLXSWAPINTERVALMESAPROC) load(userptr, "glXSwapIntervalMESA"); +} + + + +static int glad_glx_has_extension(Display *display, int screen, const char *ext) { +#ifndef GLX_VERSION_1_1 + (void) display; + (void) screen; + (void) ext; +#else + const char *terminator; + const char *loc; + const char *extensions; + + if (glXQueryExtensionsString == NULL) { + return 0; + } + + extensions = glXQueryExtensionsString(display, screen); + + if(extensions == NULL || ext == NULL) { + return 0; + } + + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) + break; + + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) { + return 1; + } + extensions = terminator; + } +#endif + + return 0; +} + +static GLADapiproc glad_glx_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_glx_find_extensions(Display *display, int screen) { + GLAD_GLX_EXT_swap_control = glad_glx_has_extension(display, screen, "GLX_EXT_swap_control"); + GLAD_GLX_MESA_swap_control = glad_glx_has_extension(display, screen, "GLX_MESA_swap_control"); + return 1; +} + +static int glad_glx_find_core_glx(Display **display, int *screen) { + int major = 0, minor = 0; + if(*display == NULL) { +#ifdef GLAD_GLX_NO_X11 + (void) screen; + return 0; +#else + *display = XOpenDisplay(0); + if (*display == NULL) { + return 0; + } + *screen = XScreenNumberOfScreen(XDefaultScreenOfDisplay(*display)); +#endif + } + glXQueryVersion(*display, &major, &minor); + GLAD_GLX_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_GLX_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_GLX_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_GLX_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_GLX_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadGLXUserPtr(Display *display, int screen, GLADuserptrloadfunc load, void *userptr) { + int version; + glXQueryVersion = (PFNGLXQUERYVERSIONPROC) load(userptr, "glXQueryVersion"); + if(glXQueryVersion == NULL) return 0; + version = glad_glx_find_core_glx(&display, &screen); + + glad_glx_load_GLX_VERSION_1_0(load, userptr); + glad_glx_load_GLX_VERSION_1_1(load, userptr); + glad_glx_load_GLX_VERSION_1_2(load, userptr); + glad_glx_load_GLX_VERSION_1_3(load, userptr); + glad_glx_load_GLX_VERSION_1_4(load, userptr); + + if (!glad_glx_find_extensions(display, screen)) return 0; + glad_glx_load_GLX_EXT_swap_control(load, userptr); + glad_glx_load_GLX_MESA_swap_control(load, userptr); + + return version; +} + +int gladLoadGLX(Display *display, int screen, GLADloadfunc load) { + return gladLoadGLXUserPtr(display, screen, glad_glx_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/glad/src/wgl.c glmark2-2021.02/src/glad/src/wgl.c --- glmark2-2014.03+git20150611.fa71af2d/src/glad/src/wgl.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/glad/src/wgl.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,117 @@ +#include +#include +#include +#include + +#ifndef GLAD_IMPL_UTIL_C_ +#define GLAD_IMPL_UTIL_C_ + +#ifdef _MSC_VER +#define GLAD_IMPL_UTIL_SSCANF sscanf_s +#else +#define GLAD_IMPL_UTIL_SSCANF sscanf +#endif + +#endif /* GLAD_IMPL_UTIL_C_ */ + + +int GLAD_WGL_VERSION_1_0 = 0; +int GLAD_WGL_ARB_extensions_string = 0; +int GLAD_WGL_EXT_extensions_string = 0; +int GLAD_WGL_EXT_swap_control = 0; + + + +PFNWGLGETEXTENSIONSSTRINGARBPROC glad_wglGetExtensionsStringARB = NULL; +PFNWGLGETEXTENSIONSSTRINGEXTPROC glad_wglGetExtensionsStringEXT = NULL; +PFNWGLGETSWAPINTERVALEXTPROC glad_wglGetSwapIntervalEXT = NULL; +PFNWGLSWAPINTERVALEXTPROC glad_wglSwapIntervalEXT = NULL; + + +static void glad_wgl_load_WGL_ARB_extensions_string(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_ARB_extensions_string) return; + glad_wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC) load(userptr, "wglGetExtensionsStringARB"); +} +static void glad_wgl_load_WGL_EXT_extensions_string(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_EXT_extensions_string) return; + glad_wglGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC) load(userptr, "wglGetExtensionsStringEXT"); +} +static void glad_wgl_load_WGL_EXT_swap_control(GLADuserptrloadfunc load, void *userptr) { + if(!GLAD_WGL_EXT_swap_control) return; + glad_wglGetSwapIntervalEXT = (PFNWGLGETSWAPINTERVALEXTPROC) load(userptr, "wglGetSwapIntervalEXT"); + glad_wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC) load(userptr, "wglSwapIntervalEXT"); +} + + + +static int glad_wgl_has_extension(HDC hdc, const char *ext) { + const char *terminator; + const char *loc; + const char *extensions; + + if(wglGetExtensionsStringEXT == NULL && wglGetExtensionsStringARB == NULL) + return 0; + + if(wglGetExtensionsStringARB == NULL || hdc == INVALID_HANDLE_VALUE) + extensions = wglGetExtensionsStringEXT(); + else + extensions = wglGetExtensionsStringARB(hdc); + + if(extensions == NULL || ext == NULL) + return 0; + + while(1) { + loc = strstr(extensions, ext); + if(loc == NULL) + break; + + terminator = loc + strlen(ext); + if((loc == extensions || *(loc - 1) == ' ') && + (*terminator == ' ' || *terminator == '\0')) + { + return 1; + } + extensions = terminator; + } + + return 0; +} + +static GLADapiproc glad_wgl_get_proc_from_userptr(void *userptr, const char* name) { + return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); +} + +static int glad_wgl_find_extensions_wgl(HDC hdc) { + GLAD_WGL_ARB_extensions_string = glad_wgl_has_extension(hdc, "WGL_ARB_extensions_string"); + GLAD_WGL_EXT_extensions_string = glad_wgl_has_extension(hdc, "WGL_EXT_extensions_string"); + GLAD_WGL_EXT_swap_control = glad_wgl_has_extension(hdc, "WGL_EXT_swap_control"); + return 1; +} + +static int glad_wgl_find_core_wgl(void) { + int major = 1, minor = 0; + GLAD_WGL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + return GLAD_MAKE_VERSION(major, minor); +} + +int gladLoadWGLUserPtr(HDC hdc, GLADuserptrloadfunc load, void *userptr) { + int version; + wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC) load(userptr, "wglGetExtensionsStringARB"); + wglGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC) load(userptr, "wglGetExtensionsStringEXT"); + if(wglGetExtensionsStringARB == NULL && wglGetExtensionsStringEXT == NULL) return 0; + version = glad_wgl_find_core_wgl(); + + + if (!glad_wgl_find_extensions_wgl(hdc)) return 0; + glad_wgl_load_WGL_ARB_extensions_string(load, userptr); + glad_wgl_load_WGL_EXT_extensions_string(load, userptr); + glad_wgl_load_WGL_EXT_swap_control(load, userptr); + + return version; +} + +int gladLoadWGL(HDC hdc, GLADloadfunc load) { + return gladLoadWGLUserPtr(hdc, glad_wgl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); +} + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-headers.cpp glmark2-2021.02/src/gl-headers.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/gl-headers.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-headers.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,8 +21,8 @@ */ #include "gl-headers.h" -void* (*GLExtensions::MapBuffer) (GLenum target, GLenum access) = 0; -GLboolean (*GLExtensions::UnmapBuffer) (GLenum target) = 0; +void* (GLAD_API_PTR *GLExtensions::MapBuffer) (GLenum target, GLenum access) = 0; +GLboolean (GLAD_API_PTR *GLExtensions::UnmapBuffer) (GLenum target) = 0; bool GLExtensions::support(const std::string &ext) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-headers.h glmark2-2021.02/src/gl-headers.h --- glmark2-2014.03+git20150611.fa71af2d/src/gl-headers.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-headers.h 2021-04-25 01:47:22.000000000 +0800 @@ -25,14 +25,12 @@ #define GL_GLEXT_PROTOTYPES #if GLMARK2_USE_GL -#include -#include +#include #ifndef GL_RGB565 #define GL_RGB565 0x8D62 #endif #elif GLMARK2_USE_GLESv2 -#include -#include +#include #ifndef GL_WRITE_ONLY #define GL_WRITE_ONLY GL_WRITE_ONLY_OES #endif @@ -64,8 +62,8 @@ */ static bool support(const std::string &ext); - static void* (*MapBuffer) (GLenum target, GLenum access); - static GLboolean (*UnmapBuffer) (GLenum target); + static void* (GLAD_API_PTR *MapBuffer) (GLenum target, GLenum access); + static GLboolean (GLAD_API_PTR *UnmapBuffer) (GLenum target); }; #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-egl.cpp glmark2-2021.02/src/gl-state-egl.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-egl.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-state-egl.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -27,10 +27,17 @@ #include "gl-headers.h" #include #include +#include using std::vector; using std::string; +GLADapiproc load_egl_func(void *userdata, const char *name) +{ + SharedLibrary *lib = reinterpret_cast(userdata); + return reinterpret_cast(lib->load(name)); +} + /**************************** * EGLConfig public methods * ****************************/ @@ -287,9 +294,34 @@ * GLStateEGL public methods * ****************************/ +GLStateEGL::~GLStateEGL() +{ + if(egl_display_ != nullptr){ + if(!eglTerminate(egl_display_)) + Log::error("eglTerminate failed\n"); + } + + if(eglReleaseThread && !eglReleaseThread()) + Log::error("eglReleaseThread failed\n"); +} + bool GLStateEGL::init_display(void* native_display, GLVisualConfig& visual_config) { +#if defined(WIN32) + if (!egl_lib_.open("libEGL.dll")) { +#else + if (!egl_lib_.open_from_alternatives({"libEGL.so", "libEGL.so.1" })) { +#endif + Log::error("Error loading EGL library\n"); + return false; + } + + if (gladLoadEGLUserPtr(EGL_NO_DISPLAY, load_egl_func, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + native_display_ = reinterpret_cast(native_display); requested_visual_config_ = visual_config; @@ -304,20 +336,28 @@ return gotValidSurface(); } -void +bool GLStateEGL::init_gl_extensions() { #if GLMARK2_USE_GLESv2 + if (!gladLoadGLES2UserPtr(load_proc, this)) { + Log::error("Loading GLESv2 entry points failed."); + return false; + } + if (GLExtensions::support("GL_OES_mapbuffer")) { - GLExtensions::MapBuffer = - reinterpret_cast(eglGetProcAddress("glMapBufferOES")); - GLExtensions::UnmapBuffer = - reinterpret_cast(eglGetProcAddress("glUnmapBufferOES")); + GLExtensions::MapBuffer = glMapBufferOES; + GLExtensions::UnmapBuffer = glUnmapBufferOES; } #elif GLMARK2_USE_GL + if (!gladLoadGLUserPtr(load_proc, this)) { + Log::error("Loading GL entry points failed."); + return false; + } GLExtensions::MapBuffer = glMapBuffer; GLExtensions::UnmapBuffer = glUnmapBuffer; #endif + return true; } bool @@ -347,7 +387,9 @@ Log::info("** Failed to set swap interval. Results may be bounded above by refresh rate.\n"); } - init_gl_extensions(); + if (!init_gl_extensions()) { + return false; + } return true; } @@ -379,7 +421,7 @@ } bool -GLStateEGL::gotNativeConfig(int& vid) +GLStateEGL::gotNativeConfig(intptr_t& vid) { if (!gotValidConfig()) return false; @@ -409,17 +451,64 @@ * GLStateEGL private methods * *****************************/ +#ifdef GLMARK2_USE_X11 +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_X11_KHR +#elif GLMARK2_USE_WAYLAND +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_WAYLAND_KHR +#elif GLMARK2_USE_DRM +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_GBM_KHR +#elif GLMARK2_USE_MIR +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM EGL_PLATFORM_MIR_KHR +#else +// Platforms not in the above platform enums fall back to eglGetDisplay. +#define GLMARK2_NATIVE_EGL_DISPLAY_ENUM 0 +#endif + bool GLStateEGL::gotValidDisplay() { if (egl_display_) return true; - egl_display_ = eglGetDisplay(native_display_); + char const * __restrict const supported_extensions = + eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + + if (GLMARK2_NATIVE_EGL_DISPLAY_ENUM != 0 && supported_extensions + && strstr(supported_extensions, "EGL_EXT_platform_base")) + { + Log::debug("Using eglGetPlatformDisplayEXT()\n"); + PFNEGLGETPLATFORMDISPLAYEXTPROC get_platform_display = + reinterpret_cast( + eglGetProcAddress("eglGetPlatformDisplayEXT")); + + if (get_platform_display != nullptr) { + egl_display_ = get_platform_display( + GLMARK2_NATIVE_EGL_DISPLAY_ENUM, + reinterpret_cast(native_display_), + nullptr); + } + + if (!egl_display_) { + Log::debug("eglGetPlatformDisplayEXT() failed with error: 0x%x\n", + eglGetError()); + } + } + else + { + Log::debug("eglGetPlatformDisplayEXT() seems unsupported\n"); + } + + /* Just in case get_platform_display failed... */ + if (!egl_display_) { + Log::debug("Falling back to eglGetDisplay()\n"); + egl_display_ = eglGetDisplay(native_display_); + } + if (!egl_display_) { Log::error("eglGetDisplay() failed with error: 0x%x\n", eglGetError()); return false; } + int egl_major(-1); int egl_minor(-1); if (!eglInitialize(egl_display_, &egl_major, &egl_minor)) { @@ -428,16 +517,33 @@ return false; } + /* Reinitialize GLAD with a known display */ + if (gladLoadEGLUserPtr(egl_display_, load_egl_func, &egl_lib_) == 0) { + Log::error("Loading EGL entry points failed\n"); + return false; + } + #if GLMARK2_USE_GLESv2 EGLenum apiType(EGL_OPENGL_ES_API); +#if defined(WIN32) + std::initializer_list libNames = { "libGLESv2.dll" }; +#else + std::initializer_list libNames = { "libGLESv2.so", "libGLESv2.so.2" }; +#endif #elif GLMARK2_USE_GL EGLenum apiType(EGL_OPENGL_API); + std::initializer_list libNames = { "libGL.so", "libGL.so.1" }; #endif if (!eglBindAPI(apiType)) { Log::error("Failed to bind api EGL_OPENGL_ES_API\n"); return false; } + if (!gl_lib_.open_from_alternatives(libNames)) { + Log::error("Error loading GL library\n"); + return false; + } + return true; } @@ -617,3 +723,16 @@ return true; } +GLADapiproc +GLStateEGL::load_proc(void *userptr, const char* name) +{ + if (eglGetProcAddress) { + GLADapiproc sym = reinterpret_cast(eglGetProcAddress(name)); + if (sym) { + return sym; + } + } + + GLStateEGL* state = reinterpret_cast(userptr); + return reinterpret_cast(state->gl_lib_.load(name)); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-egl.h glmark2-2021.02/src/gl-state-egl.h --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-egl.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-state-egl.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -23,9 +23,10 @@ #define GLMARK2_GL_STATE_EGL_H_ #include -#include +#include #include "gl-state.h" #include "gl-visual-config.h" +#include "shared-library.h" class EglConfig { @@ -124,12 +125,17 @@ EGLSurface egl_surface_; GLVisualConfig requested_visual_config_; EglConfig best_config_; + SharedLibrary egl_lib_; + SharedLibrary gl_lib_; bool gotValidDisplay(); bool gotValidConfig(); bool gotValidSurface(); bool gotValidContext(); void get_glvisualconfig(EGLConfig config, GLVisualConfig& visual_config); EGLConfig select_best_config(std::vector& configs); + + static GLADapiproc load_proc(void *userptr, const char* name); + public: GLStateEGL() : native_display_(0), @@ -138,15 +144,16 @@ egl_config_(0), egl_context_(0), egl_surface_(0) {} + ~GLStateEGL(); bool init_display(void* native_display, GLVisualConfig& config_pref); bool init_surface(void* native_window); - void init_gl_extensions(); + bool init_gl_extensions(); bool valid(); bool reset(); void swap(); // Performs a config search, returning a native visual ID on success - bool gotNativeConfig(int& vid); + bool gotNativeConfig(intptr_t& vid); void getVisualConfig(GLVisualConfig& vc); }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-glx.cpp glmark2-2021.02/src/gl-state-glx.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-glx.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-state-glx.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,13 +26,6 @@ #include -namespace -{ -PFNGLXSWAPINTERVALEXTPROC glXSwapIntervalEXT_; -PFNGLXSWAPINTERVALMESAPROC glXSwapIntervalMESA_; -PFNGLXGETSWAPINTERVALMESAPROC glXGetSwapIntervalMESA_; -} - /****************** * Public methods * ******************/ @@ -43,6 +36,12 @@ xdpy_ = reinterpret_cast(native_display); requested_visual_config_ = visual_config; + if (!lib_.open_from_alternatives({"libGL.so", "libGL.so.1"})) { + Log::error("Failed to load libGL\n"); + return false; + } + + gladLoadGLXUserPtr(xdpy_, DefaultScreen(xdpy_), load_proc, this); return (xdpy_ != 0); } @@ -54,11 +53,12 @@ return (xwin_ != 0); } -void +bool GLStateGLX::init_gl_extensions() { GLExtensions::MapBuffer = glMapBuffer; GLExtensions::UnmapBuffer = glUnmapBuffer; + return true; } bool @@ -80,20 +80,26 @@ return false; } - init_gl_extensions(); + if (gladLoadGLUserPtr(load_proc, this) == 0) { + Log::error("Failed to load GL entry points\n"); + return false; + } + + if (!init_gl_extensions()) + return false; unsigned int desired_swap(0); unsigned int actual_swap(-1); - if (glXSwapIntervalEXT_) { - glXSwapIntervalEXT_(xdpy_, xwin_, desired_swap); + if (glXSwapIntervalEXT) { + glXSwapIntervalEXT(xdpy_, xwin_, desired_swap); glXQueryDrawable(xdpy_, xwin_, GLX_SWAP_INTERVAL_EXT, &actual_swap); if (actual_swap == desired_swap) return true; } - if (glXSwapIntervalMESA_) { - glXSwapIntervalMESA_(desired_swap); - actual_swap = glXGetSwapIntervalMESA_(); + if (glXSwapIntervalMESA) { + glXSwapIntervalMESA(desired_swap); + actual_swap = glXGetSwapIntervalMESA(); if (actual_swap == desired_swap) return true; } @@ -123,7 +129,7 @@ } bool -GLStateGLX::gotNativeConfig(int& vid) +GLStateGLX::gotNativeConfig(intptr_t& vid) { if (!ensure_glx_fbconfig()) return false; @@ -186,31 +192,7 @@ * GLX_SGI_swap_control is not enough because it doesn't allow 0 as a valid * value (i.e. you can't turn off VSync). */ - if (extString.find("GLX_EXT_swap_control") != std::string::npos) { - glXSwapIntervalEXT_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXSwapIntervalEXT") - ) - ); - } - else if (extString.find("GLX_MESA_swap_control") != std::string::npos) { - glXSwapIntervalMESA_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXSwapIntervalMESA") - ) - ); - glXGetSwapIntervalMESA_ = - reinterpret_cast( - glXGetProcAddress( - reinterpret_cast("glXGetSwapIntervalMESA") - ) - ); - } - - - if (!glXSwapIntervalEXT_ && !glXSwapIntervalMESA_) { + if (!glXSwapIntervalEXT && !glXSwapIntervalMESA) { Log::info("** GLX does not support GLX_EXT_swap_control or GLX_MESA_swap_control!\n"); } } @@ -346,3 +328,18 @@ return best_config; } + +GLADapiproc +GLStateGLX::load_proc(void *userptr, const char* name) +{ + if (glXGetProcAddress) { + const GLubyte* bytes = reinterpret_cast(name); + GLADapiproc sym = glXGetProcAddress(bytes); + if (sym) { + return sym; + } + } + + GLStateGLX* state = reinterpret_cast(userptr); + return reinterpret_cast(state->lib_.load(name)); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-glx.h glmark2-2021.02/src/gl-state-glx.h --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-glx.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-state-glx.h 2021-04-25 01:47:22.000000000 +0800 @@ -26,13 +26,11 @@ #include "gl-state.h" #include "gl-visual-config.h" #include "gl-headers.h" +#include "shared-library.h" #include -#include -#define GLX_GLXEXT_PROTOTYPES -#include -#include +#include class GLStateGLX : public GLState { @@ -43,10 +41,10 @@ bool valid(); bool init_display(void* native_display, GLVisualConfig& config_pref); bool init_surface(void* native_window); - void init_gl_extensions(); + bool init_gl_extensions(); bool reset(); void swap(); - bool gotNativeConfig(int& vid); + bool gotNativeConfig(intptr_t& vid); void getVisualConfig(GLVisualConfig& vc); private: @@ -57,11 +55,14 @@ void get_glvisualconfig_glx(GLXFBConfig config, GLVisualConfig &visual_config); GLXFBConfig select_best_config(std::vector configs); + static GLADapiproc load_proc(void* userptr, const char* name); + Display* xdpy_; Window xwin_; GLXFBConfig glx_fbconfig_; GLXContext glx_context_; GLVisualConfig requested_visual_config_; + SharedLibrary lib_; }; #endif /* GLMARK2_GL_STATE_GLX_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state.h glmark2-2021.02/src/gl-state.h --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-state.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,6 +22,8 @@ #ifndef GLMARK2_GL_STATE_H_ #define GLMARK2_GL_STATE_H_ +#include + class GLVisualConfig; class GLState @@ -31,11 +33,11 @@ virtual bool init_display(void *native_display, GLVisualConfig& config_pref) = 0; virtual bool init_surface(void *native_window) = 0; - virtual void init_gl_extensions() = 0; + virtual bool init_gl_extensions() = 0; virtual bool valid() = 0; virtual bool reset() = 0; virtual void swap() = 0; - virtual bool gotNativeConfig(int& vid) = 0; + virtual bool gotNativeConfig(intptr_t& vid) = 0; virtual void getVisualConfig(GLVisualConfig& vc) = 0; }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-wgl.cpp glmark2-2021.02/src/gl-state-wgl.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-wgl.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/gl-state-wgl.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,178 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +#include "gl-state-wgl.h" + +#include +#include "gl-headers.h" +#include "log.h" +#include "options.h" + +/****************** + * Public methods * + ******************/ + +GLStateWGL::GLStateWGL() : hdc_(nullptr), wgl_context_(nullptr), get_proc_addr_(nullptr) {} + +GLStateWGL::~GLStateWGL() +{ + if (wgl_context_) { + wglDeleteContext(wgl_context_); + wgl_context_ = nullptr; + } +} + +bool +GLStateWGL::init_display(void* native_display, GLVisualConfig& /*visual_config*/) +{ + if (!wgl_library_.open("opengl32.dll")) { + Log::error("Failed to load opengl32.dll\n"); + return false; + } + + hdc_ = reinterpret_cast(native_display); + + PIXELFORMATDESCRIPTOR pixel_format_desc = {}; + pixel_format_desc.nSize = sizeof(pixel_format_desc); + pixel_format_desc.nVersion = 1; + pixel_format_desc.dwFlags = + PFD_DRAW_TO_WINDOW | PFD_GENERIC_ACCELERATED | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pixel_format_desc.iPixelType = PFD_TYPE_RGBA; + pixel_format_desc.cColorBits = 24; + pixel_format_desc.cAlphaBits = 8; + pixel_format_desc.cDepthBits = 24; + pixel_format_desc.cStencilBits = 8; + pixel_format_desc.iLayerType = PFD_MAIN_PLANE; + + int pixelFormat = ChoosePixelFormat(hdc_, &pixel_format_desc); + if (pixelFormat == 0) { + Log::error("Could not find a compatible pixel format\n"); + return false; + } + + if (SetPixelFormat(hdc_, pixelFormat, &pixel_format_desc) != TRUE) { + Log::error("Failed to set the pixel format\n"); + return false; + } + + wgl_context_ = wglCreateContext(hdc_); + if (!wgl_context_) { + Log::error("Failed to create a WGL context\n"); + return false; + } + + if (wglMakeCurrent(hdc_, wgl_context_) == FALSE) { + Log::error("Error during wglMakeCurrent\n"); + return false; + } + + get_proc_addr_ = wgl_library_.load("wglGetProcAddress"); + if (!get_proc_addr_) { + Log::error("Failed to find wglGetProcAddress\n"); + return false; + } + + if (gladLoadGLUserPtr(load_proc, this) == 0) { + Log::error("Failed to load GL entry points\n"); + return false; + } + + if (gladLoadWGLUserPtr(hdc_, load_proc, this) == 0) { + Log::error("Failed to load WGL entry points\n"); + return false; + } + + return true; +} + +bool +GLStateWGL::init_surface(void* /*native_window*/) +{ + return true; +} + +bool +GLStateWGL::init_gl_extensions() +{ + GLExtensions::MapBuffer = glMapBuffer; + GLExtensions::UnmapBuffer = glUnmapBuffer; + return true; +} + +bool +GLStateWGL::valid() +{ + if (!wglSwapIntervalEXT || wglSwapIntervalEXT(0) == FALSE) { + Log::info("** Failed to set swap interval. Results may be bounded above by refresh rate.\n"); + } + + return wgl_context_ != nullptr; +} + + +bool +GLStateWGL::reset() +{ + return true; +} + +void +GLStateWGL::swap() +{ + if (SwapBuffers(hdc_) == FALSE) { + Log::error("Error during SwapBuffers\n"); + } +} + +bool +GLStateWGL::gotNativeConfig(intptr_t& vid) +{ + vid = 1; + return true; +} + +void +GLStateWGL::getVisualConfig(GLVisualConfig& vc) +{ + vc.buffer = 32; + vc.red = 8; + vc.green = 8; + vc.blue = 8; + vc.alpha = 8; + vc.depth = 24; + vc.stencil = 8; +} + +/******************* + * Private methods * + *******************/ + +GLStateWGL::api_proc GLStateWGL::load_proc(void *userptr, const char *name) +{ + GLStateWGL* state = reinterpret_cast(userptr); + auto get_proc_addr = reinterpret_cast(state->get_proc_addr_); + auto proc = reinterpret_cast(get_proc_addr(name)); + if (proc) { + return proc; + } + return reinterpret_cast(state->wgl_library_.load(name)); +} + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-state-wgl.h glmark2-2021.02/src/gl-state-wgl.h --- glmark2-2014.03+git20150611.fa71af2d/src/gl-state-wgl.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/gl-state-wgl.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,54 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +#ifndef GLMARK2_GL_STATE_WGL_H_ +#define GLMARK2_GL_STATE_WGL_H_ + +#include "gl-state.h" +#include "shared-library.h" +#include + +class GLStateWGL : public GLState +{ +public: + GLStateWGL(); + ~GLStateWGL(); + + bool init_display(void* native_display, GLVisualConfig& config_pref); + bool init_surface(void* native_window); + bool init_gl_extensions(); + bool valid(); + bool reset(); + void swap(); + bool gotNativeConfig(intptr_t& vid); + void getVisualConfig(GLVisualConfig& vc); + +private: + using api_proc = void (*)(); + static api_proc load_proc(void *userptr, const char *name); + + SharedLibrary wgl_library_; + HDC hdc_; + HGLRC wgl_context_; + void* get_proc_addr_; +}; + +#endif // GLMARK2_GL_STATE_WGL_H_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/gl-visual-config.cpp glmark2-2021.02/src/gl-visual-config.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/gl-visual-config.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/gl-visual-config.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -108,6 +108,14 @@ /* Reward exact matches with the maximum per component score */ score = MAXIMUM_COMPONENT_SCORE; } + else if (component > 8 && target <= 8 && scale > 1) + { + /* Penalize RGBA component widths larger than 8, since they are + * unlikely to be what the users want or properly supported for + * display. Such widths can still be used, but only if explicitly + * requested. */ + score = UNACCEPTABLE_COMPONENT_PENALTY; + } else { /* diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/image-reader.cpp glmark2-2021.02/src/image-reader.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/image-reader.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/image-reader.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -120,7 +120,7 @@ Log::debug("Reading PNG file %s\n", filename.c_str()); - const std::auto_ptr is_ptr(Util::get_resource(filename)); + const std::unique_ptr is_ptr(Util::get_resource(filename)); if (!(*is_ptr)) { Log::error("Cannot open file %s!\n", filename.c_str()); return false; @@ -268,7 +268,7 @@ size_t n = static_cast(num_bytes); while (n > src->pub.bytes_in_buffer) { n -= src->pub.bytes_in_buffer; - (*src->fill_input_buffer)(cinfo); + src->fill_input_buffer(cinfo); } src->pub.next_input_byte += n; src->pub.bytes_in_buffer -= n; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/include/dirent.h glmark2-2021.02/src/include/dirent.h --- glmark2-2014.03+git20150611.fa71af2d/src/include/dirent.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/include/dirent.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1224 @@ +/* + * Dirent interface for Microsoft Visual Studio + * + * Copyright (C) 2006-2012 Toni Ronkko + * This file is part of dirent. Dirent may be freely distributed + * under the MIT license. For all details and documentation, see + * https://github.com/tronkko/dirent + */ +#ifndef DIRENT_H +#define DIRENT_H + +/* + * Include windows.h without Windows Sockets 1.1 to prevent conflicts with + * Windows Sockets 2.0. + */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Indicates that d_type field is available in dirent structure */ +#define _DIRENT_HAVE_D_TYPE + +/* Indicates that d_namlen field is available in dirent structure */ +#define _DIRENT_HAVE_D_NAMLEN + +/* Entries missing from MSVC 6.0 */ +#if !defined(FILE_ATTRIBUTE_DEVICE) +# define FILE_ATTRIBUTE_DEVICE 0x40 +#endif + +/* File type and permission flags for stat(), general mask */ +#if !defined(S_IFMT) +# define S_IFMT _S_IFMT +#endif + +/* Directory bit */ +#if !defined(S_IFDIR) +# define S_IFDIR _S_IFDIR +#endif + +/* Character device bit */ +#if !defined(S_IFCHR) +# define S_IFCHR _S_IFCHR +#endif + +/* Pipe bit */ +#if !defined(S_IFFIFO) +# define S_IFFIFO _S_IFFIFO +#endif + +/* Regular file bit */ +#if !defined(S_IFREG) +# define S_IFREG _S_IFREG +#endif + +/* Read permission */ +#if !defined(S_IREAD) +# define S_IREAD _S_IREAD +#endif + +/* Write permission */ +#if !defined(S_IWRITE) +# define S_IWRITE _S_IWRITE +#endif + +/* Execute permission */ +#if !defined(S_IEXEC) +# define S_IEXEC _S_IEXEC +#endif + +/* Pipe */ +#if !defined(S_IFIFO) +# define S_IFIFO _S_IFIFO +#endif + +/* Block device */ +#if !defined(S_IFBLK) +# define S_IFBLK 0 +#endif + +/* Link */ +#if !defined(S_IFLNK) +# define S_IFLNK 0 +#endif + +/* Socket */ +#if !defined(S_IFSOCK) +# define S_IFSOCK 0 +#endif + +/* Read user permission */ +#if !defined(S_IRUSR) +# define S_IRUSR S_IREAD +#endif + +/* Write user permission */ +#if !defined(S_IWUSR) +# define S_IWUSR S_IWRITE +#endif + +/* Execute user permission */ +#if !defined(S_IXUSR) +# define S_IXUSR 0 +#endif + +/* Read group permission */ +#if !defined(S_IRGRP) +# define S_IRGRP 0 +#endif + +/* Write group permission */ +#if !defined(S_IWGRP) +# define S_IWGRP 0 +#endif + +/* Execute group permission */ +#if !defined(S_IXGRP) +# define S_IXGRP 0 +#endif + +/* Read others permission */ +#if !defined(S_IROTH) +# define S_IROTH 0 +#endif + +/* Write others permission */ +#if !defined(S_IWOTH) +# define S_IWOTH 0 +#endif + +/* Execute others permission */ +#if !defined(S_IXOTH) +# define S_IXOTH 0 +#endif + +/* Maximum length of file name */ +#if !defined(PATH_MAX) +# define PATH_MAX MAX_PATH +#endif +#if !defined(FILENAME_MAX) +# define FILENAME_MAX MAX_PATH +#endif +#if !defined(NAME_MAX) +# define NAME_MAX FILENAME_MAX +#endif + +/* File type flags for d_type */ +#define DT_UNKNOWN 0 +#define DT_REG S_IFREG +#define DT_DIR S_IFDIR +#define DT_FIFO S_IFIFO +#define DT_SOCK S_IFSOCK +#define DT_CHR S_IFCHR +#define DT_BLK S_IFBLK +#define DT_LNK S_IFLNK + +/* Macros for converting between st_mode and d_type */ +#define IFTODT(mode) ((mode) & S_IFMT) +#define DTTOIF(type) (type) + +/* + * File type macros. Note that block devices, sockets and links cannot be + * distinguished on Windows and the macros S_ISBLK, S_ISSOCK and S_ISLNK are + * only defined for compatibility. These macros should always return false + * on Windows. + */ +#if !defined(S_ISFIFO) +# define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#endif +#if !defined(S_ISDIR) +# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) +# define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#endif +#if !defined(S_ISLNK) +# define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#endif +#if !defined(S_ISSOCK) +# define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) +#endif +#if !defined(S_ISCHR) +# define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#endif +#if !defined(S_ISBLK) +# define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#endif + +/* Return the exact length of the file name without zero terminator */ +#define _D_EXACT_NAMLEN(p) ((p)->d_namlen) + +/* Return the maximum size of a file name */ +#define _D_ALLOC_NAMLEN(p) ((PATH_MAX)+1) + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Wide-character version */ +struct _wdirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + wchar_t d_name[PATH_MAX+1]; +}; +typedef struct _wdirent _wdirent; + +struct _WDIR { + /* Current directory entry */ + struct _wdirent ent; + + /* Private file data */ + WIN32_FIND_DATAW data; + + /* True if data is valid */ + int cached; + + /* Win32 search handle */ + HANDLE handle; + + /* Initial directory name */ + wchar_t *patt; +}; +typedef struct _WDIR _WDIR; + +/* Multi-byte character version */ +struct dirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + char d_name[PATH_MAX+1]; +}; +typedef struct dirent dirent; + +struct DIR { + struct dirent ent; + struct _WDIR *wdirp; +}; +typedef struct DIR DIR; + + +/* Dirent functions */ +static DIR *opendir (const char *dirname); +static _WDIR *_wopendir (const wchar_t *dirname); + +static struct dirent *readdir (DIR *dirp); +static struct _wdirent *_wreaddir (_WDIR *dirp); + +static int readdir_r( + DIR *dirp, struct dirent *entry, struct dirent **result); +static int _wreaddir_r( + _WDIR *dirp, struct _wdirent *entry, struct _wdirent **result); + +static int closedir (DIR *dirp); +static int _wclosedir (_WDIR *dirp); + +static void rewinddir (DIR* dirp); +static void _wrewinddir (_WDIR* dirp); + +static int scandir (const char *dirname, struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)); + +static int alphasort (const struct dirent **a, const struct dirent **b); + +static int versionsort (const struct dirent **a, const struct dirent **b); + + +/* For compatibility with Symbian */ +#define wdirent _wdirent +#define WDIR _WDIR +#define wopendir _wopendir +#define wreaddir _wreaddir +#define wclosedir _wclosedir +#define wrewinddir _wrewinddir + + +/* Internal utility functions */ +static WIN32_FIND_DATAW *dirent_first (_WDIR *dirp); +static WIN32_FIND_DATAW *dirent_next (_WDIR *dirp); + +static int dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count); + +static int dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, + const wchar_t *wcstr, + size_t count); + +static void dirent_set_errno (int error); + + +/* + * Open directory stream DIRNAME for read and return a pointer to the + * internal working area that is used to retrieve individual directory + * entries. + */ +static _WDIR* +_wopendir( + const wchar_t *dirname) +{ + _WDIR *dirp = NULL; + int error; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate new _WDIR structure */ + dirp = (_WDIR*) malloc (sizeof (struct _WDIR)); + if (dirp != NULL) { + DWORD n; + + /* Reset _WDIR structure */ + dirp->handle = INVALID_HANDLE_VALUE; + dirp->patt = NULL; + dirp->cached = 0; + + /* Compute the length of full path plus zero terminator + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) + n = wcslen(dirname); +# else + n = GetFullPathNameW (dirname, 0, NULL, NULL); +# endif + + /* Allocate room for absolute directory name and search pattern */ + dirp->patt = (wchar_t*) malloc (sizeof (wchar_t) * n + 16); + if (dirp->patt) { + + /* + * Convert relative directory name to an absolute one. This + * allows rewinddir() to function correctly even when current + * working directory is changed between opendir() and rewinddir(). + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) + wcsncpy_s(dirp->patt, n+1, dirname, n); +# else + n = GetFullPathNameW (dirname, n, dirp->patt, NULL); +# endif + if (n > 0) { + wchar_t *p; + + /* Append search pattern \* to the directory name */ + p = dirp->patt + n; + if (dirp->patt < p) { + switch (p[-1]) { + case '\\': + case '/': + case ':': + /* Directory ends in path separator, e.g. c:\temp\ */ + /*NOP*/; + break; + + default: + /* Directory name doesn't end in path separator */ + *p++ = '\\'; + } + } + *p++ = '*'; + *p = '\0'; + + /* Open directory stream and retrieve the first entry */ + if (dirent_first (dirp)) { + /* Directory stream opened successfully */ + error = 0; + } else { + /* Cannot retrieve first entry */ + error = 1; + dirent_set_errno (ENOENT); + } + + } else { + /* Cannot retrieve full path name */ + dirent_set_errno (ENOENT); + error = 1; + } + + } else { + /* Cannot allocate memory for search pattern */ + error = 1; + } + + } else { + /* Cannot allocate _WDIR structure */ + error = 1; + } + + /* Clean up in case of error */ + if (error && dirp) { + _wclosedir (dirp); + dirp = NULL; + } + + return dirp; +} + +/* + * Read next directory entry. + * + * Returns pointer to static directory entry which may be overwritten by + * subsequent calls to _wreaddir(). + */ +static struct _wdirent* +_wreaddir( + _WDIR *dirp) +{ + struct _wdirent *entry; + + /* + * Read directory entry to buffer. We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) _wreaddir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry. + * + * Returns zero on success. If end of directory stream is reached, then sets + * result to NULL and returns zero. + */ +static int +_wreaddir_r( + _WDIR *dirp, + struct _wdirent *entry, + struct _wdirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp); + if (datap) { + size_t n; + DWORD attr; + + /* + * Copy file name as wide-character string. If the file name is too + * long to fit in to the destination buffer, then truncate file name + * to PATH_MAX characters and zero-terminate the buffer. + */ + n = 0; + while (n < PATH_MAX && datap->cFileName[n] != 0) { + entry->d_name[n] = datap->cFileName[n]; + n++; + } + entry->d_name[n] = 0; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n; + + /* File type */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct _wdirent); + + /* Set result address */ + *result = entry; + + } else { + + /* Return NULL to indicate end of directory */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream opened by opendir() function. This invalidates the + * DIR structure as well as any directory entry read previously by + * _wreaddir(). + */ +static int +_wclosedir( + _WDIR *dirp) +{ + int ok; + if (dirp) { + + /* Release search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + dirp->handle = INVALID_HANDLE_VALUE; + } + + /* Release search pattern */ + if (dirp->patt) { + free (dirp->patt); + dirp->patt = NULL; + } + + /* Release directory structure */ + free (dirp); + ok = /*success*/0; + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream such that _wreaddir() returns the very first + * file name again. + */ +static void +_wrewinddir( + _WDIR* dirp) +{ + if (dirp) { + /* Release existing search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + } + + /* Open new search handle */ + dirent_first (dirp); + } +} + +/* Get first directory entry (internal) */ +static WIN32_FIND_DATAW* +dirent_first( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *datap; + + /* Open directory and retrieve the first entry */ + dirp->handle = FindFirstFileExW( + dirp->patt, FindExInfoStandard, &dirp->data, + FindExSearchNameMatch, NULL, 0); + if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* a directory entry is now waiting in memory */ + datap = &dirp->data; + dirp->cached = 1; + + } else { + + /* Failed to re-open directory: no directory entry in memory */ + dirp->cached = 0; + datap = NULL; + + } + return datap; +} + +/* + * Get next directory entry (internal). + * + * Returns + */ +static WIN32_FIND_DATAW* +dirent_next( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *p; + + /* Get next directory entry */ + if (dirp->cached != 0) { + + /* A valid directory entry already in memory */ + p = &dirp->data; + dirp->cached = 0; + + } else if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* Get the next directory entry from stream */ + if (FindNextFileW (dirp->handle, &dirp->data) != FALSE) { + /* Got a file */ + p = &dirp->data; + } else { + /* The very last entry has been processed or an error occurred */ + FindClose (dirp->handle); + dirp->handle = INVALID_HANDLE_VALUE; + p = NULL; + } + + } else { + + /* End of directory stream reached */ + p = NULL; + + } + + return p; +} + +/* + * Open directory stream using plain old C-string. + */ +static DIR* +opendir( + const char *dirname) +{ + struct DIR *dirp; + int error; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate memory for DIR structure */ + dirp = (DIR*) malloc (sizeof (struct DIR)); + if (dirp) { + wchar_t wname[PATH_MAX + 1]; + size_t n; + + /* Convert directory name to wide-character string */ + error = dirent_mbstowcs_s( + &n, wname, PATH_MAX + 1, dirname, PATH_MAX + 1); + if (!error) { + + /* Open directory stream using wide-character name */ + dirp->wdirp = _wopendir (wname); + if (dirp->wdirp) { + /* Directory stream opened */ + error = 0; + } else { + /* Failed to open directory stream */ + error = 1; + } + + } else { + /* + * Cannot convert file name to wide-character string. This + * occurs if the string contains invalid multi-byte sequences or + * the output buffer is too small to contain the resulting + * string. + */ + error = 1; + } + + } else { + /* Cannot allocate DIR structure */ + error = 1; + } + + /* Clean up in case of error */ + if (error && dirp) { + free (dirp); + dirp = NULL; + } + + return dirp; +} + +/* + * Read next directory entry. + */ +static struct dirent* +readdir( + DIR *dirp) +{ + struct dirent *entry; + + /* + * Read directory entry to buffer. We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) readdir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry into called-allocated buffer. + * + * Returns zero on success. If the end of directory stream is reached, then + * sets result to NULL and returns zero. + */ +static int +readdir_r( + DIR *dirp, + struct dirent *entry, + struct dirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp->wdirp); + if (datap) { + size_t n; + int error; + + /* Attempt to convert file name to multi-byte string */ + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, datap->cFileName, PATH_MAX + 1); + + /* + * If the file name cannot be represented by a multi-byte string, + * then attempt to use old 8+3 file name. This allows traditional + * Unix-code to access some file names despite of unicode + * characters, although file names may seem unfamiliar to the user. + * + * Be ware that the code below cannot come up with a short file + * name unless the file system provides one. At least + * VirtualBox shared folders fail to do this. + */ + if (error && datap->cAlternateFileName[0] != '\0') { + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, + datap->cAlternateFileName, PATH_MAX + 1); + } + + if (!error) { + DWORD attr; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n - 1; + + /* File attributes */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct dirent); + + } else { + + /* + * Cannot convert file name to multi-byte string so construct + * an erroneous directory entry and return that. Note that + * we cannot return NULL as that would stop the processing + * of directory entries completely. + */ + entry->d_name[0] = '?'; + entry->d_name[1] = '\0'; + entry->d_namlen = 1; + entry->d_type = DT_UNKNOWN; + entry->d_ino = 0; + entry->d_off = -1; + entry->d_reclen = 0; + + } + + /* Return pointer to directory entry */ + *result = entry; + + } else { + + /* No more directory entries */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream. + */ +static int +closedir( + DIR *dirp) +{ + int ok; + if (dirp) { + + /* Close wide-character directory stream */ + ok = _wclosedir (dirp->wdirp); + dirp->wdirp = NULL; + + /* Release multi-byte character version */ + free (dirp); + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream to beginning. + */ +static void +rewinddir( + DIR* dirp) +{ + /* Rewind wide-character string directory stream */ + _wrewinddir (dirp->wdirp); +} + +/* + * Scan directory for entries. + */ +static int +scandir( + const char *dirname, + struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)) +{ + struct dirent **files = NULL; + size_t size = 0; + size_t allocated = 0; + const size_t init_size = 1; + DIR *dir = NULL; + struct dirent *entry; + struct dirent *tmp = NULL; + size_t i; + int result = 0; + + /* Open directory stream */ + dir = opendir (dirname); + if (dir) { + + /* Read directory entries to memory */ + while (1) { + + /* Enlarge pointer table to make room for another pointer */ + if (size >= allocated) { + void *p; + size_t num_entries; + + /* Compute number of entries in the enlarged pointer table */ + if (size < init_size) { + /* Allocate initial pointer table */ + num_entries = init_size; + } else { + /* Double the size */ + num_entries = size * 2; + } + + /* Allocate first pointer table or enlarge existing table */ + p = realloc (files, sizeof (void*) * num_entries); + if (p != NULL) { + /* Got the memory */ + files = (dirent**) p; + allocated = num_entries; + } else { + /* Out of memory */ + result = -1; + break; + } + + } + + /* Allocate room for temporary directory entry */ + if (tmp == NULL) { + tmp = (struct dirent*) malloc (sizeof (struct dirent)); + if (tmp == NULL) { + /* Cannot allocate temporary directory entry */ + result = -1; + break; + } + } + + /* Read directory entry to temporary area */ + if (readdir_r (dir, tmp, &entry) == /*OK*/0) { + + /* Did we get an entry? */ + if (entry != NULL) { + int pass; + + /* Determine whether to include the entry in result */ + if (filter) { + /* Let the filter function decide */ + pass = filter (tmp); + } else { + /* No filter function, include everything */ + pass = 1; + } + + if (pass) { + /* Store the temporary entry to pointer table */ + files[size++] = tmp; + tmp = NULL; + + /* Keep up with the number of files */ + result++; + } + + } else { + + /* + * End of directory stream reached => sort entries and + * exit. + */ + qsort (files, size, sizeof (void*), + (int (*) (const void*, const void*)) compare); + break; + + } + + } else { + /* Error reading directory entry */ + result = /*Error*/ -1; + break; + } + + } + + } else { + /* Cannot open directory */ + result = /*Error*/ -1; + } + + /* Release temporary directory entry */ + if (tmp) { + free (tmp); + } + + /* Release allocated memory on error */ + if (result < 0) { + for (i = 0; i < size; i++) { + free (files[i]); + } + free (files); + files = NULL; + } + + /* Close directory stream */ + if (dir) { + closedir (dir); + } + + /* Pass pointer table to caller */ + if (namelist) { + *namelist = files; + } + return result; +} + +/* Alphabetical sorting */ +static int +alphasort( + const struct dirent **a, const struct dirent **b) +{ + return strcoll ((*a)->d_name, (*b)->d_name); +} + +/* Sort versions */ +static int +versionsort( + const struct dirent **a, const struct dirent **b) +{ + /* FIXME: implement strverscmp and use that */ + return alphasort (a, b); +} + +/* Convert multi-byte string to wide character string */ +static int +dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count) +{ + int error; + int n; + size_t len; + UINT cp; + DWORD flags; + + /* Determine code page for multi-byte string */ + if (AreFileApisANSI ()) { + /* Default ANSI code page */ + cp = GetACP (); + } else { + /* Default OEM code page */ + cp = GetOEMCP (); + } + + /* + * Determine flags based on the character set. For more information, + * please see https://docs.microsoft.com/fi-fi/windows/desktop/api/stringapiset/nf-stringapiset-multibytetowidechar + */ + switch (cp) { + case 42: + case 50220: + case 50221: + case 50222: + case 50225: + case 50227: + case 50229: + case 57002: + case 57003: + case 57004: + case 57005: + case 57006: + case 57007: + case 57008: + case 57009: + case 57010: + case 57011: + case 65000: + /* MultiByteToWideChar does not support MB_ERR_INVALID_CHARS */ + flags = 0; + break; + + default: + /* + * Ask MultiByteToWideChar to return an error if a multi-byte + * character cannot be converted to a wide-character. + */ + flags = MB_ERR_INVALID_CHARS; + } + + /* Compute the length of input string without zero-terminator */ + len = 0; + while (mbstr[len] != '\0' && len < count) { + len++; + } + + /* Convert to wide-character string */ + n = MultiByteToWideChar( + /* Source code page */ cp, + /* Flags */ flags, + /* Pointer to string to convert */ mbstr, + /* Size of multi-byte string */ (int) len, + /* Pointer to output buffer */ wcstr, + /* Size of output buffer */ (int)sizeInWords - 1 + ); + + /* Ensure that output buffer is zero-terminated */ + wcstr[n] = '\0'; + + /* Return length of wide-character string with zero-terminator */ + *pReturnValue = (size_t) (n + 1); + + /* Return zero if conversion succeeded */ + if (n > 0) { + error = 0; + } else { + error = 1; + } + return error; +} + +/* Convert wide-character string to multi-byte string */ +static int +dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, /* max size of mbstr */ + const wchar_t *wcstr, + size_t count) +{ + int n; + int error; + UINT cp; + size_t len; + BOOL flag = 0; + LPBOOL pflag; + + /* Determine code page for multi-byte string */ + if (AreFileApisANSI ()) { + /* Default ANSI code page */ + cp = GetACP (); + } else { + /* Default OEM code page */ + cp = GetOEMCP (); + } + + /* Compute the length of input string without zero-terminator */ + len = 0; + while (wcstr[len] != '\0' && len < count) { + len++; + } + + /* + * Determine if we can ask WideCharToMultiByte to return information on + * broken characters. For more information, please see + * https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-widechartomultibyte + */ + switch (cp) { + case CP_UTF7: + case CP_UTF8: + /* + * WideCharToMultiByte fails if we request information on default + * characters. This is just a nuisance but doesn't affect the + * conversion: if Windows is configured to use UTF-8, then the default + * character should not be needed anyway. + */ + pflag = NULL; + break; + + default: + /* + * Request that WideCharToMultiByte sets the flag if it uses the + * default character. + */ + pflag = &flag; + } + + /* Convert wide-character string to multi-byte character string */ + n = WideCharToMultiByte( + /* Target code page */ cp, + /* Flags */ 0, + /* Pointer to unicode string */ wcstr, + /* Length of unicode string */ (int) len, + /* Pointer to output buffer */ mbstr, + /* Size of output buffer */ (int)sizeInBytes - 1, + /* Default character */ NULL, + /* Whether default character was used or not */ pflag + ); + + /* Ensure that output buffer is zero-terminated */ + mbstr[n] = '\0'; + + /* Return length of multi-byte string with zero-terminator */ + *pReturnValue = (size_t) (n + 1); + + /* Return zero if conversion succeeded without using default characters */ + if (n > 0 && flag == 0) { + error = 0; + } else { + error = 1; + } + return error; +} + +/* Set errno variable */ +static void +dirent_set_errno( + int error) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + /* Microsoft Visual Studio 2005 and later */ + _set_errno (error); + +#else + + /* Non-Microsoft compiler or older Microsoft compiler */ + errno = error; + +#endif +} + + +#ifdef __cplusplus +} +#endif +#endif /*DIRENT_H*/ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/include/getopt.h glmark2-2021.02/src/include/getopt.h --- glmark2-2014.03+git20150611.fa71af2d/src/include/getopt.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/include/getopt.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,653 @@ +#ifndef __GETOPT_H__ +/** + * DISCLAIMER + * This file is part of the mingw-w64 runtime package. + * + * The mingw-w64 runtime package and its code is distributed in the hope that it + * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR + * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to + * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + /* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma warning(disable:4996) + +#define __GETOPT_H__ + +/* All the headers include this file. */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ + +#ifdef REPLACE_GETOPT +int opterr = 1; /* if error message should be printed */ +int optind = 1; /* index into parent argv vector */ +int optopt = '?'; /* character checked for validity */ +#undef optreset /* see getopt.h */ +#define optreset __mingw_optreset +int optreset; /* reset getopt */ +char *optarg; /* argument associated with option */ +#endif + +//extern int optind; /* index of first non-option in argv */ +//extern int optopt; /* single option character, as parsed */ +//extern int opterr; /* flag to enable built-in diagnostics... */ +// /* (user may set to zero, to suppress) */ +// +//extern char *optarg; /* pointer to argument of current option */ + +#define PRINT_ERROR ((opterr) && (*options != ':')) + +#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ +#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ +#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ + +/* return values */ +#define BADCH (int)'?' +#define BADARG ((*options == ':') ? (int)':' : (int)'?') +#define INORDER (int)1 + +#ifndef __CYGWIN__ +#define __progname __argv[0] +#else +extern char __declspec(dllimport) *__progname; +#endif + +#ifdef __CYGWIN__ +static char EMSG[] = ""; +#else +#define EMSG "" +#endif + +static int getopt_internal(int, char * const *, const char *, + const struct option *, int *, int); +static int parse_long_options(char * const *, const char *, + const struct option *, int *, int); +static int gcd(int, int); +static void permute_args(int, int, int, char * const *); + +static char *place = EMSG; /* option letter processing */ + +/* XXX: set optreset to 1 rather than these two */ +static int nonopt_start = -1; /* first non option argument (for permute) */ +static int nonopt_end = -1; /* first option after non options (for permute) */ + +/* Error messages */ +static const char recargchar[] = "option requires an argument -- %c"; +static const char recargstring[] = "option requires an argument -- %s"; +static const char ambig[] = "ambiguous option -- %.*s"; +static const char noarg[] = "option doesn't take an argument -- %.*s"; +static const char illoptchar[] = "unknown option -- %c"; +static const char illoptstring[] = "unknown option -- %s"; + +static void +_vwarnx(const char *fmt,va_list ap) +{ + (void)fprintf(stderr,"%s: ",__progname); + if (fmt != NULL) + (void)vfprintf(stderr,fmt,ap); + (void)fprintf(stderr,"\n"); +} + +static void +warnx(const char *fmt,...) +{ + va_list ap; + va_start(ap,fmt); + _vwarnx(fmt,ap); + va_end(ap); +} + +/* + * Compute the greatest common divisor of a and b. + */ +static int +gcd(int a, int b) +{ + int c; + + c = a % b; + while (c != 0) { + a = b; + b = c; + c = a % b; + } + + return (b); +} + +/* + * Exchange the block from nonopt_start to nonopt_end with the block + * from nonopt_end to opt_end (keeping the same order of arguments + * in each block). + */ +static void +permute_args(int panonopt_start, int panonopt_end, int opt_end, + char * const *nargv) +{ + int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; + char *swap; + + /* + * compute lengths of blocks and number and size of cycles + */ + nnonopts = panonopt_end - panonopt_start; + nopts = opt_end - panonopt_end; + ncycle = gcd(nnonopts, nopts); + cyclelen = (opt_end - panonopt_start) / ncycle; + + for (i = 0; i < ncycle; i++) { + cstart = panonopt_end+i; + pos = cstart; + for (j = 0; j < cyclelen; j++) { + if (pos >= panonopt_end) + pos -= nnonopts; + else + pos += nopts; + swap = nargv[pos]; + /* LINTED const cast */ + ((char **) nargv)[pos] = nargv[cstart]; + /* LINTED const cast */ + ((char **)nargv)[cstart] = swap; + } + } +} + +#ifdef REPLACE_GETOPT +/* + * getopt -- + * Parse argc/argv argument vector. + * + * [eventually this will replace the BSD getopt] + */ +int +getopt(int nargc, char * const *nargv, const char *options) +{ + + /* + * We don't pass FLAG_PERMUTE to getopt_internal() since + * the BSD getopt(3) (unlike GNU) has never done this. + * + * Furthermore, since many privileged programs call getopt() + * before dropping privileges it makes sense to keep things + * as simple (and bug-free) as possible. + */ + return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); +} +#endif /* REPLACE_GETOPT */ + +//extern int getopt(int nargc, char * const *nargv, const char *options); + +#ifdef _BSD_SOURCE +/* + * BSD adds the non-standard `optreset' feature, for reinitialisation + * of `getopt' parsing. We support this feature, for applications which + * proclaim their BSD heritage, before including this header; however, + * to maintain portability, developers are advised to avoid it. + */ +# define optreset __mingw_optreset +extern int optreset; +#endif +#ifdef __cplusplus +} +#endif +/* + * POSIX requires the `getopt' API to be specified in `unistd.h'; + * thus, `unistd.h' includes this header. However, we do not want + * to expose the `getopt_long' or `getopt_long_only' APIs, when + * included in this manner. Thus, close the standard __GETOPT_H__ + * declarations block, and open an additional __GETOPT_LONG_H__ + * specific block, only when *not* __UNISTD_H_SOURCED__, in which + * to declare the extended API. + */ +#endif /* !defined(__GETOPT_H__) */ + +#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) +#define __GETOPT_LONG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +struct option /* specification for a long form option... */ +{ + const char *name; /* option name, without leading hyphens */ + int has_arg; /* does it take an argument? */ + int *flag; /* where to save its status, or NULL */ + int val; /* its associated status value */ +}; + +enum /* permitted values for its `has_arg' field... */ +{ + no_argument = 0, /* option never takes an argument */ + required_argument, /* option always requires an argument */ + optional_argument /* option may take an argument */ +}; + +/* + * parse_long_options -- + * Parse long options in argc/argv argument vector. + * Returns -1 if short_too is set and the option does not match long_options. + */ +static int +parse_long_options(char * const *nargv, const char *options, + const struct option *long_options, int *idx, int short_too) +{ + char *current_argv, *has_equal; + size_t current_argv_len; + int i, ambiguous, match; + +#define IDENTICAL_INTERPRETATION(_x, _y) \ + (long_options[(_x)].has_arg == long_options[(_y)].has_arg && \ + long_options[(_x)].flag == long_options[(_y)].flag && \ + long_options[(_x)].val == long_options[(_y)].val) + + current_argv = place; + match = -1; + ambiguous = 0; + + optind++; + + if ((has_equal = strchr(current_argv, '=')) != NULL) { + /* argument found (--option=arg) */ + current_argv_len = has_equal - current_argv; + has_equal++; + } else + current_argv_len = strlen(current_argv); + + for (i = 0; long_options[i].name; i++) { + /* find matching long option */ + if (strncmp(current_argv, long_options[i].name, + current_argv_len)) + continue; + + if (strlen(long_options[i].name) == current_argv_len) { + /* exact match */ + match = i; + ambiguous = 0; + break; + } + /* + * If this is a known short option, don't allow + * a partial match of a single character. + */ + if (short_too && current_argv_len == 1) + continue; + + if (match == -1) /* partial match */ + match = i; + else if (!IDENTICAL_INTERPRETATION(i, match)) + ambiguous = 1; + } + if (ambiguous) { + /* ambiguous abbreviation */ + if (PRINT_ERROR) + warnx(ambig, (int)current_argv_len, + current_argv); + optopt = 0; + return (BADCH); + } + if (match != -1) { /* option found */ + if (long_options[match].has_arg == no_argument + && has_equal) { + if (PRINT_ERROR) + warnx(noarg, (int)current_argv_len, + current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + return (BADARG); + } + if (long_options[match].has_arg == required_argument || + long_options[match].has_arg == optional_argument) { + if (has_equal) + optarg = has_equal; + else if (long_options[match].has_arg == + required_argument) { + /* + * optional argument doesn't use next nargv + */ + optarg = nargv[optind++]; + } + } + if ((long_options[match].has_arg == required_argument) + && (optarg == NULL)) { + /* + * Missing argument; leading ':' indicates no error + * should be generated. + */ + if (PRINT_ERROR) + warnx(recargstring, + current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + --optind; + return (BADARG); + } + } else { /* unknown option */ + if (short_too) { + --optind; + return (-1); + } + if (PRINT_ERROR) + warnx(illoptstring, current_argv); + optopt = 0; + return (BADCH); + } + if (idx) + *idx = match; + if (long_options[match].flag) { + *long_options[match].flag = long_options[match].val; + return (0); + } else + return (long_options[match].val); +#undef IDENTICAL_INTERPRETATION +} + +/* + * getopt_internal -- + * Parse argc/argv argument vector. Called by user level routines. + */ +static int +getopt_internal(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx, int flags) +{ + char *oli; /* option letter list index */ + int optchar, short_too; + static int posixly_correct = -1; + + if (options == NULL) + return (-1); + + /* + * XXX Some GNU programs (like cvs) set optind to 0 instead of + * XXX using optreset. Work around this braindamage. + */ + if (optind == 0) + optind = optreset = 1; + + /* + * Disable GNU extensions if POSIXLY_CORRECT is set or options + * string begins with a '+'. + * + * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or + * optreset != 0 for GNU compatibility. + */ + if (posixly_correct == -1 || optreset != 0) + posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); + if (*options == '-') + flags |= FLAG_ALLARGS; + else if (posixly_correct || *options == '+') + flags &= ~FLAG_PERMUTE; + if (*options == '+' || *options == '-') + options++; + + optarg = NULL; + if (optreset) + nonopt_start = nonopt_end = -1; +start: + if (optreset || !*place) { /* update scanning pointer */ + optreset = 0; + if (optind >= nargc) { /* end of argument vector */ + place = EMSG; + if (nonopt_end != -1) { + /* do permutation, if we have to */ + permute_args(nonopt_start, nonopt_end, + optind, nargv); + optind -= nonopt_end - nonopt_start; + } + else if (nonopt_start != -1) { + /* + * If we skipped non-options, set optind + * to the first of them. + */ + optind = nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + if (*(place = nargv[optind]) != '-' || + (place[1] == '\0' && strchr(options, '-') == NULL)) { + place = EMSG; /* found non-option */ + if (flags & FLAG_ALLARGS) { + /* + * GNU extension: + * return non-option as argument to option 1 + */ + optarg = nargv[optind++]; + return (INORDER); + } + if (!(flags & FLAG_PERMUTE)) { + /* + * If no permutation wanted, stop parsing + * at first non-option. + */ + return (-1); + } + /* do permutation */ + if (nonopt_start == -1) + nonopt_start = optind; + else if (nonopt_end != -1) { + permute_args(nonopt_start, nonopt_end, + optind, nargv); + nonopt_start = optind - + (nonopt_end - nonopt_start); + nonopt_end = -1; + } + optind++; + /* process next argument */ + goto start; + } + if (nonopt_start != -1 && nonopt_end == -1) + nonopt_end = optind; + + /* + * If we have "-" do nothing, if "--" we are done. + */ + if (place[1] != '\0' && *++place == '-' && place[1] == '\0') { + optind++; + place = EMSG; + /* + * We found an option (--), so if we skipped + * non-options, we have to permute. + */ + if (nonopt_end != -1) { + permute_args(nonopt_start, nonopt_end, + optind, nargv); + optind -= nonopt_end - nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + } + + /* + * Check long options if: + * 1) we were passed some + * 2) the arg is not just "-" + * 3) either the arg starts with -- we are getopt_long_only() + */ + if (long_options != NULL && place != nargv[optind] && + (*place == '-' || (flags & FLAG_LONGONLY))) { + short_too = 0; + if (*place == '-') + place++; /* --foo long option */ + else if (*place != ':' && strchr(options, *place) != NULL) + short_too = 1; /* could be short option too */ + + optchar = parse_long_options(nargv, options, long_options, + idx, short_too); + if (optchar != -1) { + place = EMSG; + return (optchar); + } + } + + if ((optchar = (int)*place++) == (int)':' || + (optchar == (int)'-' && *place != '\0') || + (oli = (char*)strchr(options, optchar)) == NULL) { + /* + * If the user specified "-" and '-' isn't listed in + * options, return -1 (non-option) as per POSIX. + * Otherwise, it is an unknown option character (or ':'). + */ + if (optchar == (int)'-' && *place == '\0') + return (-1); + if (!*place) + ++optind; + if (PRINT_ERROR) + warnx(illoptchar, optchar); + optopt = optchar; + return (BADCH); + } + if (long_options != NULL && optchar == 'W' && oli[1] == ';') { + /* -W long-option */ + if (*place) /* no space */ + /* NOTHING */; + else if (++optind >= nargc) { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } else /* white space */ + place = nargv[optind]; + optchar = parse_long_options(nargv, options, long_options, + idx, 0); + place = EMSG; + return (optchar); + } + if (*++oli != ':') { /* doesn't take argument */ + if (!*place) + ++optind; + } else { /* takes (optional) argument */ + optarg = NULL; + if (*place) /* no white space */ + optarg = place; + else if (oli[1] != ':') { /* arg not optional */ + if (++optind >= nargc) { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } else + optarg = nargv[optind]; + } + place = EMSG; + ++optind; + } + /* dump back option letter */ + return (optchar); +} + +/* + * getopt_long -- + * Parse argc/argv argument vector. + */ +int +getopt_long(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE)); +} + +/* + * getopt_long_only -- + * Parse argc/argv argument vector. + */ +int +getopt_long_only(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE|FLAG_LONGONLY)); +} + +//extern int getopt_long(int nargc, char * const *nargv, const char *options, +// const struct option *long_options, int *idx); +//extern int getopt_long_only(int nargc, char * const *nargv, const char *options, +// const struct option *long_options, int *idx); +/* + * Previous MinGW implementation had... + */ +#ifndef HAVE_DECL_GETOPT +/* + * ...for the long form API only; keep this for compatibility. + */ +# define HAVE_DECL_GETOPT 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/include/sys/time.h glmark2-2021.02/src/include/sys/time.h --- glmark2-2014.03+git20150611.fa71af2d/src/include/sys/time.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/include/sys/time.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,45 @@ +// +// Copyright © 2019 Jamie Madill +// +// This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. +// +// glmark2 is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// glmark2. If not, see . +// +// Authors: +// Jamie Madill +// +// From Stack Overflow under Create Commons license: +// https://stackoverflow.com/a/31335254 + +#ifndef SYS_TIME_H_FOR_WINDOWS_ +#define SYS_TIME_H_FOR_WINDOWS_ + +#ifdef _WIN32 +#include +#include +#define CLOCK_MONOTONIC 0 +inline int clock_gettime(int, struct timespec *spec) +{ + __int64 wintime; + GetSystemTimeAsFileTime((FILETIME*)&wintime); + wintime -= 116444736000000000i64; // 1601 to 1970 + spec->tv_sec = wintime / 10000000i64; // seconds + spec->tv_nsec = wintime % 10000000i64 *100; // nanoseconds + return 0; +} +#else +#include +#endif + +#endif // SYS_TIME_H_FOR_WINDOWS_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/acinclude.m4 glmark2-2021.02/src/libjpeg-turbo/acinclude.m4 --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/acinclude.m4 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/acinclude.m4 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,254 @@ +# AC_PROG_NASM +# -------------------------- +# Check that NASM exists and determine flags +AC_DEFUN([AC_PROG_NASM],[ + +AC_ARG_VAR(NASM, [NASM command (used to build the x86/x86-64 SIMD code)]) +if test "x$NASM" = "x"; then + AC_CHECK_PROGS(NASM, [nasm nasmw yasm]) + test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found]) +fi + +AC_MSG_CHECKING([for object file format of host system]) +case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + case "$host_cpu" in + x86_64) + objfmt='Win64-COFF' + ;; + *) + objfmt='Win32-COFF' + ;; + esac + ;; + msdosdjgpp* | go32*) + objfmt='COFF' + ;; + os2-emx*) # not tested + objfmt='MSOMF' # obj + ;; + linux*coff* | linux*oldld*) + objfmt='COFF' # ??? + ;; + linux*aout*) + objfmt='a.out' + ;; + linux*) + case "$host_cpu" in + x86_64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + ;; + kfreebsd* | freebsd* | netbsd* | openbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + objfmt='BSD-a.out' + else + case "$host_cpu" in + x86_64 | amd64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + fi + ;; + solaris* | sunos* | sysv* | sco*) + case "$host_cpu" in + x86_64) + objfmt='ELF64' + ;; + *) + objfmt='ELF' + ;; + esac + ;; + darwin* | rhapsody* | nextstep* | openstep* | macos*) + case "$host_cpu" in + x86_64) + objfmt='Mach-O64' + ;; + *) + objfmt='Mach-O' + ;; + esac + ;; + *) + objfmt='ELF ?' + ;; +esac + +AC_MSG_RESULT([$objfmt]) +if test "$objfmt" = 'ELF ?'; then + objfmt='ELF' + AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.]) +fi + +AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ]) +case "$objfmt" in + MSOMF) NAFLAGS='-fobj -DOBJ32';; + Win32-COFF) NAFLAGS='-fwin32 -DWIN32';; + Win64-COFF) NAFLAGS='-fwin64 -DWIN64 -D__x86_64__';; + COFF) NAFLAGS='-fcoff -DCOFF';; + a.out) NAFLAGS='-faout -DAOUT';; + BSD-a.out) NAFLAGS='-faoutb -DAOUT';; + ELF) NAFLAGS='-felf -DELF';; + ELF64) NAFLAGS='-felf64 -DELF -D__x86_64__';; + RDF) NAFLAGS='-frdf -DRDF';; + Mach-O) NAFLAGS='-fmacho -DMACHO';; + Mach-O64) NAFLAGS='-fmacho64 -DMACHO -D__x86_64__';; +esac +AC_MSG_RESULT([$NAFLAGS]) +AC_SUBST([NAFLAGS]) + +AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works]) +cat > conftest.asm <&AC_FD_CC + cat conftest.asm >&AC_FD_CC + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.]) +fi + +AC_MSG_CHECKING([whether the linker accepts assembler output]) +try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC' +if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then + rm -rf conftest* + AC_MSG_RESULT(yes) +else + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([configuration problem: maybe object file format mismatch.]) +fi + +]) + +# AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE +# -------------------------- +# Test whether the assembler is suitable and supports NEON instructions +AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[ + ac_good_gnu_arm_assembler=no + ac_save_CC="$CC" + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -x assembler-with-cpp" + CC="$CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .arm + pld [r0] + vmovn.u16 d0, q0]])], ac_good_gnu_arm_assembler=yes) + + ac_use_gas_preprocessor=no + if test "x$ac_good_gnu_arm_assembler" = "xno" ; then + CC="gas-preprocessor.pl $CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .arm + pld [r0] + vmovn.u16 d0, q0]])], ac_use_gas_preprocessor=yes) + fi + CFLAGS="$ac_save_CFLAGS" + CC="$ac_save_CC" + + if test "x$ac_use_gas_preprocessor" = "xyes" ; then + CCAS="gas-preprocessor.pl $CCAS" + AC_SUBST([CCAS]) + ac_good_gnu_arm_assembler=yes + fi + + if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then + $1 + else + $2 + fi +]) + +# AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE +# -------------------------- +# Test whether the assembler is suitable and supports MIPS instructions +AC_DEFUN([AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE],[ + have_mips_dspr2=no + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -mdspr2" + + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + + int main () + { + int c = 0, a = 0, b = 0; + __asm__ __volatile__ ( + "precr.qb.ph %[c], %[a], %[b] \n\t" + : [c] "=r" (c) + : [a] "r" (a), [b] "r" (b) + ); + return c; + } + ]])], have_mips_dspr2=yes) + CFLAGS=$ac_save_CFLAGS + + if test "x$have_mips_dspr2" = "xyes" ; then + $1 + else + $2 + fi +]) + +AC_DEFUN([AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE],[ + ac_good_gnu_arm_assembler=no + ac_save_CC="$CC" + ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CCASFLAGS -x assembler-with-cpp" + CC="$CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + MYVAR .req x0 + movi v0.16b, #100 + mov MYVAR, #100 + .unreq MYVAR]])], ac_good_gnu_arm_assembler=yes) + + ac_use_gas_preprocessor=no + if test "x$ac_good_gnu_arm_assembler" = "xno" ; then + CC="gas-preprocessor.pl $CCAS" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + .text + MYVAR .req x0 + movi v0.16b, #100 + mov MYVAR, #100 + .unreq MYVAR]])], ac_use_gas_preprocessor=yes) + fi + CFLAGS="$ac_save_CFLAGS" + CC="$ac_save_CC" + + if test "x$ac_use_gas_preprocessor" = "xyes" ; then + CCAS="gas-preprocessor.pl $CCAS" + AC_SUBST([CCAS]) + ac_good_gnu_arm_assembler=yes + fi + + if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then + $1 + else + $2 + fi +]) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/bmp.c glmark2-2021.02/src/libjpeg-turbo/bmp.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/bmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/bmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,334 @@ +/* + * Copyright (C)2011, 2015 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include "cdjpeg.h" +#include +#include +#include "tjutil.h" +#include "bmp.h" + + +/* This duplicates the functionality of the VirtualGL bitmap library using + the components from cjpeg and djpeg */ + + +/* Error handling (based on example in example.c) */ + +static char errStr[JMSG_LENGTH_MAX]="No error"; + +struct my_error_mgr +{ + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; +}; +typedef struct my_error_mgr *my_error_ptr; + +static void my_error_exit(j_common_ptr cinfo) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->setjmp_buffer, 1); +} + +/* Based on output_message() in jerror.c */ + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, errStr); +} + +#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ + retval=-1; goto bailout;} +#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, \ + strerror(errno)); retval=-1; goto bailout;} + + +static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup, + unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h) +{ + unsigned char *srcrowptr=srcbuf, *srccolptr; + int srcps=tjPixelSize[srcpf]; + int srcstride=srcbottomup? -w*srcps:w*srcps; + unsigned char *dstrowptr=dstbuf, *dstcolptr; + int dstps=tjPixelSize[dstpf]; + int dststride=dstbottomup? -w*dstps:w*dstps; + int row, col; + + if(srcbottomup) srcrowptr=&srcbuf[w*srcps*(h-1)]; + if(dstbottomup) dstrowptr=&dstbuf[w*dstps*(h-1)]; + + /* NOTE: These quick & dirty CMYK<->RGB conversion routines are for testing + purposes only. Properly converting between CMYK and RGB requires a color + management system. */ + + if(dstpf==TJPF_CMYK) + { + for(row=0; row1.0) c=1.0; if(c<0.) c=0.; + if(m>1.0) m=1.0; if(m<0.) m=0.; + if(y>1.0) y=1.0; if(y<0.) y=0.; + if(k>1.0) k=1.0; if(k<0.) k=0.; + *dstcolptr++=(unsigned char)(255.0-c*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-m*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-y*255.0+0.5); + *dstcolptr++=(unsigned char)(255.0-k*255.0+0.5); + } + } + } + else if(srcpf==TJPF_CMYK) + { + for(row=0; row255.0) r=255.0; if(r<0.) r=0.; + if(g>255.0) g=255.0; if(g<0.) g=0.; + if(b>255.0) b=255.0; if(b<0.) b=0.; + dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5); + dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5); + dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5); + } + } + } + else + { + for(row=0; row=TJ_NUMPF) + _throw("loadbmp(): Invalid argument"); + + if((file=fopen(filename, "rb"))==NULL) + _throwunix("loadbmp(): Cannot open input file"); + + cinfo.err=jpeg_std_error(&jerr.pub); + jerr.pub.error_exit=my_error_exit; + jerr.pub.output_message=my_output_message; + + if(setjmp(jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; goto bailout; + } + + jpeg_create_compress(&cinfo); + if((tempc=getc(file))<0 || ungetc(tempc, file)==EOF) + _throwunix("loadbmp(): Could not read input file") + else if(tempc==EOF) _throw("loadbmp(): Input file contains no data"); + + if(tempc=='B') + { + if((src=jinit_read_bmp(&cinfo))==NULL) + _throw("loadbmp(): Could not initialize bitmap loader"); + } + else if(tempc=='P') + { + if((src=jinit_read_ppm(&cinfo))==NULL) + _throw("loadbmp(): Could not initialize bitmap loader"); + } + else _throw("loadbmp(): Unsupported file type"); + + src->input_file=file; + (*src->start_input)(&cinfo, src); + (*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo); + + *w=cinfo.image_width; *h=cinfo.image_height; + + if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB) + srcpf=TJPF_GRAY; + else srcpf=TJPF_RGB; + + dstps=tjPixelSize[dstpf]; + if((*buf=(unsigned char *)malloc((*w)*(*h)*dstps))==NULL) + _throw("loadbmp(): Memory allocation failure"); + + while(cinfo.next_scanlineget_pixel_rows)(&cinfo, src); + for(i=0; ibuffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w, + nlines); + } + cinfo.next_scanline+=nlines; + } + + (*src->finish_input)(&cinfo, src); + + bailout: + jpeg_destroy_compress(&cinfo); + if(file) fclose(file); + if(retval<0 && buf && *buf) {free(*buf); *buf=NULL;} + return retval; +} + + +int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf, + int bottomup) +{ + int retval=0, srcps, dstpf; + struct jpeg_decompress_struct dinfo; + struct my_error_mgr jerr; + djpeg_dest_ptr dst; + FILE *file=NULL; + char *ptr=NULL; + + memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct)); + + if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF) + _throw("savebmp(): Invalid argument"); + + if((file=fopen(filename, "wb"))==NULL) + _throwunix("savebmp(): Cannot open output file"); + + dinfo.err=jpeg_std_error(&jerr.pub); + jerr.pub.error_exit=my_error_exit; + jerr.pub.output_message=my_output_message; + + if(setjmp(jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; goto bailout; + } + + jpeg_create_decompress(&dinfo); + if(srcpf==TJPF_GRAY) + { + dinfo.out_color_components=dinfo.output_components=1; + dinfo.out_color_space=JCS_GRAYSCALE; + } + else + { + dinfo.out_color_components=dinfo.output_components=3; + dinfo.out_color_space=JCS_RGB; + } + dinfo.image_width=w; dinfo.image_height=h; + dinfo.global_state=DSTATE_READY; + dinfo.scale_num=dinfo.scale_denom=1; + + ptr=strrchr(filename, '.'); + if(ptr && !strcasecmp(ptr, ".bmp")) + { + if((dst=jinit_write_bmp(&dinfo, 0))==NULL) + _throw("savebmp(): Could not initialize bitmap writer"); + } + else + { + if((dst=jinit_write_ppm(&dinfo))==NULL) + _throw("savebmp(): Could not initialize PPM writer"); + } + + dst->output_file=file; + (*dst->start_output)(&dinfo, dst); + (*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo); + + if(srcpf==TJPF_GRAY) dstpf=srcpf; + else dstpf=TJPF_RGB; + srcps=tjPixelSize[srcpf]; + + while(dinfo.output_scanlinebuffer_height; + for(i=0; ibuffer[i], dstpf, 0, w, + nlines); + } + (*dst->put_pixel_rows)(&dinfo, dst, nlines); + dinfo.output_scanline+=nlines; + } + + (*dst->finish_output)(&dinfo, dst); + + bailout: + jpeg_destroy_decompress(&dinfo); + if(file) fclose(file); + return retval; +} + +const char *bmpgeterr(void) +{ + return errStr; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/bmp.h glmark2-2021.02/src/libjpeg-turbo/bmp.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/bmp.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/bmp.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,42 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BMP_H__ +#define __BMP_H__ + +#include "./turbojpeg.h" + +int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf, + int bottomup); + +int savebmp(char *filename, unsigned char *buf, int w, int h, int pf, + int bottomup); + +const char *bmpgeterr(void); + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cderror.h glmark2-2021.02/src/libjpeg-turbo/cderror.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cderror.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/cderror.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,136 @@ +/* + * cderror.h + * + * Copyright (C) 1994-1997, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file defines the error and message codes for the cjpeg/djpeg + * applications. These strings are not needed as part of the JPEG library + * proper. + * Edit this file to add new codes, or to translate the message strings to + * some other language. + */ + +/* + * To define the enum list of message codes, include this file without + * defining macro JMESSAGE. To create a message string table, include it + * again with a suitable JMESSAGE definition (see jerror.c for an example). + */ +#ifndef JMESSAGE +#ifndef CDERROR_H +#define CDERROR_H +/* First time through, define the enum list */ +#define JMAKE_ENUM_LIST +#else +/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */ +#define JMESSAGE(code,string) +#endif /* CDERROR_H */ +#endif /* JMESSAGE */ + +#ifdef JMAKE_ENUM_LIST + +typedef enum { + +#define JMESSAGE(code,string) code , + +#endif /* JMAKE_ENUM_LIST */ + +JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */ + +#ifdef BMP_SUPPORTED +JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format") +JMESSAGE(JERR_BMP_BADDEPTH, "Only 8- and 24-bit BMP files are supported") +JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length") +JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1") +JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB") +JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported") +JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image") +JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM") +JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image") +JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image") +JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image") +JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image") +#endif /* BMP_SUPPORTED */ + +#ifdef GIF_SUPPORTED +JMESSAGE(JERR_GIF_BUG, "GIF output got confused") +JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d") +JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB") +JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file") +JMESSAGE(JERR_GIF_NOT, "Not a GIF file") +JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image") +JMESSAGE(JTRC_GIF_BADVERSION, + "Warning: unexpected GIF version number '%c%c%c'") +JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x") +JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input") +JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file") +JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring") +JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image") +JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits") +#endif /* GIF_SUPPORTED */ + +#ifdef PPM_SUPPORTED +JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB") +JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file") +JMESSAGE(JERR_PPM_TOOLARGE, "Integer value too large in PPM file") +JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file") +JMESSAGE(JTRC_PGM, "%ux%u PGM image") +JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image") +JMESSAGE(JTRC_PPM, "%ux%u PPM image") +JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image") +#endif /* PPM_SUPPORTED */ + +#ifdef RLE_SUPPORTED +JMESSAGE(JERR_RLE_BADERROR, "Bogus error code from RLE library") +JMESSAGE(JERR_RLE_COLORSPACE, "RLE output must be grayscale or RGB") +JMESSAGE(JERR_RLE_DIMENSIONS, "Image dimensions (%ux%u) too large for RLE") +JMESSAGE(JERR_RLE_EMPTY, "Empty RLE file") +JMESSAGE(JERR_RLE_EOF, "Premature EOF in RLE header") +JMESSAGE(JERR_RLE_MEM, "Insufficient memory for RLE header") +JMESSAGE(JERR_RLE_NOT, "Not an RLE file") +JMESSAGE(JERR_RLE_TOOMANYCHANNELS, "Cannot handle %d output channels for RLE") +JMESSAGE(JERR_RLE_UNSUPPORTED, "Cannot handle this RLE setup") +JMESSAGE(JTRC_RLE, "%ux%u full-color RLE file") +JMESSAGE(JTRC_RLE_FULLMAP, "%ux%u full-color RLE file with map of length %d") +JMESSAGE(JTRC_RLE_GRAY, "%ux%u grayscale RLE file") +JMESSAGE(JTRC_RLE_MAPGRAY, "%ux%u grayscale RLE file with map of length %d") +JMESSAGE(JTRC_RLE_MAPPED, "%ux%u colormapped RLE file with map of length %d") +#endif /* RLE_SUPPORTED */ + +#ifdef TARGA_SUPPORTED +JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format") +JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file") +JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB") +JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image") +JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image") +JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image") +#else +JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled") +#endif /* TARGA_SUPPORTED */ + +JMESSAGE(JERR_BAD_CMAP_FILE, + "Color map file is invalid or of unsupported format") +JMESSAGE(JERR_TOO_MANY_COLORS, + "Output file format cannot handle %d colormap entries") +JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed") +#ifdef TARGA_SUPPORTED +JMESSAGE(JERR_UNKNOWN_FORMAT, + "Unrecognized input file format --- perhaps you need -targa") +#else +JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format") +#endif +JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format") + +#ifdef JMAKE_ENUM_LIST + + JMSG_LASTADDONCODE +} ADDON_MESSAGE_CODE; + +#undef JMAKE_ENUM_LIST +#endif /* JMAKE_ENUM_LIST */ + +/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */ +#undef JMESSAGE diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cdjpeg.c glmark2-2021.02/src/libjpeg-turbo/cdjpeg.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cdjpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/cdjpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,144 @@ +/* + * cdjpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains common support routines used by the IJG application + * programs (cjpeg, djpeg, jpegtran). + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + + +/* + * Optional progress monitor: display a percent-done figure on stderr. + */ + +#ifdef PROGRESS_REPORT + +METHODDEF(void) +progress_monitor (j_common_ptr cinfo) +{ + cd_progress_ptr prog = (cd_progress_ptr) cinfo->progress; + int total_passes = prog->pub.total_passes + prog->total_extra_passes; + int percent_done = (int) (prog->pub.pass_counter*100L/prog->pub.pass_limit); + + if (percent_done != prog->percent_done) { + prog->percent_done = percent_done; + if (total_passes > 1) { + fprintf(stderr, "\rPass %d/%d: %3d%% ", + prog->pub.completed_passes + prog->completed_extra_passes + 1, + total_passes, percent_done); + } else { + fprintf(stderr, "\r %3d%% ", percent_done); + } + fflush(stderr); + } +} + + +GLOBAL(void) +start_progress_monitor (j_common_ptr cinfo, cd_progress_ptr progress) +{ + /* Enable progress display, unless trace output is on */ + if (cinfo->err->trace_level == 0) { + progress->pub.progress_monitor = progress_monitor; + progress->completed_extra_passes = 0; + progress->total_extra_passes = 0; + progress->percent_done = -1; + cinfo->progress = &progress->pub; + } +} + + +GLOBAL(void) +end_progress_monitor (j_common_ptr cinfo) +{ + /* Clear away progress display */ + if (cinfo->err->trace_level == 0) { + fprintf(stderr, "\r \r"); + fflush(stderr); + } +} + +#endif + + +/* + * Case-insensitive matching of possibly-abbreviated keyword switches. + * keyword is the constant keyword (must be lower case already), + * minchars is length of minimum legal abbreviation. + */ + +GLOBAL(boolean) +keymatch (char *arg, const char *keyword, int minchars) +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return FALSE; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return FALSE; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return FALSE; + return TRUE; /* A-OK */ +} + + +/* + * Routines to establish binary I/O mode for stdin and stdout. + * Non-Unix systems often require some hacking to get out of text mode. + */ + +GLOBAL(FILE *) +read_stdin (void) +{ + FILE * input_file = stdin; + +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((input_file = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "Cannot reopen stdin\n"); + exit(EXIT_FAILURE); + } +#endif + return input_file; +} + + +GLOBAL(FILE *) +write_stdout (void) +{ + FILE * output_file = stdout; + +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdout), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((output_file = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) { + fprintf(stderr, "Cannot reopen stdout\n"); + exit(EXIT_FAILURE); + } +#endif + return output_file; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cdjpeg.h glmark2-2021.02/src/libjpeg-turbo/cdjpeg.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cdjpeg.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/cdjpeg.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,145 @@ +/* + * cdjpeg.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains common declarations for the sample applications + * cjpeg and djpeg. It is NOT used by the core JPEG library. + */ + +#define JPEG_CJPEG_DJPEG /* define proper options in jconfig.h */ +#define JPEG_INTERNAL_OPTIONS /* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" /* get library error codes too */ +#include "cderror.h" /* get application-specific error codes */ + + +/* + * Object interface for cjpeg's source file decoding modules + */ + +typedef struct cjpeg_source_struct *cjpeg_source_ptr; + +struct cjpeg_source_struct { + void (*start_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + void (*finish_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); + + FILE *input_file; + + JSAMPARRAY buffer; + JDIMENSION buffer_height; +}; + + +/* + * Object interface for djpeg's output file encoding modules + */ + +typedef struct djpeg_dest_struct *djpeg_dest_ptr; + +struct djpeg_dest_struct { + /* start_output is called after jpeg_start_decompress finishes. + * The color map will be ready at this time, if one is needed. + */ + void (*start_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo); + /* Emit the specified number of pixel rows from the buffer. */ + void (*put_pixel_rows) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied); + /* Finish up at the end of the image. */ + void (*finish_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo); + + /* Target file spec; filled in by djpeg.c after object is created. */ + FILE *output_file; + + /* Output pixel-row buffer. Created by module init or start_output. + * Width is cinfo->output_width * cinfo->output_components; + * height is buffer_height. + */ + JSAMPARRAY buffer; + JDIMENSION buffer_height; +}; + + +/* + * cjpeg/djpeg may need to perform extra passes to convert to or from + * the source/destination file format. The JPEG library does not know + * about these passes, but we'd like them to be counted by the progress + * monitor. We use an expanded progress monitor object to hold the + * additional pass count. + */ + +struct cdjpeg_progress_mgr { + struct jpeg_progress_mgr pub; /* fields known to JPEG library */ + int completed_extra_passes; /* extra passes completed */ + int total_extra_passes; /* total extra */ + /* last printed percentage stored here to avoid multiple printouts */ + int percent_done; +}; + +typedef struct cdjpeg_progress_mgr *cd_progress_ptr; + + +/* Module selection routines for I/O modules. */ + +EXTERN(cjpeg_source_ptr) jinit_read_bmp (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_bmp (j_decompress_ptr cinfo, + boolean is_os2); +EXTERN(cjpeg_source_ptr) jinit_read_gif (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_gif (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_ppm (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_ppm (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_rle (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_rle (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_targa (j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_targa (j_decompress_ptr cinfo); + +/* cjpeg support routines (in rdswitch.c) */ + +EXTERN(boolean) read_quant_tables (j_compress_ptr cinfo, char *filename, + boolean force_baseline); +EXTERN(boolean) read_scan_script (j_compress_ptr cinfo, char *filename); +EXTERN(boolean) set_quality_ratings (j_compress_ptr cinfo, char *arg, + boolean force_baseline); +EXTERN(boolean) set_quant_slots (j_compress_ptr cinfo, char *arg); +EXTERN(boolean) set_sample_factors (j_compress_ptr cinfo, char *arg); + +/* djpeg support routines (in rdcolmap.c) */ + +EXTERN(void) read_color_map (j_decompress_ptr cinfo, FILE *infile); + +/* common support routines (in cdjpeg.c) */ + +EXTERN(void) enable_signal_catcher (j_common_ptr cinfo); +EXTERN(void) start_progress_monitor (j_common_ptr cinfo, + cd_progress_ptr progress); +EXTERN(void) end_progress_monitor (j_common_ptr cinfo); +EXTERN(boolean) keymatch (char *arg, const char *keyword, int minchars); +EXTERN(FILE *) read_stdin (void); +EXTERN(FILE *) write_stdout (void); + +/* miscellaneous useful macros */ + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#define WRITE_BINARY "w" +#else +#define READ_BINARY "rb" +#define WRITE_BINARY "wb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_WARNING +#define EXIT_WARNING 2 +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/change.log glmark2-2021.02/src/libjpeg-turbo/change.log --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/change.log 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/change.log 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,313 @@ +NOTE: This file was modified by The libjpeg-turbo Project to include only +information relevant to libjpeg-turbo. + +CHANGE LOG for Independent JPEG Group's JPEG software + + +Version 9b 17-Jan-2016 +----------------------- + +Document 'f' specifier for jpegtran -crop specification. +Thank to Michele Martone for suggestion. + + +Version 9 13-Jan-2013 +---------------------- + +Add remark for jpeg_mem_dest() in jdatadst.c. +Thank to Elie-Gregoire Khoury for the hint. + +Correct argument type in format string, avoid compiler warnings. +Thank to Vincent Torri for hint. + + +Version 8d 15-Jan-2012 +----------------------- + +Add cjpeg -rgb option to create RGB JPEG files. +Using this switch suppresses the conversion from RGB +colorspace input to the default YCbCr JPEG colorspace. +Thank to Michael Koch for the initial suggestion. + +Add option to disable the region adjustment in the transupp crop code. +Thank to Jeffrey Friedl for the suggestion. + + +Version 8b 16-May-2010 +----------------------- + +Repair problem in new memory source manager with corrupt JPEG data. +Thank to Ted Campbell and Samuel Chun for the report. + + +Version 8a 28-Feb-2010 +----------------------- + +Writing tables-only datastreams via jpeg_write_tables works again. + +Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg. +Thank to Brett Blackham for the suggestion. + + +Version 8 10-Jan-2010 +---------------------- + +Add sanity check in BMP reader module to avoid cjpeg crash for empty input +image (thank to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error). + +Add data source and destination managers for read from and write to +memory buffers. New API functions jpeg_mem_src and jpeg_mem_dest. +Thank to Roberto Boni from Italy for the suggestion. + + +Version 7 27-Jun-2009 +---------------------- + +New scaled DCTs implemented. +djpeg now supports scalings N/8 with all N from 1 to 16. + +cjpeg -quality option has been extended for support of separate quality +settings for luminance and chrominance (or in general, for every provided +quantization table slot). +New API function jpeg_default_qtables() and q_scale_factor array in library. + +Support arithmetic entropy encoding and decoding. +Added files jaricom.c, jcarith.c, jdarith.c. + +jpegtran has a new "lossless" cropping feature. + +Implement -perfect option in jpegtran, new API function +jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch) + +Better error messages for jpegtran fopen failure. +(DP 203_jpegtran_errmsg.dpatch) + +Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c: +according to Netpbm, the de facto standard implementation of the PNM formats, +the most significant byte is first. (DP 203_rdppm.dpatch) + +Add -raw option to rdjpgcom not to mangle the output. +(DP 205_rdjpgcom_raw.dpatch) + +Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch) + +Add extern "C" to jpeglib.h. +This avoids the need to put extern "C" { ... } around #include "jpeglib.h" +in your C++ application. Defining the symbol DONT_USE_EXTERN_C in the +configuration prevents this. (DP 202_jpeglib.h_c++.dpatch) + + +Version 6b 27-Mar-1998 +----------------------- + +jpegtran has new features for lossless image transformations (rotation +and flipping) as well as "lossless" reduction to grayscale. + +jpegtran now copies comments by default; it has a -copy switch to enable +copying all APPn blocks as well, or to suppress comments. (Formerly it +always suppressed comments and APPn blocks.) jpegtran now also preserves +JFIF version and resolution information. + +New decompressor library feature: COM and APPn markers found in the input +file can be saved in memory for later use by the application. (Before, +you had to code this up yourself with a custom marker processor.) + +There is an unused field "void * client_data" now in compress and decompress +parameter structs; this may be useful in some applications. + +JFIF version number information is now saved by the decoder and accepted by +the encoder. jpegtran uses this to copy the source file's version number, +to ensure "jpegtran -copy all" won't create bogus files that contain JFXX +extensions but claim to be version 1.01. Applications that generate their +own JFXX extension markers also (finally) have a supported way to cause the +encoder to emit JFIF version number 1.02. + +djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather +than as unknown APP0 markers. + +In -verbose mode, djpeg and rdjpgcom will try to print the contents of +APP12 markers as text. Some digital cameras store useful text information +in APP12 markers. + +Handling of truncated data streams is more robust: blocks beyond the one in +which the error occurs will be output as uniform gray, or left unchanged +if decoding a progressive JPEG. The appearance no longer depends on the +Huffman tables being used. + +Huffman tables are checked for validity much more carefully than before. + +To avoid the Unisys LZW patent, djpeg's GIF output capability has been +changed to produce "uncompressed GIFs", and cjpeg's GIF input capability +has been removed altogether. We're not happy about it either, but there +seems to be no good alternative. + +The configure script now supports building libjpeg as a shared library +on many flavors of Unix (all the ones that GNU libtool knows how to +build shared libraries for). Use "./configure --enable-shared" to +try this out. + +New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio. +Also, a jconfig file and a build script for Metrowerks CodeWarrior +on Apple Macintosh. makefile.dj has been updated for DJGPP v2, and there +are miscellaneous other minor improvements in the makefiles. + +jmemmac.c now knows how to create temporary files following Mac System 7 +conventions. + +djpeg's -map switch is now able to read raw-format PPM files reliably. + +cjpeg -progressive -restart no longer generates any unnecessary DRI markers. + +Multiple calls to jpeg_simple_progression for a single JPEG object +no longer leak memory. + + +Version 6a 7-Feb-96 +-------------------- + +Library initialization sequence modified to detect version mismatches +and struct field packing mismatches between library and calling application. +This change requires applications to be recompiled, but does not require +any application source code change. + +All routine declarations changed to the style "GLOBAL(type) name ...", +that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the +routine's return type as an argument. This makes it possible to add +Microsoft-style linkage keywords to all the routines by changing just +these macros. Note that any application code that was using these macros +will have to be changed. + +DCT coefficient quantization tables are now stored in normal array order +rather than zigzag order. Application code that calls jpeg_add_quant_table, +or otherwise manipulates quantization tables directly, will need to be +changed. If you need to make such code work with either older or newer +versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is +recommended. + +djpeg's trace capability now dumps DQT tables in natural order, not zigzag +order. This allows the trace output to be made into a "-qtables" file +more easily. + +New system-dependent memory manager module for use on Apple Macintosh. + +Fix bug in cjpeg's -smooth option: last one or two scanlines would be +duplicates of the prior line unless the image height mod 16 was 1 or 2. + +Repair minor problems in VMS, BCC, MC6 makefiles. + +New configure script based on latest GNU Autoconf. + +Correct the list of include files needed by MetroWerks C for ccommand(). + +Numerous small documentation updates. + + +Version 6 2-Aug-95 +------------------- + +Progressive JPEG support: library can read and write full progressive JPEG +files. A "buffered image" mode supports incremental decoding for on-the-fly +display of progressive images. Simply recompiling an existing IJG-v5-based +decoder with v6 should allow it to read progressive files, though of course +without any special progressive display. + +New "jpegtran" application performs lossless transcoding between different +JPEG formats; primarily, it can be used to convert baseline to progressive +JPEG and vice versa. In support of jpegtran, the library now allows lossless +reading and writing of JPEG files as DCT coefficient arrays. This ability +may be of use in other applications. + +Notes for programmers: +* We changed jpeg_start_decompress() to be able to suspend; this makes all +decoding modes available to suspending-input applications. However, +existing applications that use suspending input will need to be changed +to check the return value from jpeg_start_decompress(). You don't need to +do anything if you don't use a suspending data source. +* We changed the interface to the virtual array routines: access_virt_array +routines now take a count of the number of rows to access this time. The +last parameter to request_virt_array routines is now interpreted as the +maximum number of rows that may be accessed at once, but not necessarily +the height of every access. + + +Version 5b 15-Mar-95 +--------------------- + +Correct bugs with grayscale images having v_samp_factor > 1. + +jpeg_write_raw_data() now supports output suspension. + +Correct bugs in "configure" script for case of compiling in +a directory other than the one containing the source files. + +Repair bug in jquant1.c: sometimes didn't use as many colors as it could. + +Borland C makefile and jconfig file work under either MS-DOS or OS/2. + +Miscellaneous improvements to documentation. + + +Version 5a 7-Dec-94 +-------------------- + +Changed color conversion roundoff behavior so that grayscale values are +represented exactly. (This causes test image files to change.) + +Make ordered dither use 16x16 instead of 4x4 pattern for a small quality +improvement. + +New configure script based on latest GNU Autoconf. +Fix configure script to handle CFLAGS correctly. +Rename *.auto files to *.cfg, so that configure script still works if +file names have been truncated for DOS. + +Fix bug in rdbmp.c: didn't allow for extra data between header and image. + +Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data. + +Fix several bugs in rdrle.c. + +NEED_SHORT_EXTERNAL_NAMES option was broken. + +Revise jerror.h/jerror.c for more flexibility in message table. + +Repair oversight in jmemname.c NO_MKTEMP case: file could be there +but unreadable. + + +Version 5 24-Sep-94 +-------------------- + +Version 5 represents a nearly complete redesign and rewrite of the IJG +software. Major user-visible changes include: + * Automatic configuration simplifies installation for most Unix systems. + * A range of speed vs. image quality tradeoffs are supported. + This includes resizing of an image during decompression: scaling down + by a factor of 1/2, 1/4, or 1/8 is handled very efficiently. + * New programs rdjpgcom and wrjpgcom allow insertion and extraction + of text comments in a JPEG file. + +The application programmer's interface to the library has changed completely. +Notable improvements include: + * We have eliminated the use of callback routines for handling the + uncompressed image data. The application now sees the library as a + set of routines that it calls to read or write image data on a + scanline-by-scanline basis. + * The application image data is represented in a conventional interleaved- + pixel format, rather than as a separate array for each color channel. + This can save a copying step in many programs. + * The handling of compressed data has been cleaned up: the application can + supply routines to source or sink the compressed data. It is possible to + suspend processing on source/sink buffer overrun, although this is not + supported in all operating modes. + * All static state has been eliminated from the library, so that multiple + instances of compression or decompression can be active concurrently. + * JPEG abbreviated datastream formats are supported, ie, quantization and + Huffman tables can be stored separately from the image data. + * And not only that, but the documentation of the library has improved + considerably! + + +The last widely used release before the version 5 rewrite was version 4A of +18-Feb-93. Change logs before that point have been discarded, since they +are not of much interest after the rewrite. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/ChangeLog.txt glmark2-2021.02/src/libjpeg-turbo/ChangeLog.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/ChangeLog.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/ChangeLog.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,861 @@ +1.5 beta1 +========= + +[1] Added full SIMD acceleration for PowerPC platforms using AltiVec VMX +(128-bit SIMD) instructions. Although the performance of libjpeg-turbo on +PowerPC was already good, due to the increased number of registers available +to the compiler vs. x86, it was still possible to speed up compression by about +3-4x and decompression by about 2-2.5x (relative to libjpeg v6b) through the +use of AltiVec instructions. + +[2] Added two new libjpeg API functions (jpeg_skip_scanlines() and +jpeg_crop_scanline()) that can be used to partially decode a JPEG image. See +libjpeg.txt for more details. + +[3] The TJCompressor and TJDecompressor classes in the TurboJPEG Java API now +implement the Closeable interface, so those classes can be used with a +try-with-resources statement. + +[4] The TurboJPEG Java classes now throw unchecked idiomatic exceptions +(IllegalArgumentException, IllegalStateException) for unrecoverable errors +caused by incorrect API usage, and those classes throw a new checked exception +type (TJException) for errors that are passed through from the C library. + +[5] Source buffers for the TurboJPEG C API functions, as well as the +jpeg_mem_src() function in the libjpeg API, are now declared as const pointers. +This facilitates passing read-only buffers to those functions and ensures the +caller that the source buffer will not be modified. This should not create any +backward API or ABI incompatibilities with prior libjpeg-turbo releases. + +[6] The MIPS DSPr2 SIMD code can now be compiled to support either FR=0 or FR=1 +FPUs. + +[7] Fixed additional negative left shifts and other issues reported by the GCC +and Clang undefined behavior sanitizers. Most of these issues affected only +32-bit code, and none of them was known to pose a security threat, but removing +the warnings makes it easier to detect actual security issues, should they +arise in the future. + +[8] Removed the unnecessary .arch directive from the ARM64 NEON SIMD code. +This directive was preventing the code from assembling using the clang +integrated assembler. + +[9] Fixed a regression caused by 1.4.1[6] that prevented 32-bit and 64-bit +libjpeg-turbo RPMs from being installed simultaneously on recent Red Hat/Fedora +distributions. This was due to the addition of a macro in jconfig.h that +allows the Huffman codec to determine the word size at compile time. Since +that macro differs between 32-bit and 64-bit builds, this caused a conflict +between the i386 and x86_64 RPMs (any differing files, other than executables, +are not allowed when 32-bit and 64-bit RPMs are installed simultaneously.) +Since the macro is used only internally, it has been moved into jconfigint.h. + +[10] The x86-64 SIMD code can now be disabled at run time by setting the +JSIMD_FORCENONE environment variable to 1 (the other SIMD implementations +already had this capability.) + +[11] Added a new command-line argument to TJBench (-nowrite) that prevents the +benchmark from outputting any images. This removes any potential operating +system overhead that might be caused by lazy writes to disk and thus improves +the consistency of the performance measurements. + +[12] Added SIMD acceleration for Huffman encoding on SSE2-capable x86 and +x86-64 platforms. This speeds up the compression of full-color JPEGs by about +10-15% on average (relative to libjpeg-turbo 1.4.x) when using modern Intel and +AMD CPUs. Additionally, this works around an issue in the clang optimizer that +prevents it (as of this writing) from achieving the same performance as GCC +when compiling the C version of the Huffman encoder +(https://llvm.org/bugs/show_bug.cgi?id=16035). For the purposes of benchmarking +or regression testing, SIMD-accelerated Huffman encoding can be disabled by +setting the JSIMD_NOHUFFENC environment variable to 1. + +[13] Added ARM 64-bit (ARMv8) NEON SIMD implementations of the commonly-used +compression algorithms (including the slow integer forward DCT and h2v2 & h2v1 +downsampling algorithms, which are not accelerated in the 32-bit NEON +implementation.) This speeds up the compression of full-color JPEGs by about +75% on average on a Cavium ThunderX processor and by about 2-2.5x on average on +Cortex-A53 and Cortex-A57 cores. + +[14] Added SIMD acceleration for Huffman encoding on NEON-capable ARM 32-bit +and 64-bit platforms. + +For 32-bit code, this speeds up the compression of full-color JPEGs by about +30% on average on a typical iOS device (iPhone 4S, Cortex-A9) and by about 6-7% +on average on a typical Android device (Nexus 5X, Cortex-A53 and Cortex-A57), +relative to libjpeg-turbo 1.4.x. Note that the larger speedup under iOS is due +to the fact that iOS builds use LLVM, which does not optimize the C Huffman +encoder as well as GCC does. + +For 64-bit code, NEON-accelerated Huffman encoding speeds up the compression of +full-color JPEGs by about 40% on average on a typical iOS device (iPhone 5S, +Apple A7) and by about 7-8% on average on a typical Android device (Nexus 5X, +Cortex-A53 and Cortex-A57), in addition to the speedup described in [13] above. + +For the purposes of benchmarking or regression testing, SIMD-accelerated +Huffman encoding can be disabled by setting the JSIMD_NOHUFFENC environment +variable to 1. + +[15] pkg-config (.pc) scripts are now included for both the libjpeg and +TurboJPEG API libraries on Un*x systems. Note that if a project's build system +relies on these scripts, then it will not be possible to build that project +with libjpeg or with a prior version of libjpeg-turbo. + +[16] Optimized the ARM 64-bit (ARMv8) NEON SIMD decompression routines to +improve performance on CPUs with in-order pipelines. This speeds up the +decompression of full-color JPEGs by nearly 2x on average on a Cavium ThunderX +processor and by about 15% on average on a Cortex-A53 core. + +[17] Fixed an issue in the accelerated Huffman decoder that could have caused +the decoder to read past the end of the input buffer when a malformed, +specially-crafted JPEG image was being decompressed. In prior versions of +libjpeg-turbo, the accelerated Huffman decoder was invoked (in most cases) only +if there were > 128 bytes of data in the input buffer. However, it is possible +to construct a JPEG image in which a single Huffman block is over 430 bytes +long, so this version of libjpeg-turbo activates the accelerated Huffman +decoder only if there are > 512 bytes of data in the input buffer. + +[18] Fixed a memory leak in tjunittest encountered when running the program +with the -yuv option. + + +1.4.2 +===== + +[1] Fixed an issue whereby cjpeg would segfault if a Windows bitmap with a +negative width or height was used as an input image (Windows bitmaps can have +a negative height if they are stored in top-down order, but such files are +rare and not supported by libjpeg-turbo.) + +[2] Fixed an issue whereby, under certain circumstances, libjpeg-turbo would +incorrectly encode certain JPEG images when quality=100 and the fast integer +forward DCT were used. This was known to cause 'make test' to fail when the +library was built with '-march=haswell' on x86 systems. + +[3] Fixed an issue whereby libjpeg-turbo would crash when built with the latest +& greatest development version of the Clang/LLVM compiler. This was caused by +an x86-64 ABI conformance issue in some of libjpeg-turbo's 64-bit SSE2 SIMD +routines. Those routines were incorrectly using a 64-bit mov instruction to +transfer a 32-bit JDIMENSION argument, whereas the x86-64 ABI allows the upper +(unused) 32 bits of a 32-bit argument's register to be undefined. The new +Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit +structure members into a single 64-bit register, and this exposed the ABI +conformance issue. + +[4] Fixed a bug in the MIPS DSPr2 4:2:0 "plain" (non-fancy and non-merged) +upsampling routine that caused a buffer overflow (and subsequent segfault) when +decompressing a 4:2:0 JPEG image whose scaled output width was less than 16 +pixels. The "plain" upsampling routines are normally only used when +decompressing a non-YCbCr JPEG image, but they are also used when decompressing +a JPEG image whose scaled output height is 1. + +[5] Fixed various negative left shifts and other issues reported by the GCC and +Clang undefined behavior sanitizers. None of these was known to pose a +security threat, but removing the warnings makes it easier to detect actual +security issues, should they arise in the future. + + +1.4.1 +===== + +[1] tjbench now properly handles CMYK/YCCK JPEG files. Passing an argument of +-cmyk (instead of, for instance, -rgb) will cause tjbench to internally convert +the source bitmap to CMYK prior to compression, to generate YCCK JPEG files, +and to internally convert the decompressed CMYK pixels back to RGB after +decompression (the latter is done automatically if a CMYK or YCCK JPEG is +passed to tjbench as a source image.) The CMYK<->RGB conversion operation is +not benchmarked. NOTE: The quick & dirty CMYK<->RGB conversions that tjbench +uses are suitable for testing only. Proper conversion between CMYK and RGB +requires a color management system. + +[2] 'make test' now performs additional bitwise regression tests using tjbench, +mainly for the purpose of testing compression from/decompression to a subregion +of a larger image buffer. + +[3] 'make test' no longer tests the regression of the floating point DCT/IDCT +by default, since the results of those tests can vary if the algorithms in +question are not implemented using SIMD instructions on a particular platform. +See the comments in Makefile.am for information on how to re-enable the tests +and to specify an expected result for them based on the particulars of your +platform. + +[4] The NULL color conversion routines have been significantly optimized, +which speeds up the compression of RGB and CMYK JPEGs by 5-20% when using +64-bit code and 0-3% when using 32-bit code, and the decompression of those +images by 10-30% when using 64-bit code and 3-12% when using 32-bit code. + +[5] Fixed an "illegal instruction" error that occurred when djpeg from a +SIMD-enabled libjpeg-turbo MIPS build was executed with the -nosmooth option on +a MIPS machine that lacked DSPr2 support. The MIPS SIMD routines for h2v1 and +h2v2 merged upsampling were not properly checking for the existence of DSPr2. + +[6] Performance has been improved significantly on 64-bit non-Linux and +non-Windows platforms (generally 10-20% faster compression and 5-10% faster +decompression.) Due to an oversight, the 64-bit version of the accelerated +Huffman codec was not being compiled in when libjpeg-turbo was built on +platforms other than Windows or Linux. Oops. + +[7] Fixed an extremely rare bug in the Huffman encoder that caused 64-bit +builds of libjpeg-turbo to incorrectly encode a few specific test images when +quality=98, an optimized Huffman table, and the slow integer forward DCT were +used. + +[8] The Windows (CMake) build system now supports building only static or only +shared libraries. This is accomplished by adding either -DENABLE_STATIC=0 or +-DENABLE_SHARED=0 to the CMake command line. + +[9] TurboJPEG API functions will now return an error code if a warning is +triggered in the underlying libjpeg API. For instance, if a JPEG file is +corrupt, the TurboJPEG decompression functions will attempt to decompress +as much of the image as possible, but those functions will now return -1 to +indicate that the decompression was not entirely successful. + +[10] Fixed a bug in the MIPS DSPr2 4:2:2 fancy upsampling routine that caused a +buffer overflow (and subsequent segfault) when decompressing a 4:2:2 JPEG image +in which the right-most MCU was 5 or 6 pixels wide. + + +1.4.0 +===== + +[1] Fixed a build issue on OS X PowerPC platforms (md5cmp failed to build +because OS X does not provide the le32toh() and htole32() functions.) + +[2] The non-SIMD RGB565 color conversion code did not work correctly on big +endian machines. This has been fixed. + +[3] Fixed an issue in tjPlaneSizeYUV() whereby it would erroneously return 1 +instead of -1 if componentID was > 0 and subsamp was TJSAMP_GRAY. + +[3] Fixed an issue in tjBufSizeYUV2() whereby it would erroneously return 0 +instead of -1 if width was < 1. + +[5] The Huffman encoder now uses clz and bsr instructions for bit counting on +ARM64 platforms (see 1.4 beta1 [5].) + +[6] The close() method in the TJCompressor and TJDecompressor Java classes is +now idempotent. Previously, that method would call the native tjDestroy() +function even if the TurboJPEG instance had already been destroyed. This +caused an exception to be thrown during finalization, if the close() method had +already been called. The exception was caught, but it was still an expensive +operation. + +[7] The TurboJPEG API previously generated an error ("Could not determine +subsampling type for JPEG image") when attempting to decompress grayscale JPEG +images that were compressed with a sampling factor other than 1 (for instance, +with 'cjpeg -grayscale -sample 2x2'). Subsampling technically has no meaning +with grayscale JPEGs, and thus the horizontal and vertical sampling factors +for such images are ignored by the decompressor. However, the TurboJPEG API +was being too rigid and was expecting the sampling factors to be equal to 1 +before it treated the image as a grayscale JPEG. + +[8] cjpeg, djpeg, and jpegtran now accept an argument of -version, which will +print the library version and exit. + +[9] Referring to 1.4 beta1 [15], another extremely rare circumstance was +discovered under which the Huffman encoder's local buffer can be overrun +when a buffered destination manager is being used and an +extremely-high-frequency block (basically junk image data) is being encoded. +Even though the Huffman local buffer was increased from 128 bytes to 136 bytes +to address the previous issue, the new issue caused even the larger buffer to +be overrun. Further analysis reveals that, in the absolute worst case (such as +setting alternating AC coefficients to 32767 and -32768 in the JPEG scanning +order), the Huffman encoder can produce encoded blocks that approach double the +size of the unencoded blocks. Thus, the Huffman local buffer was increased to +256 bytes, which should prevent any such issue from re-occurring in the future. + +[10] The new tjPlaneSizeYUV(), tjPlaneWidth(), and tjPlaneHeight() functions +were not actually usable on any platform except OS X and Windows, because +those functions were not included in the libturbojpeg mapfile. This has been +fixed. + +[11] Restored the JPP(), JMETHOD(), and FAR macros in the libjpeg-turbo header +files. The JPP() and JMETHOD() macros were originally implemented in libjpeg +as a way of supporting non-ANSI compilers that lacked support for prototype +parameters. libjpeg-turbo has never supported such compilers, but some +software packages still use the macros to define their own prototypes. +Similarly, libjpeg-turbo has never supported MS-DOS and other platforms that +have far symbols, but some software packages still use the FAR macro. A pretty +good argument can be made that this is a bad practice on the part of the +software in question, but since this affects more than one package, it's just +easier to fix it here. + +[12] Fixed issues that were preventing the ARM 64-bit SIMD code from compiling +for iOS, and included an ARMv8 architecture in all of the binaries installed by +the "official" libjpeg-turbo SDK for OS X. + + +1.3.90 (1.4 beta1) +================== + +[1] New features in the TurboJPEG API: +-- YUV planar images can now be generated with an arbitrary line padding +(previously only 4-byte padding, which was compatible with X Video, was +supported.) +-- The decompress-to-YUV function has been extended to support image scaling. +-- JPEG images can now be compressed from YUV planar source images. +-- YUV planar images can now be decoded into RGB or grayscale images. +-- 4:1:1 subsampling is now supported. This is mainly included for +compatibility, since 4:1:1 is not fully accelerated in libjpeg-turbo and has no +significant advantages relative to 4:2:0. +-- CMYK images are now supported. This feature allows CMYK source images to be +compressed to YCCK JPEGs and YCCK or CMYK JPEGs to be decompressed to CMYK +destination images. Conversion between CMYK/YCCK and RGB or YUV images is not +supported. Such conversion requires a color management system and is thus out +of scope for a codec library. +-- The handling of YUV images in the Java API has been significantly refactored +and should now be much more intuitive. +-- The Java API now supports encoding a YUV image from an arbitrary position in +a large image buffer. +-- All of the YUV functions now have a corresponding function that operates on +separate image planes instead of a unified image buffer. This allows for +compressing/decoding from or decompressing/encoding to a subregion of a larger +YUV image. It also allows for handling YUV formats that swap the order of the +U and V planes. + +[2] Added SIMD acceleration for DSPr2-capable MIPS platforms. This speeds up +the compression of full-color JPEGs by 70-80% on such platforms and +decompression by 25-35%. + +[3] If an application attempts to decompress a Huffman-coded JPEG image whose +header does not contain Huffman tables, libjpeg-turbo will now insert the +default Huffman tables. In order to save space, many motion JPEG video frames +are encoded without the default Huffman tables, so these frames can now be +successfully decompressed by libjpeg-turbo without additional work on the part +of the application. An application can still override the Huffman tables, for +instance to re-use tables from a previous frame of the same video. + +[4] The Mac packaging system now uses pkgbuild and productbuild rather than +PackageMaker (which is obsolete and no longer supported.) This means that +OS X 10.6 "Snow Leopard" or later must be used when packaging libjpeg-turbo, +although the packages produced can be installed on OS X 10.5 "Leopard" or +later. OS X 10.4 "Tiger" is no longer supported. + +[5] The Huffman encoder now uses clz and bsr instructions for bit counting on +ARM platforms rather than a lookup table. This reduces the memory footprint +by 64k, which may be important for some mobile applications. Out of four +Android devices that were tested, two demonstrated a small overall performance +loss (~3-4% on average) with ARMv6 code and a small gain (also ~3-4%) with +ARMv7 code when enabling this new feature, but the other two devices +demonstrated a significant overall performance gain with both ARMv6 and ARMv7 +code (~10-20%) when enabling the feature. Actual mileage may vary. + +[6] Worked around an issue with Visual C++ 2010 and later that caused incorrect +pixels to be generated when decompressing a JPEG image to a 256-color bitmap, +if compiler optimization was enabled when libjpeg-turbo was built. This caused +the regression tests to fail when doing a release build under Visual C++ 2010 +and later. + +[7] Improved the accuracy and performance of the non-SIMD implementation of the +floating point inverse DCT (using code borrowed from libjpeg v8a and later.) +The accuracy of this implementation now matches the accuracy of the SSE/SSE2 +implementation. Note, however, that the floating point DCT/IDCT algorithms are +mainly a legacy feature. They generally do not produce significantly better +accuracy than the slow integer DCT/IDCT algorithms, and they are quite a bit +slower. + +[8] Added a new output colorspace (JCS_RGB565) to the libjpeg API that allows +for decompressing JPEG images into RGB565 (16-bit) pixels. If dithering is not +used, then this code path is SIMD-accelerated on ARM platforms. + +[9] Numerous obsolete features, such as support for non-ANSI compilers and +support for the MS-DOS memory model, were removed from the libjpeg code, +greatly improving its readability and making it easier to maintain and extend. + +[10] Fixed a segfault that occurred when calling output_message() with msg_code +set to JMSG_COPYRIGHT. + +[11] Fixed an issue whereby wrjpgcom was allowing comments longer than 65k +characters to be passed on the command line, which was causing it to generate +incorrect JPEG files. + +[12] Fixed a bug in the build system that was causing the Windows version of +wrjpgcom to be built using the rdjpgcom source code. + +[13] Restored 12-bit-per-component JPEG support. A 12-bit version of +libjpeg-turbo can now be built by passing an argument of --with-12bit to +configure (Unix) or -DWITH_12BIT=1 to cmake (Windows.) 12-bit JPEG support is +included only for convenience. Enabling this feature disables all of the +performance features in libjpeg-turbo, as well as arithmetic coding and the +TurboJPEG API. The resulting library still contains the other libjpeg-turbo +features (such as the colorspace extensions), but in general, it performs no +faster than libjpeg v6b. + +[14] Added ARM 64-bit SIMD acceleration for the YCC-to-RGB color conversion +and IDCT algorithms (both are used during JPEG decompression.) For unknown +reasons (probably related to clang), this code cannot currently be compiled for +iOS. + +[15] Fixed an extremely rare bug that could cause the Huffman encoder's local +buffer to overrun when a very high-frequency MCU is compressed using quality +100 and no subsampling, and when the JPEG output buffer is being dynamically +resized by the destination manager. This issue was so rare that, even with a +test program specifically designed to make the bug occur (by injecting random +high-frequency YUV data into the compressor), it was reproducible only once in +about every 25 million iterations. + +[16] Fixed an oversight in the TurboJPEG C wrapper: if any of the JPEG +compression functions was called repeatedly with the same +automatically-allocated destination buffer, then TurboJPEG would erroneously +assume that the jpegSize parameter was equal to the size of the buffer, when in +fact that parameter was probably equal to the size of the most recently +compressed JPEG image. If the size of the previous JPEG image was not as large +as the current JPEG image, then TurboJPEG would unnecessarily reallocate the +destination buffer. + + +1.3.1 +===== + +[1] On Un*x systems, 'make install' now installs the libjpeg-turbo libraries +into /opt/libjpeg-turbo/lib32 by default on any 32-bit system, not just x86, +and into /opt/libjpeg-turbo/lib64 by default on any 64-bit system, not just +x86-64. You can override this by overriding either the 'prefix' or 'libdir' +configure variables. + +[2] The Windows installer now places a copy of the TurboJPEG DLLs in the same +directory as the rest of the libjpeg-turbo binaries. This was mainly done +to support TurboVNC 1.3, which bundles the DLLs in its Windows installation. +When using a 32-bit version of CMake on 64-bit Windows, it is impossible to +access the c:\WINDOWS\system32 directory, which made it impossible for the +TurboVNC build scripts to bundle the 64-bit TurboJPEG DLL. + +[3] Fixed a bug whereby attempting to encode a progressive JPEG with arithmetic +entropy coding (by passing arguments of -progressive -arithmetic to cjpeg or +jpegtran, for instance) would result in an error, "Requested feature was +omitted at compile time". + +[4] Fixed a couple of issues whereby malformed JPEG images would cause +libjpeg-turbo to use uninitialized memory during decompression. + +[5] Fixed an error ("Buffer passed to JPEG library is too small") that occurred +when calling the TurboJPEG YUV encoding function with a very small (< 5x5) +source image, and added a unit test to check for this error. + +[6] The Java classes should now build properly under Visual Studio 2010 and +later. + +[7] Fixed an issue that prevented SRPMs generated using the in-tree packaging +tools from being rebuilt on certain newer Linux distributions. + +[8] Numerous minor fixes to eliminate compilation and build/packaging system +warnings, fix cosmetic issues, improve documentation clarity, and other general +source cleanup. + + +1.3.0 +===== + +[1] 'make test' now works properly on FreeBSD, and it no longer requires the +md5sum executable to be present on other Un*x platforms. + +[2] Overhauled the packaging system: +-- To avoid conflict with vendor-supplied libjpeg-turbo packages, the +official RPMs and DEBs for libjpeg-turbo have been renamed to +"libjpeg-turbo-official". +-- The TurboJPEG libraries are now located under /opt/libjpeg-turbo in the +official Linux and Mac packages, to avoid conflict with vendor-supplied +packages and also to streamline the packaging system. +-- Release packages are now created with the directory structure defined +by the configure variables "prefix", "bindir", "libdir", etc. (Un*x) or by the +CMAKE_INSTALL_PREFIX variable (Windows.) The exception is that the docs are +always located under the system default documentation directory on Un*x and Mac +systems, and on Windows, the TurboJPEG DLL is always located in the Windows +system directory. +-- To avoid confusion, official libjpeg-turbo packages on Linux/Unix platforms +(except for Mac) will always install the 32-bit libraries in +/opt/libjpeg-turbo/lib32 and the 64-bit libraries in /opt/libjpeg-turbo/lib64. +-- Fixed an issue whereby, in some cases, the libjpeg-turbo executables on Un*x +systems were not properly linking with the shared libraries installed by the +same package. +-- Fixed an issue whereby building the "installer" target on Windows when +WITH_JAVA=1 would fail if the TurboJPEG JAR had not been previously built. +-- Building the "install" target on Windows now installs files into the same +places that the installer does. + +[3] Fixed a Huffman encoder bug that prevented I/O suspension from working +properly. + + +1.2.90 (1.3 beta1) +================== + +[1] Added support for additional scaling factors (3/8, 5/8, 3/4, 7/8, 9/8, 5/4, +11/8, 3/2, 13/8, 7/4, 15/8, and 2) when decompressing. Note that the IDCT will +not be SIMD-accelerated when using any of these new scaling factors. + +[2] The TurboJPEG dynamic library is now versioned. It was not strictly +necessary to do so, because TurboJPEG uses versioned symbols, and if a function +changes in an ABI-incompatible way, that function is renamed and a legacy +function is provided to maintain backward compatibility. However, certain +Linux distro maintainers have a policy against accepting any library that isn't +versioned. + +[3] Extended the TurboJPEG Java API so that it can be used to compress a JPEG +image from and decompress a JPEG image to an arbitrary position in a large +image buffer. + +[4] The tjDecompressToYUV() function now supports the TJFLAG_FASTDCT flag. + +[5] The 32-bit supplementary package for amd64 Debian systems now provides +symlinks in /usr/lib/i386-linux-gnu for the TurboJPEG libraries in /usr/lib32. +This allows those libraries to be used on MultiArch-compatible systems (such as +Ubuntu 11 and later) without setting the linker path. + +[6] The TurboJPEG Java wrapper should now find the JNI library on Mac systems +without having to pass -Djava.library.path=/usr/lib to java. + +[7] TJBench has been ported to Java to provide a convenient way of validating +the performance of the TurboJPEG Java API. It can be run with +'java -cp turbojpeg.jar TJBench'. + +[8] cjpeg can now be used to generate JPEG files with the RGB colorspace +(feature ported from jpeg-8d.) + +[9] The width and height in the -crop argument passed to jpegtran can now be +suffixed with "f" to indicate that, when the upper left corner of the cropping +region is automatically moved to the nearest iMCU boundary, the bottom right +corner should be moved by the same amount. In other words, this feature causes +jpegtran to strictly honor the specified width/height rather than the specified +bottom right corner (feature ported from jpeg-8d.) + +[10] JPEG files using the RGB colorspace can now be decompressed into grayscale +images (feature ported from jpeg-8d.) + +[11] Fixed a regression caused by 1.2.1[7] whereby the build would fail with +multiple "Mismatch in operand sizes" errors when attempting to build the x86 +SIMD code with NASM 0.98. + +[12] The in-memory source/destination managers (jpeg_mem_src() and +jpeg_mem_dest()) are now included by default when building libjpeg-turbo with +libjpeg v6b or v7 emulation, so that programs can take advantage of these +functions without requiring the use of the backward-incompatible libjpeg v8 +ABI. The "age number" of the libjpeg-turbo library on Un*x systems has been +incremented by 1 to reflect this. You can disable this feature with a +configure/CMake switch in order to retain strict API/ABI compatibility with the +libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.) See +README.md for more details. + +[13] Added ARMv7s architecture to libjpeg.a and libturbojpeg.a in the official +libjpeg-turbo binary package for OS X, so that those libraries can be used to +build applications that leverage the faster CPUs in the iPhone 5 and iPad 4. + + +1.2.1 +===== + +[1] Creating or decoding a JPEG file that uses the RGB colorspace should now +properly work when the input or output colorspace is one of the libjpeg-turbo +colorspace extensions. + +[2] When libjpeg-turbo was built without SIMD support and merged (non-fancy) +upsampling was used along with an alpha-enabled colorspace during +decompression, the unused byte of the decompressed pixels was not being set to +0xFF. This has been fixed. TJUnitTest has also been extended to test for the +correct behavior of the colorspace extensions when merged upsampling is used. + +[3] Fixed a bug whereby the libjpeg-turbo SSE2 SIMD code would not preserve the +upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64 +calling conventions. + +[4] Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG +images (specifically, images in which the component count was erroneously set +to a large value) would cause libjpeg-turbo to segfault. + +[5] Worked around a severe performance issue with "Bobcat" (AMD Embedded APU) +processors. The MASKMOVDQU instruction, which was used by the libjpeg-turbo +SSE2 SIMD code, is apparently implemented in microcode on AMD processors, and +it is painfully slow on Bobcat processors in particular. Eliminating the use +of this instruction improved performance by an order of magnitude on Bobcat +processors and by a small amount (typically 5%) on AMD desktop processors. + +[6] Added SIMD acceleration for performing 4:2:2 upsampling on NEON-capable ARM +platforms. This speeds up the decompression of 4:2:2 JPEGs by 20-25% on such +platforms. + +[7] Fixed a regression caused by 1.2.0[2] whereby, on Linux/x86 platforms +running the 32-bit SSE2 SIMD code in libjpeg-turbo, decompressing a 4:2:0 or +4:2:2 JPEG image into a 32-bit (RGBX, BGRX, etc.) buffer without using fancy +upsampling would produce several incorrect columns of pixels at the right-hand +side of the output image if each row in the output image was not evenly +divisible by 16 bytes. + +[8] Fixed an issue whereby attempting to build the SIMD extensions with Xcode +4.3 on OS X platforms would cause NASM to return numerous errors of the form +"'%define' expects a macro identifier". + +[9] Added flags to the TurboJPEG API that allow the caller to force the use of +either the fast or the accurate DCT/IDCT algorithms in the underlying codec. + + +1.2.0 +===== + +[1] Fixed build issue with YASM on Unix systems (the libjpeg-turbo build system +was not adding the current directory to the assembler include path, so YASM +was not able to find jsimdcfg.inc.) + +[2] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing +a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. +This was more of an annoyance than an actual bug, since it did not cause any +actual run-time problems, but the issue showed up when running libjpeg-turbo in +valgrind. See http://crbug.com/72399 for more information. + +[3] Added a compile-time macro (LIBJPEG_TURBO_VERSION) that can be used to +check the version of libjpeg-turbo against which an application was compiled. + +[4] Added new RGBA/BGRA/ABGR/ARGB colorspace extension constants (libjpeg API) +and pixel formats (TurboJPEG API), which allow applications to specify that, +when decompressing to a 4-component RGB buffer, the unused byte should be set +to 0xFF so that it can be interpreted as an opaque alpha channel. + +[5] Fixed regression issue whereby DevIL failed to build against libjpeg-turbo +because libjpeg-turbo's distributed version of jconfig.h contained an INLINE +macro, which conflicted with a similar macro in DevIL. This macro is used only +internally when building libjpeg-turbo, so it was moved into config.h. + +[6] libjpeg-turbo will now correctly decompress erroneous CMYK/YCCK JPEGs whose +K component is assigned a component ID of 1 instead of 4. Although these files +are in violation of the spec, other JPEG implementations handle them +correctly. + +[7] Added ARMv6 and ARMv7 architectures to libjpeg.a and libturbojpeg.a in +the official libjpeg-turbo binary package for OS X, so that those libraries can +be used to build both OS X and iOS applications. + + +1.1.90 (1.2 beta1) +================== + +[1] Added a Java wrapper for the TurboJPEG API. See java/README for more +details. + +[2] The TurboJPEG API can now be used to scale down images during +decompression. + +[3] Added SIMD routines for RGB-to-grayscale color conversion, which +significantly improves the performance of grayscale JPEG compression from an +RGB source image. + +[4] Improved the performance of the C color conversion routines, which are used +on platforms for which SIMD acceleration is not available. + +[5] Added a function to the TurboJPEG API that performs lossless transforms. +This function is implemented using the same back end as jpegtran, but it +performs transcoding entirely in memory and allows multiple transforms and/or +crop operations to be batched together, so the source coefficients only need to +be read once. This is useful when generating image tiles from a single source +JPEG. + +[6] Added tests for the new TurboJPEG scaled decompression and lossless +transform features to tjbench (the TurboJPEG benchmark, formerly called +"jpgtest".) + +[7] Added support for 4:4:0 (transposed 4:2:2) subsampling in TurboJPEG, which +was necessary in order for it to read 4:2:2 JPEG files that had been losslessly +transposed or rotated 90 degrees. + +[8] All legacy VirtualGL code has been re-factored, and this has allowed +libjpeg-turbo, in its entirety, to be re-licensed under a BSD-style license. + +[9] libjpeg-turbo can now be built with YASM. + +[10] Added SIMD acceleration for ARM Linux and iOS platforms that support +NEON instructions. + +[11] Refactored the TurboJPEG C API and documented it using Doxygen. The +TurboJPEG 1.2 API uses pixel formats to define the size and component order of +the uncompressed source/destination images, and it includes a more efficient +version of TJBUFSIZE() that computes a worst-case JPEG size based on the level +of chrominance subsampling. The refactored implementation of the TurboJPEG API +now uses the libjpeg memory source and destination managers, which allows the +TurboJPEG compressor to grow the JPEG buffer as necessary. + +[12] Eliminated errors in the output of jpegtran on Windows that occurred when +the application was invoked using I/O redirection +(jpegtran output.jpg). + +[13] The inclusion of libjpeg v7 and v8 emulation as well as arithmetic coding +support in libjpeg-turbo v1.1.0 introduced several new error constants in +jerror.h, and these were mistakenly enabled for all emulation modes, causing +the error enum in libjpeg-turbo to sometimes have different values than the +same enum in libjpeg. This represents an ABI incompatibility, and it caused +problems with rare applications that took specific action based on a particular +error value. The fix was to include the new error constants conditionally +based on whether libjpeg v7 or v8 emulation was enabled. + +[14] Fixed an issue whereby Windows applications that used libjpeg-turbo would +fail to compile if the Windows system headers were included before jpeglib.h. +This issue was caused by a conflict in the definition of the INT32 type. + +[15] Fixed 32-bit supplementary package for amd64 Debian systems, which was +broken by enhancements to the packaging system in 1.1. + +[16] When decompressing a JPEG image using an output colorspace of +JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, or JCS_EXT_XRGB, libjpeg-turbo will +now set the unused byte to 0xFF, which allows applications to interpret that +byte as an alpha channel (0xFF = opaque). + + +1.1.1 +===== + +[1] Fixed a 1-pixel error in row 0, column 21 of the luminance plane generated +by tjEncodeYUV(). + +[2] libjpeg-turbo's accelerated Huffman decoder previously ignored unexpected +markers found in the middle of the JPEG data stream during decompression. It +will now hand off decoding of a particular block to the unaccelerated Huffman +decoder if an unexpected marker is found, so that the unaccelerated Huffman +decoder can generate an appropriate warning. + +[3] Older versions of MinGW64 prefixed symbol names with underscores by +default, which differed from the behavior of 64-bit Visual C++. MinGW64 1.0 +has adopted the behavior of 64-bit Visual C++ as the default, so to accommodate +this, the libjpeg-turbo SIMD function names are no longer prefixed with an +underscore when building with MinGW64. This means that, when building +libjpeg-turbo with older versions of MinGW64, you will now have to add +-fno-leading-underscore to the CFLAGS. + +[4] Fixed a regression bug in the NSIS script that caused the Windows installer +build to fail when using the Visual Studio IDE. + +[5] Fixed a bug in jpeg_read_coefficients() whereby it would not initialize +cinfo->image_width and cinfo->image_height if libjpeg v7 or v8 emulation was +enabled. This specifically caused the jpegoptim program to fail if it was +linked against a version of libjpeg-turbo that was built with libjpeg v7 or v8 +emulation. + +[6] Eliminated excessive I/O overhead that occurred when reading BMP files in +cjpeg. + +[7] Eliminated errors in the output of cjpeg on Windows that occurred when the +application was invoked using I/O redirection (cjpeg output.jpg). + + +1.1.0 +===== + +[1] The algorithm used by the SIMD quantization function cannot produce correct +results when the JPEG quality is >= 98 and the fast integer forward DCT is +used. Thus, the non-SIMD quantization function is now used for those cases, +and libjpeg-turbo should now produce identical output to libjpeg v6b in all +cases. + +[2] Despite the above, the fast integer forward DCT still degrades somewhat for +JPEG qualities greater than 95, so the TurboJPEG wrapper will now automatically +use the slow integer forward DCT when generating JPEG images of quality 96 or +greater. This reduces compression performance by as much as 15% for these +high-quality images but is necessary to ensure that the images are perceptually +lossless. It also ensures that the library can avoid the performance pitfall +created by [1]. + +[3] Ported jpgtest.cxx to pure C to avoid the need for a C++ compiler. + +[4] Fixed visual artifacts in grayscale JPEG compression caused by a typo in +the RGB-to-luminance lookup tables. + +[5] The Windows distribution packages now include the libjpeg run-time programs +(cjpeg, etc.) + +[6] All packages now include jpgtest. + +[7] The TurboJPEG dynamic library now uses versioned symbols. + +[8] Added two new TurboJPEG API functions, tjEncodeYUV() and +tjDecompressToYUV(), to replace the somewhat hackish TJ_YUV flag. + + +1.0.90 (1.1 beta1) +================== + +[1] Added emulation of the libjpeg v7 and v8 APIs and ABIs. See +README.md for more details. This feature was sponsored by CamTrace SAS. + +[2] Created a new CMake-based build system for the Visual C++ and MinGW builds. + +[3] Grayscale bitmaps can now be compressed from/decompressed to using the +TurboJPEG API. + +[4] jpgtest can now be used to test decompression performance with existing +JPEG images. + +[5] If the default install prefix (/opt/libjpeg-turbo) is used, then +'make install' now creates /opt/libjpeg-turbo/lib32 and +/opt/libjpeg-turbo/lib64 sym links to duplicate the behavior of the binary +packages. + +[6] All symbols in the libjpeg-turbo dynamic library are now versioned, even +when the library is built with libjpeg v6b emulation. + +[7] Added arithmetic encoding and decoding support (can be disabled with +configure or CMake options) + +[8] Added a TJ_YUV flag to the TurboJPEG API, which causes both the compressor +and decompressor to output planar YUV images. + +[9] Added an extended version of tjDecompressHeader() to the TurboJPEG API, +which allows the caller to determine the type of subsampling used in a JPEG +image. + +[10] Added further protections against invalid Huffman codes. + + +1.0.1 +===== + +[1] The Huffman decoder will now handle erroneous Huffman codes (for instance, +from a corrupt JPEG image.) Previously, these would cause libjpeg-turbo to +crash under certain circumstances. + +[2] Fixed typo in SIMD dispatch routines that was causing 4:2:2 upsampling to +be used instead of 4:2:0 when decompressing JPEG images using SSE2 code. + +[3] configure script will now automatically determine whether the +INCOMPLETE_TYPES_BROKEN macro should be defined. + + +1.0.0 +===== + +[1] 2983700: Further FreeBSD build tweaks (no longer necessary to specify +--host when configuring on a 64-bit system) + +[2] Created symlinks in the Unix/Linux packages so that the TurboJPEG +include file can always be found in /opt/libjpeg-turbo/include, the 32-bit +static libraries can always be found in /opt/libjpeg-turbo/lib32, and the +64-bit static libraries can always be found in /opt/libjpeg-turbo/lib64. + +[3] The Unix/Linux distribution packages now include the libjpeg run-time +programs (cjpeg, etc.) and man pages. + +[4] Created a 32-bit supplementary package for amd64 Debian systems, which +contains just the 32-bit libjpeg-turbo libraries. + +[5] Moved the libraries from */lib32 to */lib in the i386 Debian package. + +[6] Include distribution package for Cygwin + +[7] No longer necessary to specify --without-simd on non-x86 architectures, and +unit tests now work on those architectures. + + +0.0.93 +====== + +[1] 2982659, Fixed x86-64 build on FreeBSD systems + +[2] 2988188: Added support for Windows 64-bit systems + + +0.0.91 +====== + +[1] Added documentation to .deb packages + +[2] 2968313: Fixed data corruption issues when decompressing large JPEG images +and/or using buffered I/O with the libjpeg-turbo decompressor + + +0.0.90 +====== + +Initial release diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cjpeg.1 glmark2-2021.02/src/libjpeg-turbo/cjpeg.1 --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cjpeg.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/cjpeg.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,351 @@ +.TH CJPEG 1 "17 February 2016" +.SH NAME +cjpeg \- compress an image file to a JPEG file +.SH SYNOPSIS +.B cjpeg +[ +.I options +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B cjpeg +compresses the named image file, or the standard input if no file is +named, and produces a JPEG/JFIF file on the standard output. +The currently supported input file formats are: PPM (PBMPLUS color +format), PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster +Toolkit format). (RLE is supported only if the URT library is available.) +.SH OPTIONS +All switch names may be abbreviated; for example, +.B \-grayscale +may be written +.B \-gray +or +.BR \-gr . +Most of the "basic" switches can be abbreviated to as little as one letter. +Upper and lower case are equivalent (thus +.B \-BMP +is the same as +.BR \-bmp ). +British spellings are also accepted (e.g., +.BR \-greyscale ), +though for brevity these are not mentioned below. +.PP +The basic switches are: +.TP +.BI \-quality " N[,...]" +Scale quantization tables to adjust image quality. Quality is 0 (worst) to +100 (best); default is 75. (See below for more info.) +.TP +.B \-grayscale +Create monochrome JPEG file from color input. Be sure to use this switch when +compressing a grayscale BMP file, because +.B cjpeg +isn't bright enough to notice whether a BMP file uses only shades of gray. +By saying +.BR \-grayscale , +you'll get a smaller JPEG file that takes less time to process. +.TP +.B \-rgb +Create RGB JPEG file. +Using this switch suppresses the conversion from RGB +colorspace input to the default YCbCr JPEG colorspace. +.TP +.B \-optimize +Perform optimization of entropy encoding parameters. Without this, default +encoding parameters are used. +.B \-optimize +usually makes the JPEG file a little smaller, but +.B cjpeg +runs somewhat slower and needs much more memory. Image quality and speed of +decompression are unaffected by +.BR \-optimize . +.TP +.B \-progressive +Create progressive JPEG file (see below). +.TP +.B \-targa +Input file is Targa format. Targa files that contain an "identification" +field will not be automatically recognized by +.BR cjpeg ; +for such files you must specify +.B \-targa +to make +.B cjpeg +treat the input as Targa format. +For most Targa files, you won't need this switch. +.PP +The +.B \-quality +switch lets you trade off compressed file size against quality of the +reconstructed image: the higher the quality setting, the larger the JPEG file, +and the closer the output image will be to the original input. Normally you +want to use the lowest quality setting (smallest file) that decompresses into +something visually indistinguishable from the original image. For this +purpose the quality setting should generally be between 50 and 95 (the default +is 75) for photographic images. If you see defects at +.B \-quality +75, then go up 5 or 10 counts at a time until you are happy with the output +image. (The optimal setting will vary from one image to another.) +.PP +.B \-quality +100 will generate a quantization table of all 1's, minimizing loss in the +quantization step (but there is still information loss in subsampling, as well +as roundoff error.) For most images, specifying a quality value above +about 95 will increase the size of the compressed file dramatically, and while +the quality gain from these higher quality values is measurable (using metrics +such as PSNR or SSIM), it is rarely perceivable by human vision. +.PP +In the other direction, quality values below 50 will produce very small files +of low image quality. Settings around 5 to 10 might be useful in preparing an +index of a large image library, for example. Try +.B \-quality +2 (or so) for some amusing Cubist effects. (Note: quality +values below about 25 generate 2-byte quantization tables, which are +considered optional in the JPEG standard. +.B cjpeg +emits a warning message when you give such a quality value, because some +other JPEG programs may be unable to decode the resulting file. Use +.B \-baseline +if you need to ensure compatibility at low quality values.) +.PP +The \fB-quality\fR option has been extended in this version of \fBcjpeg\fR to +support separate quality settings for luminance and chrominance (or, in +general, separate settings for every quantization table slot.) The principle +is the same as chrominance subsampling: since the human eye is more sensitive +to spatial changes in brightness than spatial changes in color, the chrominance +components can be quantized more than the luminance components without +incurring any visible image quality loss. However, unlike subsampling, this +feature reduces data in the frequency domain instead of the spatial domain, +which allows for more fine-grained control. This option is useful in +quality-sensitive applications, for which the artifacts generated by +subsampling may be unacceptable. +.PP +The \fB-quality\fR option accepts a comma-separated list of parameters, which +respectively refer to the quality levels that should be assigned to the +quantization table slots. If there are more q-table slots than parameters, +then the last parameter is replicated. Thus, if only one quality parameter is +given, this is used for both luminance and chrominance (slots 0 and 1, +respectively), preserving the legacy behavior of cjpeg v6b and prior. +More (or customized) quantization tables can be set with the \fB-qtables\fR +option and assigned to components with the \fB-qslots\fR option (see the +"wizard" switches below.) +.PP +JPEG files generated with separate luminance and chrominance quality are fully +compliant with standard JPEG decoders. +.PP +.BR CAUTION: +For this setting to be useful, be sure to pass an argument of \fB-sample 1x1\fR +to \fBcjpeg\fR to disable chrominance subsampling. Otherwise, the default +subsampling level (2x2, AKA "4:2:0") will be used. +.PP +The +.B \-progressive +switch creates a "progressive JPEG" file. In this type of JPEG file, the data +is stored in multiple scans of increasing quality. If the file is being +transmitted over a slow communications link, the decoder can use the first +scan to display a low-quality image very quickly, and can then improve the +display with each subsequent scan. The final image is exactly equivalent to a +standard JPEG file of the same quality setting, and the total file size is +about the same --- often a little smaller. +.PP +Switches for advanced users: +.TP +.B \-arithmetic +Use arithmetic coding. +.B Caution: +arithmetic coded JPEG is not yet widely implemented, so many decoders will be +unable to view an arithmetic coded JPEG file at all. +.TP +.B \-dct int +Use integer DCT method (default). +.TP +.B \-dct fast +Use fast integer DCT (less accurate). +In libjpeg-turbo, the fast method is generally about 5-15% faster than the int +method when using the x86/x86-64 SIMD extensions (results may vary with other +SIMD implementations, or when using libjpeg-turbo without SIMD extensions.) +For quality levels of 90 and below, there should be little or no perceptible +difference between the two algorithms. For quality levels above 90, however, +the difference between the fast and the int methods becomes more pronounced. +With quality=97, for instance, the fast method incurs generally about a 1-3 dB +loss (in PSNR) relative to the int method, but this can be larger for some +images. Do not use the fast method with quality levels above 97. The +algorithm often degenerates at quality=98 and above and can actually produce a +more lossy image than if lower quality levels had been used. Also, in +libjpeg-turbo, the fast method is not fully accelerated for quality levels +above 97, so it will be slower than the int method. +.TP +.B \-dct float +Use floating-point DCT method. +The float method is mainly a legacy feature. It does not produce significantly +more accurate results than the int method, and it is much slower. The float +method may also give different results on different machines due to varying +roundoff behavior, whereas the integer methods should give the same results on +all machines. +.TP +.BI \-restart " N" +Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is +attached to the number. +.B \-restart 0 +(the default) means no restart markers. +.TP +.BI \-smooth " N" +Smooth the input image to eliminate dithering noise. N, ranging from 1 to +100, indicates the strength of smoothing. 0 (the default) means no smoothing. +.TP +.BI \-maxmemory " N" +Set limit for amount of memory to use in processing large images. Value is +in thousands of bytes, or millions of bytes if "M" is attached to the +number. For example, +.B \-max 4m +selects 4000000 bytes. If more space is needed, temporary files will be used. +.TP +.BI \-outfile " name" +Send output image to the named file, not to standard output. +.TP +.BI \-memdst +Compress to memory instead of a file. This feature was implemented mainly as a +way of testing the in-memory destination manager (jpeg_mem_dest()), but it is +also useful for benchmarking, since it reduces the I/O overhead. +.TP +.B \-verbose +Enable debug printout. More +.BR \-v 's +give more output. Also, version information is printed at startup. +.TP +.B \-debug +Same as +.BR \-verbose . +.TP +.B \-version +Print version information and exit. +.PP +The +.B \-restart +option inserts extra markers that allow a JPEG decoder to resynchronize after +a transmission error. Without restart markers, any damage to a compressed +file will usually ruin the image from the point of the error to the end of the +image; with restart markers, the damage is usually confined to the portion of +the image up to the next restart marker. Of course, the restart markers +occupy extra space. We recommend +.B \-restart 1 +for images that will be transmitted across unreliable networks such as Usenet. +.PP +The +.B \-smooth +option filters the input to eliminate fine-scale noise. This is often useful +when converting dithered images to JPEG: a moderate smoothing factor of 10 to +50 gets rid of dithering patterns in the input file, resulting in a smaller +JPEG file and a better-looking image. Too large a smoothing factor will +visibly blur the image, however. +.PP +Switches for wizards: +.TP +.B \-baseline +Force baseline-compatible quantization tables to be generated. This clamps +quantization values to 8 bits even at low quality settings. (This switch is +poorly named, since it does not ensure that the output is actually baseline +JPEG. For example, you can use +.B \-baseline +and +.B \-progressive +together.) +.TP +.BI \-qtables " file" +Use the quantization tables given in the specified text file. +.TP +.BI \-qslots " N[,...]" +Select which quantization table to use for each color component. +.TP +.BI \-sample " HxV[,...]" +Set JPEG sampling factors for each color component. +.TP +.BI \-scans " file" +Use the scan script given in the specified text file. +.PP +The "wizard" switches are intended for experimentation with JPEG. If you +don't know what you are doing, \fBdon't use them\fR. These switches are +documented further in the file wizard.txt. +.SH EXAMPLES +.LP +This example compresses the PPM file foo.ppm with a quality factor of +60 and saves the output as foo.jpg: +.IP +.B cjpeg \-quality +.I 60 foo.ppm +.B > +.I foo.jpg +.SH HINTS +Color GIF files are not the ideal input for JPEG; JPEG is really intended for +compressing full-color (24-bit) images. In particular, don't try to convert +cartoons, line drawings, and other images that have only a few distinct +colors. GIF works great on these, JPEG does not. If you want to convert a +GIF to JPEG, you should experiment with +.BR cjpeg 's +.B \-quality +and +.B \-smooth +options to get a satisfactory conversion. +.B \-smooth 10 +or so is often helpful. +.PP +Avoid running an image through a series of JPEG compression/decompression +cycles. Image quality loss will accumulate; after ten or so cycles the image +may be noticeably worse than it was after one cycle. It's best to use a +lossless format while manipulating an image, then convert to JPEG format when +you are ready to file the image away. +.PP +The +.B \-optimize +option to +.B cjpeg +is worth using when you are making a "final" version for posting or archiving. +It's also a win when you are using low quality settings to make very small +JPEG files; the percentage improvement is often a lot more than it is on +larger files. (At present, +.B \-optimize +mode is always selected when generating progressive JPEG files.) +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR djpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +.BR ppm (5), +.BR pgm (5) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo, to wordsmith certain sections, and to describe +features not present in libjpeg. +.SH ISSUES +Support for GIF input files was removed in cjpeg v6b due to concerns over +the Unisys LZW patent. Although this patent expired in 2006, cjpeg still +lacks GIF support, for these historical reasons. (Conversion of GIF files to +JPEG is usually a bad idea anyway, since GIF is a 256-color format.) +.PP +Not all variants of BMP and Targa file formats are supported. +.PP +The +.B \-targa +switch is not a bug, it's a feature. (It would be a bug if the Targa format +designers had not been clueless.) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cjpeg.c glmark2-2021.02/src/libjpeg-turbo/cjpeg.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/cjpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/cjpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,644 @@ +/* + * cjpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2013-2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for the JPEG compressor. + * It should work on any system with Unix- or MS-DOS-style command lines. + * + * Two different command line styles are permitted, depending on the + * compile-time switch TWO_FILE_COMMANDLINE: + * cjpeg [options] inputfile outputfile + * cjpeg [options] [inputfile] + * In the second style, output is always to standard output, which you'd + * normally redirect to a file or pipe to some other program. Input is + * either from a named file or from standard input (typically redirected). + * The second style is convenient on Unix but is unhelpful on systems that + * don't support pipes. Also, you MUST use the first style if your system + * doesn't do binary I/O to stdin/stdout. + * To simplify script writing, the "-outfile" switch is provided. The syntax + * cjpeg [options] -outfile outputfile inputfile + * works regardless of which command line style is used. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* Create the add-on message string table. */ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * This routine determines what format the input file is, + * and selects the appropriate input-reading module. + * + * To determine which family of input formats the file belongs to, + * we may look only at the first byte of the file, since C does not + * guarantee that more than one character can be pushed back with ungetc. + * Looking at additional bytes would require one of these approaches: + * 1) assume we can fseek() the input file (fails for piped input); + * 2) assume we can push back more than one character (works in + * some C implementations, but unportable); + * 3) provide our own buffering (breaks input readers that want to use + * stdio directly, such as the RLE library); + * or 4) don't put back the data, and modify the input_init methods to assume + * they start reading after the start of file (also breaks RLE library). + * #1 is attractive for MS-DOS but is untenable on Unix. + * + * The most portable solution for file types that can't be identified by their + * first byte is to make the user tell us what they are. This is also the + * only approach for "raw" file types that contain only arbitrary values. + * We presently apply this method for Targa files. Most of the time Targa + * files start with 0x00, so we recognize that case. Potentially, however, + * a Targa file could start with any byte value (byte 0 is the length of the + * seldom-used ID field), so we provide a switch to force Targa input mode. + */ + +static boolean is_targa; /* records user -targa switch */ + + +LOCAL(cjpeg_source_ptr) +select_file_type (j_compress_ptr cinfo, FILE *infile) +{ + int c; + + if (is_targa) { +#ifdef TARGA_SUPPORTED + return jinit_read_targa(cinfo); +#else + ERREXIT(cinfo, JERR_TGA_NOTCOMP); +#endif + } + + if ((c = getc(infile)) == EOF) + ERREXIT(cinfo, JERR_INPUT_EMPTY); + if (ungetc(c, infile) == EOF) + ERREXIT(cinfo, JERR_UNGETC_FAILED); + + switch (c) { +#ifdef BMP_SUPPORTED + case 'B': + return jinit_read_bmp(cinfo); +#endif +#ifdef GIF_SUPPORTED + case 'G': + return jinit_read_gif(cinfo); +#endif +#ifdef PPM_SUPPORTED + case 'P': + return jinit_read_ppm(cinfo); +#endif +#ifdef RLE_SUPPORTED + case 'R': + return jinit_read_rle(cinfo); +#endif +#ifdef TARGA_SUPPORTED + case 0x00: + return jinit_read_targa(cinfo); +#endif + default: + ERREXIT(cinfo, JERR_UNKNOWN_FORMAT); + break; + } + + return NULL; /* suppress compiler warnings */ +} + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +boolean memdst; /* for -memdst switch */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -quality N[,...] Compression quality (0..100; 5-95 is most useful range,\n"); + fprintf(stderr, " default is 75)\n"); + fprintf(stderr, " -grayscale Create monochrome JPEG file\n"); + fprintf(stderr, " -rgb Create RGB JPEG file\n"); +#ifdef ENTROPY_OPT_SUPPORTED + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); +#endif +#ifdef C_PROGRESSIVE_SUPPORTED + fprintf(stderr, " -progressive Create progressive JPEG file\n"); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n"); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : "")); +#endif + fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); +#ifdef INPUT_SMOOTHING_SUPPORTED + fprintf(stderr, " -smooth N Smooth dithered input (N=1..100 is strength)\n"); +#endif + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + fprintf(stderr, " -memdst Compress to memory instead of file (useful for benchmarking)\n"); +#endif + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + fprintf(stderr, "Switches for wizards:\n"); + fprintf(stderr, " -baseline Force baseline quantization tables\n"); + fprintf(stderr, " -qtables file Use quantization tables given in file\n"); + fprintf(stderr, " -qslots N[,...] Set component quantization tables\n"); + fprintf(stderr, " -sample HxV[,...] Set component sampling factors\n"); +#ifdef C_MULTISCAN_FILES_SUPPORTED + fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); +#endif + exit(EXIT_FAILURE); +} + + +LOCAL(int) +parse_switches (j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + boolean force_baseline; + boolean simple_progressive; + char *qualityarg = NULL; /* saves -quality parm if any */ + char *qtablefile = NULL; /* saves -qtables filename if any */ + char *qslotsarg = NULL; /* saves -qslots parm if any */ + char *samplearg = NULL; /* saves -sample parm if any */ + char *scansarg = NULL; /* saves -scans parm if any */ + + /* Set up default JPEG parameters. */ + + force_baseline = FALSE; /* by default, allow 16-bit quantizers */ + simple_progressive = FALSE; + is_targa = FALSE; + outfilename = NULL; + memdst = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "arithmetic", 1)) { + /* Use arithmetic coding. */ +#ifdef C_ARITH_CODING_SUPPORTED + cinfo->arith_code = TRUE; +#else + fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "baseline", 1)) { + /* Force baseline-compatible output (8-bit quantizer values). */ + force_baseline = TRUE; + + } else if (keymatch(arg, "dct", 2)) { + /* Select DCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force a monochrome JPEG file to be generated. */ + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + + } else if (keymatch(arg, "rgb", 3)) { + /* Force an RGB JPEG file to be generated. */ + jpeg_set_colorspace(cinfo, JCS_RGB); + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { + /* Enable entropy parm optimization. */ +#ifdef ENTROPY_OPT_SUPPORTED + cinfo->optimize_coding = TRUE; +#else + fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "progressive", 1)) { + /* Select simple progressive mode. */ +#ifdef C_PROGRESSIVE_SUPPORTED + simple_progressive = TRUE; + /* We must postpone execution until num_components is known. */ +#else + fprintf(stderr, "%s: sorry, progressive output was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "memdst", 2)) { + /* Use in-memory destination manager */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + memdst = TRUE; +#else + fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "quality", 1)) { + /* Quality ratings (quantization table scaling factors). */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qualityarg = argv[argn]; + + } else if (keymatch(arg, "qslots", 2)) { + /* Quantization table slot numbers. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qslotsarg = argv[argn]; + /* Must delay setting qslots until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default quant table numbers. + */ + + } else if (keymatch(arg, "qtables", 2)) { + /* Quantization tables fetched from file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qtablefile = argv[argn]; + /* We postpone actually reading the file in case -quality comes later. */ + + } else if (keymatch(arg, "restart", 1)) { + /* Restart interval in MCU rows (or in MCUs with 'b'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (lval < 0 || lval > 65535L) + usage(); + if (ch == 'b' || ch == 'B') { + cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ + } else { + cinfo->restart_in_rows = (int) lval; + /* restart_interval will be computed during startup */ + } + + } else if (keymatch(arg, "sample", 2)) { + /* Set sampling factors. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + samplearg = argv[argn]; + /* Must delay setting sample factors until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default sampling factors. + */ + + } else if (keymatch(arg, "scans", 4)) { + /* Set scan script. */ +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + scansarg = argv[argn]; + /* We must postpone reading the file in case -progressive appears. */ +#else + fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "smooth", 2)) { + /* Set input smoothing factor. */ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + if (val < 0 || val > 100) + usage(); + cinfo->smoothing_factor = val; + + } else if (keymatch(arg, "targa", 1)) { + /* Input file is Targa format. */ + is_targa = TRUE; + + } else { + usage(); /* bogus switch */ + } + } + + /* Post-switch-scanning cleanup */ + + if (for_real) { + + /* Set quantization tables for selected quality. */ + /* Some or all may be overridden if -qtables is present. */ + if (qualityarg != NULL) /* process -quality if it was present */ + if (! set_quality_ratings(cinfo, qualityarg, force_baseline)) + usage(); + + if (qtablefile != NULL) /* process -qtables if it was present */ + if (! read_quant_tables(cinfo, qtablefile, force_baseline)) + usage(); + + if (qslotsarg != NULL) /* process -qslots if it was present */ + if (! set_quant_slots(cinfo, qslotsarg)) + usage(); + + if (samplearg != NULL) /* process -sample if it was present */ + if (! set_sample_factors(cinfo, samplearg)) + usage(); + +#ifdef C_PROGRESSIVE_SUPPORTED + if (simple_progressive) /* process -progressive; -scans can override */ + jpeg_simple_progression(cinfo); +#endif + +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (scansarg != NULL) /* process -scans if it was present */ + if (! read_scan_script(cinfo, scansarg)) + usage(); +#endif + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + int file_index; + cjpeg_source_ptr src_mgr; + FILE *input_file; + FILE *output_file = NULL; + unsigned char *outbuffer = NULL; + unsigned long outsize = 0; + JDIMENSION num_scanlines; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "cjpeg"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG compression object with default error handling. */ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Initialize JPEG parameters. + * Much of this may be overridden later. + * In particular, we don't yet know the input file's color space, + * but we need to provide some value for jpeg_set_defaults() to work. + */ + + cinfo.in_color_space = JCS_RGB; /* arbitrary guess */ + jpeg_set_defaults(&cinfo); + + /* Scan command line to find file names. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + */ + + file_index = parse_switches(&cinfo, argc, argv, 0, FALSE); + +#ifdef TWO_FILE_COMMANDLINE + if (!memdst) { + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + input_file = read_stdin(); + } + + /* Open the output file. */ + if (outfilename != NULL) { + if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else if (!memdst) { + /* default output file is stdout */ + output_file = write_stdout(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Figure out the input file format, and set up to read it. */ + src_mgr = select_file_type(&cinfo, input_file); + src_mgr->input_file = input_file; + + /* Read the input file header to obtain file size & colorspace. */ + (*src_mgr->start_input) (&cinfo, src_mgr); + + /* Now that we know input colorspace, fix colorspace-dependent defaults */ + jpeg_default_colorspace(&cinfo); + + /* Adjust default compression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); + + /* Specify data destination for compression */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + if (memdst) + jpeg_mem_dest(&cinfo, &outbuffer, &outsize); + else +#endif + jpeg_stdio_dest(&cinfo, output_file); + + /* Start compressor */ + jpeg_start_compress(&cinfo, TRUE); + + /* Process data */ + while (cinfo.next_scanline < cinfo.image_height) { + num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr); + (void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines); + } + + /* Finish compression and release memory */ + (*src_mgr->finish_input) (&cinfo, src_mgr); + jpeg_finish_compress(&cinfo); + jpeg_destroy_compress(&cinfo); + + /* Close files, if we opened them */ + if (input_file != stdin) + fclose(input_file); + if (output_file != stdout && output_file != NULL) + fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + if (memdst) { + fprintf(stderr, "Compressed size: %lu bytes\n", outsize); + if (outbuffer != NULL) + free(outbuffer); + } + + /* All done. */ + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/coderules.txt glmark2-2021.02/src/libjpeg-turbo/coderules.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/coderules.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/coderules.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,78 @@ +IJG JPEG LIBRARY: CODING RULES + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1991-1996, Thomas G. Lane. +It was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo. +For conditions of distribution and use, see the accompanying README.ijg file. + + +Since numerous people will be contributing code and bug fixes, it's important +to establish a common coding style. The goal of using similar coding styles +is much more important than the details of just what that style is. + +In general we follow the recommendations of "Recommended C Style and Coding +Standards" revision 6.1 (Cannon et al. as modified by Spencer, Keppel and +Brader). This document is available in the IJG FTP archive (see +jpeg/doc/cstyle.ms.tbl.Z, or cstyle.txt.Z for those without nroff/tbl). + +Block comments should be laid out thusly: + +/* + * Block comments in this style. + */ + +We indent statements in K&R style, e.g., + if (test) { + then-part; + } else { + else-part; + } +with two spaces per indentation level. (This indentation convention is +handled automatically by GNU Emacs and many other text editors.) + +Multi-word names should be written in lower case with underscores, e.g., +multi_word_name (not multiWordName). Preprocessor symbols and enum constants +are similar but upper case (MULTI_WORD_NAME). Names should be unique within +the first fifteen characters. + +Note that each function definition must begin with GLOBAL(type), LOCAL(type), +or METHODDEF(type). These macros expand to "static type" or just "type" as +appropriate. They provide a readable indication of the routine's usage and +can readily be changed for special needs. (For instance, special linkage +keywords can be inserted for use in Windows DLLs.) + +A similar solution is used for external function declarations (see the EXTERN +macro.) + + +The JPEG library is intended to be used within larger programs. Furthermore, +we want it to be reentrant so that it can be used by applications that process +multiple images concurrently. The following rules support these requirements: + +1. Avoid direct use of file I/O, "malloc", error report printouts, etc; +pass these through the common routines provided. + +2. Minimize global namespace pollution. Functions should be declared static +wherever possible. (Note that our method-based calling conventions help this +a lot: in many modules only the initialization function will ever need to be +called directly, so only that function need be externally visible.) All +global function names should begin with "jpeg_". + +3. Don't use global variables; anything that must be used in another module +should be in the common data structures. + +4. Don't use static variables except for read-only constant tables. Variables +that should be private to a module can be placed into private structures (see +the system architecture document, structure.txt). + +5. Source file names should begin with "j" for files that are part of the +library proper; source files that are not part of the library, such as cjpeg.c +and djpeg.c, do not begin with "j". Keep compression and decompression code in +separate source files --- some applications may want only one half of the +library. + +Note: these rules (particularly #4) are not followed religiously in the +modules that are used in cjpeg/djpeg but are not part of the JPEG library +proper. Those modules are not really intended to be used in other +applications. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/config.h glmark2-2021.02/src/libjpeg-turbo/config.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/config.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/config.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,137 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Build number */ -#define BUILD "20120626" - -/* Support arithmetic encoding */ -#define C_ARITH_CODING_SUPPORTED 1 - -/* Support arithmetic decoding */ -#define D_ARITH_CODING_SUPPORTED 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_JNI_H */ - -/* Define to 1 if you have the `memcpy' function. */ -#define HAVE_MEMCPY 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `memset' function. */ -#define HAVE_MEMSET 1 - -/* Define if your compiler supports prototypes */ -#define HAVE_PROTOTYPES 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned char'. */ -#define HAVE_UNSIGNED_CHAR 1 - -/* Define to 1 if the system has the type `unsigned short'. */ -#define HAVE_UNSIGNED_SHORT 1 - -/* Compiler does not support pointers to undefined structures. */ -/* #undef INCOMPLETE_TYPES_BROKEN */ - -/* How to obtain function inlining. */ -#define INLINE __attribute__((always_inline)) - -/* libjpeg API version */ -#define JPEG_LIB_VERSION 62 - -/* libjpeg-turbo version */ -#define LIBJPEG_TURBO_VERSION 1.2.0 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Define if you have BSD-like bzero and bcopy */ -/* #undef NEED_BSD_STRINGS */ - -/* Define if you need short function names */ -/* #undef NEED_SHORT_EXTERNAL_NAMES */ - -/* Define if you have sys/types.h */ -#define NEED_SYS_TYPES_H 1 - -/* Name of package */ -#define PACKAGE "libjpeg-turbo" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "libjpeg-turbo" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libjpeg-turbo 1.2.0" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "libjpeg-turbo" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.2.0" - -/* Define if shift is unsigned */ -/* #undef RIGHT_SHIFT_IS_UNSIGNED */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Version number of package */ -#define VERSION "1.2.0" - -/* Use accelerated SIMD routines. */ -#define WITH_SIMD 1 - -/* Define to 1 if type `char' is unsigned and you are not using gcc. */ -#ifndef __CHAR_UNSIGNED__ -/* # undef __CHAR_UNSIGNED__ */ -#endif - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/configure.ac glmark2-2021.02/src/libjpeg-turbo/configure.ac --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/configure.ac 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/configure.ac 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,595 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.56]) +AC_INIT([libjpeg-turbo], [1.4.90]) + +AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2]) +AC_PREFIX_DEFAULT(/opt/libjpeg-turbo) + +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +# Checks for programs. +SAVED_CFLAGS=${CFLAGS} +SAVED_CPPFLAGS=${CPPFLAGS} +AC_PROG_CPP +AC_PROG_CC +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) +AM_PROG_AS +AM_PROG_CC_C_O +AC_PROG_INSTALL +AC_PROG_LIBTOOL +AC_PROG_LN_S + +AC_ARG_WITH([build-date], [Use custom build string to enable reproducible builds (default: YYMMDD)], + [BUILD="$with_build_date"], + [BUILD=`date +%Y%m%d`]) + +PKG_PROG_PKG_CONFIG + +# When the prefix is /opt/libjpeg-turbo, we assume that an "official" binary is +# being created, and thus we install things into specific locations. + +old_prefix=${prefix} +if test "x$prefix" = "xNONE" -a "x$ac_default_prefix" != "x"; then + prefix=$ac_default_prefix +fi +DATADIR=`eval echo ${datadir}` +DATADIR=`eval echo $DATADIR` +if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then + datadir='${prefix}' +fi +DATADIR=`eval echo ${datarootdir}` +DATADIR=`eval echo $DATADIR` +if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then + datarootdir='${prefix}' +fi +DOCDIR=`eval echo ${docdir}` +DOCDIR=`eval echo $DOCDIR` +if test "$DOCDIR" = "/opt/libjpeg-turbo/doc/libjpeg-turbo"; then + docdir='${datadir}/doc' +fi + +old_exec_prefix=${exec_prefix} +if test "x$exec_prefix" = "xNONE"; then + exec_prefix=${prefix} +fi + +AC_CHECK_SIZEOF(size_t) + +if test "x${libdir}" = 'x${exec_prefix}/lib' -o "x${libdir}" = 'x${prefix}/lib'; then + LIBDIR=`eval echo ${libdir}` + LIBDIR=`eval echo $LIBDIR` + if test "$LIBDIR" = "/opt/libjpeg-turbo/lib"; then + case $host_os in + darwin*) + ;; + *) + if test "${ac_cv_sizeof_size_t}" = "8"; then + libdir='${exec_prefix}/lib64' + elif test "${ac_cv_sizeof_size_t}" = "4"; then + libdir='${exec_prefix}/lib32' + fi + ;; + esac + fi +fi +exec_prefix=${old_exec_prefix} +prefix=${old_prefix} + +# Check whether compiler supports pointers to undefined structures +AC_MSG_CHECKING(whether compiler supports pointers to undefined structures) +AC_TRY_COMPILE([ typedef struct undefined_structure *undef_struct_ptr; ], , + AC_MSG_RESULT(yes), + [AC_MSG_RESULT(no) + AC_DEFINE([INCOMPLETE_TYPES_BROKEN], [1], + [Compiler does not support pointers to undefined structures.])]) + +if test "x${GCC}" = "xyes"; then + if test "x${SAVED_CFLAGS}" = "x"; then + CFLAGS=-O3 + fi + if test "x${SAVED_CPPFLAGS}" = "x"; then + CPPFLAGS=-Wall + fi +fi + +AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) +if test "x${SUNCC}" = "xyes"; then + if test "x${SAVED_CFLAGS}" = "x"; then + CFLAGS=-xO5 + fi +fi + +# Checks for libraries. + +# Checks for header files. +AC_HEADER_STDC +AC_CHECK_HEADERS([stddef.h stdlib.h locale.h string.h]) +AC_CHECK_HEADER([sys/types.h], + AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you need to include to get size_t.])) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_C_CHAR_UNSIGNED +AC_C_INLINE +AC_TYPE_SIZE_T +AC_CHECK_TYPES([unsigned char, unsigned short]) + +AC_MSG_CHECKING([if right shift is signed]) +AC_TRY_RUN( + [#include + int is_shifting_signed (long arg) { + long res = arg >> 4; + + if (res == -0x7F7E80CL) + return 1; /* right shift is signed */ + + /* see if unsigned-shift hack will fix it. */ + /* we can't just test exact value since it depends on width of long... */ + res |= (~0L) << (32-4); + if (res == -0x7F7E80CL) + return 0; /* right shift is unsigned */ + + printf("Right shift isn't acting as I expect it to.\n"); + printf("I fear the JPEG software will not work at all.\n\n"); + return 0; /* try it with unsigned anyway */ + } + int main (void) { + exit(is_shifting_signed(-0x7F7E80B1L)); + }], + [AC_MSG_RESULT(no) + AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1, + [Define if your (broken) compiler shifts signed values as if they were unsigned.])], + [AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(Assuming that right shift is signed on target machine.)]) + +# Checks for library functions. +AC_CHECK_FUNCS([memset memcpy], [], + [AC_DEFINE([NEED_BSD_STRINGS], 1, + [Define if you have BSD-like bzero and bcopy in rather than memset/memcpy in .])]) + +AC_MSG_CHECKING([libjpeg API version]) +AC_ARG_VAR(JPEG_LIB_VERSION, [libjpeg API version (62, 70, or 80)]) +if test "x$JPEG_LIB_VERSION" = "x"; then + AC_ARG_WITH([jpeg7], + AC_HELP_STRING([--with-jpeg7], + [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) + AC_ARG_WITH([jpeg8], + AC_HELP_STRING([--with-jpeg8], + [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) + if test "x${with_jpeg8}" = "xyes"; then + JPEG_LIB_VERSION=80 + else + if test "x${with_jpeg7}" = "xyes"; then + JPEG_LIB_VERSION=70 + else + JPEG_LIB_VERSION=62 + fi + fi +fi +JPEG_LIB_VERSION_DECIMAL=`expr $JPEG_LIB_VERSION / 10`.`expr $JPEG_LIB_VERSION % 10` +AC_SUBST(JPEG_LIB_VERSION_DECIMAL) +AC_MSG_RESULT([$JPEG_LIB_VERSION_DECIMAL]) +AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION], + [libjpeg API version]) + +AC_ARG_VAR(SO_MAJOR_VERSION, + [Major version of the libjpeg-turbo shared library (default is determined by the API version)]) +AC_ARG_VAR(SO_MINOR_VERSION, + [Minor version of the libjpeg-turbo shared library (default is determined by the API version)]) +if test "x$SO_MAJOR_VERSION" = "x"; then + case "$JPEG_LIB_VERSION" in + 62) SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;; + *) SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;; + esac +fi +if test "x$SO_MINOR_VERSION" = "x"; then + case "$JPEG_LIB_VERSION" in + 80) SO_MINOR_VERSION=2 ;; + *) SO_MINOR_VERSION=0 ;; + esac +fi + +RPM_CONFIG_ARGS= + +# Memory source/destination managers +SO_AGE=1 +MEM_SRCDST_FUNCTIONS= +if test "x${with_jpeg8}" != "xyes"; then + AC_MSG_CHECKING([whether to include in-memory source/destination managers]) + AC_ARG_WITH([mem-srcdst], + AC_HELP_STRING([--without-mem-srcdst], + [Do not include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI])) + if test "x$with_mem_srcdst" != "xno"; then + AC_MSG_RESULT(yes) + AC_DEFINE([MEM_SRCDST_SUPPORTED], [1], + [Support in-memory source/destination managers]) + SO_AGE=2 + MEM_SRCDST_FUNCTIONS="global: jpeg_mem_dest; jpeg_mem_src;"; + else + AC_MSG_RESULT(no) + RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-mem-srcdst" + fi +fi + +AC_MSG_CHECKING([libjpeg shared library version]) +AC_MSG_RESULT([$SO_MAJOR_VERSION.$SO_AGE.$SO_MINOR_VERSION]) +LIBTOOL_CURRENT=`expr $SO_MAJOR_VERSION + $SO_AGE` +AC_SUBST(LIBTOOL_CURRENT) +AC_SUBST(SO_MAJOR_VERSION) +AC_SUBST(SO_MINOR_VERSION) +AC_SUBST(SO_AGE) +AC_SUBST(MEM_SRCDST_FUNCTIONS) + +AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION, [$VERSION], [libjpeg-turbo version]) + +VERSION_SCRIPT=yes +AC_ARG_ENABLE([ld-version-script], + AS_HELP_STRING([--disable-ld-version-script], + [Disable linker version script for libjpeg-turbo (default is to use linker version script if the linker supports it)]), + [VERSION_SCRIPT=$enableval], []) + +AC_MSG_CHECKING([whether the linker supports version scripts]) +SAVED_LDFLAGS="$LDFLAGS" +LDFLAGS="$LDFLAGS -Wl,--version-script,conftest.map" +cat > conftest.map < +.I foo.bmp +.SH HINTS +To get a quick preview of an image, use the +.B \-grayscale +and/or +.B \-scale +switches. +.B \-grayscale \-scale 1/8 +is the fastest case. +.PP +Several options are available that trade off image quality to gain speed. +.B \-fast +turns on the recommended settings. +.PP +.B \-dct fast +and/or +.B \-nosmooth +gain speed at a small sacrifice in quality. +When producing a color-quantized image, +.B \-onepass \-dither ordered +is fast but much lower quality than the default behavior. +.B \-dither none +may give acceptable results in two-pass mode, but is seldom tolerable in +one-pass mode. +.PP +If you are fortunate enough to have very fast floating point hardware, +\fB\-dct float\fR may be even faster than \fB\-dct fast\fR. But on most +machines \fB\-dct float\fR is slower than \fB\-dct int\fR; in this case it is +not worth using, because its theoretical accuracy advantage is too small to be +significant in practice. +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR cjpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +.BR ppm (5), +.BR pgm (5) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo, to wordsmith certain sections, and to describe +features not present in libjpeg. +.SH ISSUES +Support for compressed GIF output files was removed in djpeg v6b due to +concerns over the Unisys LZW patent. Although this patent expired in 2006, +djpeg still lacks compressed GIF support, for these historical reasons. +(Conversion of JPEG files to GIF is usually a bad idea anyway, since GIF is a +256-color format.) The uncompressed GIF files that djpeg generates are larger +than they should be, but they are readable by standard GIF decoders. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/djpeg.c glmark2-2021.02/src/libjpeg-turbo/djpeg.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/djpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/djpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,781 @@ +/* + * djpeg.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2013 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010-2011, 2013-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for the JPEG decompressor. + * It should work on any system with Unix- or MS-DOS-style command lines. + * + * Two different command line styles are permitted, depending on the + * compile-time switch TWO_FILE_COMMANDLINE: + * djpeg [options] inputfile outputfile + * djpeg [options] [inputfile] + * In the second style, output is always to standard output, which you'd + * normally redirect to a file or pipe to some other program. Input is + * either from a named file or from standard input (typically redirected). + * The second style is convenient on Unix but is unhelpful on systems that + * don't support pipes. Also, you MUST use the first style if your system + * doesn't do binary I/O to stdin/stdout. + * To simplify script writing, the "-outfile" switch is provided. The syntax + * djpeg [options] -outfile outputfile inputfile + * works regardless of which command line style is used. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" +#include "wrppm.h" + +#include /* to declare isprint() */ + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* Create the add-on message string table. */ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * This list defines the known output image formats + * (not all of which need be supported by a given version). + * You can change the default output format by defining DEFAULT_FMT; + * indeed, you had better do so if you undefine PPM_SUPPORTED. + */ + +typedef enum { + FMT_BMP, /* BMP format (Windows flavor) */ + FMT_GIF, /* GIF format */ + FMT_OS2, /* BMP format (OS/2 flavor) */ + FMT_PPM, /* PPM/PGM (PBMPLUS formats) */ + FMT_RLE, /* RLE format */ + FMT_TARGA, /* Targa format */ + FMT_TIFF /* TIFF format */ +} IMAGE_FORMATS; + +#ifndef DEFAULT_FMT /* so can override from CFLAGS in Makefile */ +#define DEFAULT_FMT FMT_PPM +#endif + +static IMAGE_FORMATS requested_fmt; + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +boolean memsrc; /* for -memsrc switch */ +boolean skip, crop; +JDIMENSION skip_start, skip_end; +JDIMENSION crop_x, crop_y, crop_width, crop_height; +#define INPUT_BUF_SIZE 4096 + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -colors N Reduce image to no more than N colors\n"); + fprintf(stderr, " -fast Fast, low-quality processing\n"); + fprintf(stderr, " -grayscale Force grayscale output\n"); + fprintf(stderr, " -rgb Force RGB output\n"); + fprintf(stderr, " -rgb565 Force RGB565 output\n"); +#ifdef IDCT_SCALING_SUPPORTED + fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n"); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -bmp Select BMP output format (Windows style)%s\n", + (DEFAULT_FMT == FMT_BMP ? " (default)" : "")); +#endif +#ifdef GIF_SUPPORTED + fprintf(stderr, " -gif Select GIF output format%s\n", + (DEFAULT_FMT == FMT_GIF ? " (default)" : "")); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -os2 Select BMP output format (OS/2 style)%s\n", + (DEFAULT_FMT == FMT_OS2 ? " (default)" : "")); +#endif +#ifdef PPM_SUPPORTED + fprintf(stderr, " -pnm Select PBMPLUS (PPM/PGM) output format%s\n", + (DEFAULT_FMT == FMT_PPM ? " (default)" : "")); +#endif +#ifdef RLE_SUPPORTED + fprintf(stderr, " -rle Select Utah RLE output format%s\n", + (DEFAULT_FMT == FMT_RLE ? " (default)" : "")); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Select Targa output format%s\n", + (DEFAULT_FMT == FMT_TARGA ? " (default)" : "")); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : "")); +#endif + fprintf(stderr, " -dither fs Use F-S dithering (default)\n"); + fprintf(stderr, " -dither none Don't use dithering in quantization\n"); + fprintf(stderr, " -dither ordered Use ordered dither (medium speed, quality)\n"); +#ifdef QUANT_2PASS_SUPPORTED + fprintf(stderr, " -map FILE Map to colors used in named image file\n"); +#endif + fprintf(stderr, " -nosmooth Don't use high-quality upsampling\n"); +#ifdef QUANT_1PASS_SUPPORTED + fprintf(stderr, " -onepass Use 1-pass quantization (fast, low quality)\n"); +#endif + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + fprintf(stderr, " -memsrc Load input file into memory before decompressing\n"); +#endif + + fprintf(stderr, " -skip Y0,Y1 Decompress all rows except those between Y0 and Y1 (inclusive)\n"); + fprintf(stderr, " -crop WxH+X+Y Decompress only a rectangular subregion of the image\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + exit(EXIT_FAILURE); +} + + +LOCAL(int) +parse_switches (j_decompress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + + /* Set up default JPEG parameters. */ + requested_fmt = DEFAULT_FMT; /* set default output file format */ + outfilename = NULL; + memsrc = FALSE; + skip = FALSE; + crop = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "bmp", 1)) { + /* BMP output format. */ + requested_fmt = FMT_BMP; + + } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) || + keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) { + /* Do color quantization. */ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + cinfo->desired_number_of_colors = val; + cinfo->quantize_colors = TRUE; + + } else if (keymatch(arg, "dct", 2)) { + /* Select IDCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "dither", 2)) { + /* Select dithering algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "fs", 2)) { + cinfo->dither_mode = JDITHER_FS; + } else if (keymatch(argv[argn], "none", 2)) { + cinfo->dither_mode = JDITHER_NONE; + } else if (keymatch(argv[argn], "ordered", 2)) { + cinfo->dither_mode = JDITHER_ORDERED; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "fast", 1)) { + /* Select recommended processing options for quick-and-dirty output. */ + cinfo->two_pass_quantize = FALSE; + cinfo->dither_mode = JDITHER_ORDERED; + if (! cinfo->quantize_colors) /* don't override an earlier -colors */ + cinfo->desired_number_of_colors = 216; + cinfo->dct_method = JDCT_FASTEST; + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "gif", 1)) { + /* GIF output format. */ + requested_fmt = FMT_GIF; + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force monochrome output. */ + cinfo->out_color_space = JCS_GRAYSCALE; + + } else if (keymatch(arg, "rgb", 2)) { + /* Force RGB output. */ + cinfo->out_color_space = JCS_RGB; + + } else if (keymatch(arg, "rgb565", 2)) { + /* Force RGB565 output. */ + cinfo->out_color_space = JCS_RGB565; + + } else if (keymatch(arg, "map", 3)) { + /* Quantize to a color map taken from an input file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (for_real) { /* too expensive to do twice! */ +#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */ + FILE *mapfile; + + if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + read_color_map(cinfo, mapfile); + fclose(mapfile); + cinfo->quantize_colors = TRUE; +#else + ERREXIT(cinfo, JERR_NOT_COMPILED); +#endif + } + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "nosmooth", 3)) { + /* Suppress fancy upsampling */ + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "onepass", 3)) { + /* Use fast one-pass quantization. */ + cinfo->two_pass_quantize = FALSE; + + } else if (keymatch(arg, "os2", 3)) { + /* BMP output format (OS/2 flavor). */ + requested_fmt = FMT_OS2; + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "memsrc", 2)) { + /* Use in-memory source manager */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + memsrc = TRUE; +#else + fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) { + /* PPM/PGM output format. */ + requested_fmt = FMT_PPM; + + } else if (keymatch(arg, "rle", 1)) { + /* RLE output format. */ + requested_fmt = FMT_RLE; + + } else if (keymatch(arg, "scale", 2)) { + /* Scale the output image by a fraction M/N. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%u/%u", + &cinfo->scale_num, &cinfo->scale_denom) != 2) + usage(); + + } else if (keymatch(arg, "skip", 2)) { + if (++argn >= argc) + usage(); + if (sscanf(argv[argn], "%u,%u", &skip_start, &skip_end) != 2 || + skip_start > skip_end) + usage(); + skip = TRUE; + + } else if (keymatch(arg, "crop", 2)) { + char c; + if (++argn >= argc) + usage(); + if (sscanf(argv[argn], "%u%c%u+%u+%u", &crop_width, &c, &crop_height, + &crop_x, &crop_y) != 5 || + (c != 'X' && c != 'x') || crop_width < 1 || crop_height < 1) + usage(); + crop = TRUE; + + } else if (keymatch(arg, "targa", 1)) { + /* Targa output format. */ + requested_fmt = FMT_TARGA; + + } else { + usage(); /* bogus switch */ + } + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * Marker processor for COM and interesting APPn markers. + * This replaces the library's built-in processor, which just skips the marker. + * We want to print out the marker as text, to the extent possible. + * Note this code relies on a non-suspending data source. + */ + +LOCAL(unsigned int) +jpeg_getc (j_decompress_ptr cinfo) +/* Read next byte */ +{ + struct jpeg_source_mgr *datasrc = cinfo->src; + + if (datasrc->bytes_in_buffer == 0) { + if (! (*datasrc->fill_input_buffer) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + } + datasrc->bytes_in_buffer--; + return GETJOCTET(*datasrc->next_input_byte++); +} + + +METHODDEF(boolean) +print_text_marker (j_decompress_ptr cinfo) +{ + boolean traceit = (cinfo->err->trace_level >= 1); + long length; + unsigned int ch; + unsigned int lastch = 0; + + length = jpeg_getc(cinfo) << 8; + length += jpeg_getc(cinfo); + length -= 2; /* discount the length word itself */ + + if (traceit) { + if (cinfo->unread_marker == JPEG_COM) + fprintf(stderr, "Comment, length %ld:\n", (long) length); + else /* assume it is an APPn otherwise */ + fprintf(stderr, "APP%d, length %ld:\n", + cinfo->unread_marker - JPEG_APP0, (long) length); + } + + while (--length >= 0) { + ch = jpeg_getc(cinfo); + if (traceit) { + /* Emit the character in a readable form. + * Nonprintables are converted to \nnn form, + * while \ is converted to \\. + * Newlines in CR, CR/LF, or LF form will be printed as one newline. + */ + if (ch == '\r') { + fprintf(stderr, "\n"); + } else if (ch == '\n') { + if (lastch != '\r') + fprintf(stderr, "\n"); + } else if (ch == '\\') { + fprintf(stderr, "\\\\"); + } else if (isprint(ch)) { + putc(ch, stderr); + } else { + fprintf(stderr, "\\%03o", ch); + } + lastch = ch; + } + } + + if (traceit) + fprintf(stderr, "\n"); + + return TRUE; +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_decompress_struct cinfo; + struct jpeg_error_mgr jerr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + int file_index; + djpeg_dest_ptr dest_mgr = NULL; + FILE *input_file; + FILE *output_file; + unsigned char *inbuffer = NULL; + unsigned long insize = 0; + JDIMENSION num_scanlines; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "djpeg"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG decompression object with default error handling. */ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Insert custom marker processor for COM and APP12. + * APP12 is used by some digital camera makers for textual info, + * so we provide the ability to display it as text. + * If you like, additional APPn marker types can be selected for display, + * but don't try to override APP0 or APP14 this way (see libjpeg.txt). + */ + jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker); + jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker); + + /* Scan command line to find file names. */ + /* It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + * (Exception: tracing level set here controls verbosity for COM markers + * found during jpeg_read_header...) + */ + + file_index = parse_switches(&cinfo, argc, argv, 0, FALSE); + +#ifdef TWO_FILE_COMMANDLINE + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + input_file = read_stdin(); + } + + /* Open the output file. */ + if (outfilename != NULL) { + if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else { + /* default output file is stdout */ + output_file = write_stdout(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Specify data source for decompression */ +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) + if (memsrc) { + size_t nbytes; + do { + inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE); + if (inbuffer == NULL) { + fprintf(stderr, "%s: memory allocation failure\n", progname); + exit(EXIT_FAILURE); + } + nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE); + if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) { + if (file_index < argc) + fprintf(stderr, "%s: can't read from %s\n", progname, + argv[file_index]); + else + fprintf(stderr, "%s: can't read from stdin\n", progname); + } + insize += (unsigned long)nbytes; + } while (nbytes == INPUT_BUF_SIZE); + fprintf(stderr, "Compressed size: %lu bytes\n", insize); + jpeg_mem_src(&cinfo, inbuffer, insize); + } else +#endif + jpeg_stdio_src(&cinfo, input_file); + + /* Read file header, set default decompression parameters */ + (void) jpeg_read_header(&cinfo, TRUE); + + /* Adjust default decompression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); + + /* Initialize the output module now to let it override any crucial + * option settings (for instance, GIF wants to force color quantization). + */ + switch (requested_fmt) { +#ifdef BMP_SUPPORTED + case FMT_BMP: + dest_mgr = jinit_write_bmp(&cinfo, FALSE); + break; + case FMT_OS2: + dest_mgr = jinit_write_bmp(&cinfo, TRUE); + break; +#endif +#ifdef GIF_SUPPORTED + case FMT_GIF: + dest_mgr = jinit_write_gif(&cinfo); + break; +#endif +#ifdef PPM_SUPPORTED + case FMT_PPM: + dest_mgr = jinit_write_ppm(&cinfo); + break; +#endif +#ifdef RLE_SUPPORTED + case FMT_RLE: + dest_mgr = jinit_write_rle(&cinfo); + break; +#endif +#ifdef TARGA_SUPPORTED + case FMT_TARGA: + dest_mgr = jinit_write_targa(&cinfo); + break; +#endif + default: + ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT); + break; + } + dest_mgr->output_file = output_file; + + /* Start decompressor */ + (void) jpeg_start_decompress(&cinfo); + + /* Skip rows */ + if (skip) { + JDIMENSION tmp; + + /* Check for valid skip_end. We cannot check this value until after + * jpeg_start_decompress() is called. Note that we have already verified + * that skip_start <= skip_end. + */ + if (skip_end > cinfo.output_height - 1) { + fprintf(stderr, "%s: skip region exceeds image height %d\n", progname, + cinfo.output_height); + exit(EXIT_FAILURE); + } + + /* Write output file header. This is a hack to ensure that the destination + * manager creates an output image of the proper size. + */ + tmp = cinfo.output_height; + cinfo.output_height -= (skip_end - skip_start + 1); + (*dest_mgr->start_output) (&cinfo, dest_mgr); + cinfo.output_height = tmp; + + /* Process data */ + while (cinfo.output_scanline < skip_start) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + jpeg_skip_scanlines(&cinfo, skip_end - skip_start + 1); + while (cinfo.output_scanline < cinfo.output_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + + /* Decompress a subregion */ + } else if (crop) { + JDIMENSION tmp; + + /* Check for valid crop dimensions. We cannot check these values until + * after jpeg_start_decompress() is called. + */ + if (crop_x + crop_width > cinfo.output_width || + crop_y + crop_height > cinfo.output_height) { + fprintf(stderr, "%s: crop dimensions exceed image dimensions %d x %d\n", + progname, cinfo.output_width, cinfo.output_height); + exit(EXIT_FAILURE); + } + + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + ((ppm_dest_ptr) dest_mgr)->buffer_width = cinfo.output_width * + cinfo.out_color_components * + sizeof(JSAMPLE); + + /* Write output file header. This is a hack to ensure that the destination + * manager creates an output image of the proper size. + */ + tmp = cinfo.output_height; + cinfo.output_height = crop_height; + (*dest_mgr->start_output) (&cinfo, dest_mgr); + cinfo.output_height = tmp; + + /* Process data */ + jpeg_skip_scanlines(&cinfo, crop_y); + while (cinfo.output_scanline < crop_y + crop_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + jpeg_skip_scanlines(&cinfo, cinfo.output_height - crop_y - crop_height); + + /* Normal full-image decompress */ + } else { + /* Write output file header */ + (*dest_mgr->start_output) (&cinfo, dest_mgr); + + /* Process data */ + while (cinfo.output_scanline < cinfo.output_height) { + num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, + dest_mgr->buffer_height); + (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); + } + } + +#ifdef PROGRESS_REPORT + /* Hack: count final pass as done in case finish_output does an extra pass. + * The library won't have updated completed_passes. + */ + progress.pub.completed_passes = progress.pub.total_passes; +#endif + + /* Finish decompression and release memory. + * I must do it in this order because output module has allocated memory + * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory. + */ + (*dest_mgr->finish_output) (&cinfo, dest_mgr); + (void) jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + + /* Close files, if we opened them */ + if (input_file != stdin) + fclose(input_file); + if (output_file != stdout) + fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + if (memsrc && inbuffer != NULL) + free(inbuffer); + + /* All done. */ + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/doxygen.config glmark2-2021.02/src/libjpeg-turbo/doxygen.config --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/doxygen.config 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/doxygen.config 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,16 @@ +PROJECT_NAME = TurboJPEG +PROJECT_NUMBER = 1.5 +OUTPUT_DIRECTORY = doc/ +USE_WINDOWS_ENCODING = NO +OPTIMIZE_OUTPUT_FOR_C = YES +WARN_NO_PARAMDOC = YES +GENERATE_LATEX = NO +FILE_PATTERNS = turbojpeg.h +HIDE_UNDOC_MEMBERS = YES +VERBATIM_HEADERS = NO +EXTRACT_STATIC = YES +JAVADOC_AUTOBRIEF = YES +MAX_INITIALIZER_LINES = 0 +ALWAYS_DETAILED_SEC = YES +HTML_TIMESTAMP = NO +HTML_EXTRA_STYLESHEET = doxygen-extra.css diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/doxygen-extra.css glmark2-2021.02/src/libjpeg-turbo/doxygen-extra.css --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/doxygen-extra.css 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/doxygen-extra.css 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3 @@ +code { + color: #4665A2; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/example.c glmark2-2021.02/src/libjpeg-turbo/example.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/example.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/example.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,433 @@ +/* + * example.c + * + * This file illustrates how to use the IJG code as a subroutine library + * to read or write JPEG image files. You should look at this code in + * conjunction with the documentation file libjpeg.txt. + * + * This code will not do anything useful as-is, but it may be helpful as a + * skeleton for constructing routines that call the JPEG library. + * + * We present these routines in the same coding style used in the JPEG code + * (ANSI function definitions, etc); but you are of course free to code your + * routines in a different style if you prefer. + */ + +#include + +/* + * Include file for users of JPEG library. + * You will need to have included system headers that define at least + * the typedefs FILE and size_t before you can include jpeglib.h. + * (stdio.h is sufficient on ANSI-conforming systems.) + * You may also wish to include "jerror.h". + */ + +#include "jpeglib.h" + +/* + * is used for the optional error recovery mechanism shown in + * the second part of the example. + */ + +#include + + + +/******************** JPEG COMPRESSION SAMPLE INTERFACE *******************/ + +/* This half of the example shows how to feed data into the JPEG compressor. + * We present a minimal version that does not worry about refinements such + * as error recovery (the JPEG code will just exit() if it gets an error). + */ + + +/* + * IMAGE DATA FORMATS: + * + * The standard input image format is a rectangular array of pixels, with + * each pixel having the same number of "component" values (color channels). + * Each pixel row is an array of JSAMPLEs (which typically are unsigned chars). + * If you are working with color data, then the color values for each pixel + * must be adjacent in the row; for example, R,G,B,R,G,B,R,G,B,... for 24-bit + * RGB color. + * + * For this example, we'll assume that this data structure matches the way + * our application has stored the image in memory, so we can just pass a + * pointer to our image buffer. In particular, let's say that the image is + * RGB color and is described by: + */ + +extern JSAMPLE *image_buffer; /* Points to large array of R,G,B-order data */ +extern int image_height; /* Number of rows in image */ +extern int image_width; /* Number of columns in image */ + + +/* + * Sample routine for JPEG compression. We assume that the target file name + * and a compression quality factor are passed in. + */ + +GLOBAL(void) +write_JPEG_file (char *filename, int quality) +{ + /* This struct contains the JPEG compression parameters and pointers to + * working space (which is allocated as needed by the JPEG library). + * It is possible to have several such structures, representing multiple + * compression/decompression processes, in existence at once. We refer + * to any one struct (and its associated working data) as a "JPEG object". + */ + struct jpeg_compress_struct cinfo; + /* This struct represents a JPEG error handler. It is declared separately + * because applications often want to supply a specialized error handler + * (see the second half of this file for an example). But here we just + * take the easy way out and use the standard error handler, which will + * print a message on stderr and call exit() if compression fails. + * Note that this struct must live as long as the main JPEG parameter + * struct, to avoid dangling-pointer problems. + */ + struct jpeg_error_mgr jerr; + /* More stuff */ + FILE *outfile; /* target file */ + JSAMPROW row_pointer[1]; /* pointer to JSAMPLE row[s] */ + int row_stride; /* physical row width in image buffer */ + + /* Step 1: allocate and initialize JPEG compression object */ + + /* We have to set up the error handler first, in case the initialization + * step fails. (Unlikely, but it could happen if you are out of memory.) + * This routine fills in the contents of struct jerr, and returns jerr's + * address which we place into the link field in cinfo. + */ + cinfo.err = jpeg_std_error(&jerr); + /* Now we can initialize the JPEG compression object. */ + jpeg_create_compress(&cinfo); + + /* Step 2: specify data destination (eg, a file) */ + /* Note: steps 2 and 3 can be done in either order. */ + + /* Here we use the library-supplied code to send compressed data to a + * stdio stream. You can also write your own code to do something else. + * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that + * requires it in order to write binary files. + */ + if ((outfile = fopen(filename, "wb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_dest(&cinfo, outfile); + + /* Step 3: set parameters for compression */ + + /* First we supply a description of the input image. + * Four fields of the cinfo struct must be filled in: + */ + cinfo.image_width = image_width; /* image width and height, in pixels */ + cinfo.image_height = image_height; + cinfo.input_components = 3; /* # of color components per pixel */ + cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ + /* Now use the library's routine to set default compression parameters. + * (You must set at least cinfo.in_color_space before calling this, + * since the defaults depend on the source color space.) + */ + jpeg_set_defaults(&cinfo); + /* Now you can set any non-default parameters you wish to. + * Here we just illustrate the use of quality (quantization table) scaling: + */ + jpeg_set_quality(&cinfo, quality, TRUE /* limit to baseline-JPEG values */); + + /* Step 4: Start compressor */ + + /* TRUE ensures that we will write a complete interchange-JPEG file. + * Pass TRUE unless you are very sure of what you're doing. + */ + jpeg_start_compress(&cinfo, TRUE); + + /* Step 5: while (scan lines remain to be written) */ + /* jpeg_write_scanlines(...); */ + + /* Here we use the library's state variable cinfo.next_scanline as the + * loop counter, so that we don't have to keep track ourselves. + * To keep things simple, we pass one scanline per call; you can pass + * more if you wish, though. + */ + row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */ + + while (cinfo.next_scanline < cinfo.image_height) { + /* jpeg_write_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could pass + * more than one scanline at a time if that's more convenient. + */ + row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; + (void) jpeg_write_scanlines(&cinfo, row_pointer, 1); + } + + /* Step 6: Finish compression */ + + jpeg_finish_compress(&cinfo); + /* After finish_compress, we can close the output file. */ + fclose(outfile); + + /* Step 7: release JPEG compression object */ + + /* This is an important step since it will release a good deal of memory. */ + jpeg_destroy_compress(&cinfo); + + /* And we're done! */ +} + + +/* + * SOME FINE POINTS: + * + * In the above loop, we ignored the return value of jpeg_write_scanlines, + * which is the number of scanlines actually written. We could get away + * with this because we were only relying on the value of cinfo.next_scanline, + * which will be incremented correctly. If you maintain additional loop + * variables then you should be careful to increment them properly. + * Actually, for output to a stdio stream you needn't worry, because + * then jpeg_write_scanlines will write all the lines passed (or else exit + * with a fatal error). Partial writes can only occur if you use a data + * destination module that can demand suspension of the compressor. + * (If you don't know what that's for, you don't need it.) + * + * If the compressor requires full-image buffers (for entropy-coding + * optimization or a multi-scan JPEG file), it will create temporary + * files for anything that doesn't fit within the maximum-memory setting. + * (Note that temp files are NOT needed if you use the default parameters.) + * On some systems you may need to set up a signal handler to ensure that + * temporary files are deleted if the program is interrupted. See libjpeg.txt. + * + * Scanlines MUST be supplied in top-to-bottom order if you want your JPEG + * files to be compatible with everyone else's. If you cannot readily read + * your data in that order, you'll need an intermediate array to hold the + * image. See rdtarga.c or rdbmp.c for examples of handling bottom-to-top + * source data using the JPEG code's internal virtual-array mechanisms. + */ + + + +/******************** JPEG DECOMPRESSION SAMPLE INTERFACE *******************/ + +/* This half of the example shows how to read data from the JPEG decompressor. + * It's a bit more refined than the above, in that we show: + * (a) how to modify the JPEG library's standard error-reporting behavior; + * (b) how to allocate workspace using the library's memory manager. + * + * Just to make this example a little different from the first one, we'll + * assume that we do not intend to put the whole image into an in-memory + * buffer, but to send it line-by-line someplace else. We need a one- + * scanline-high JSAMPLE array as a work buffer, and we will let the JPEG + * memory manager allocate it for us. This approach is actually quite useful + * because we don't need to remember to deallocate the buffer separately: it + * will go away automatically when the JPEG object is cleaned up. + */ + + +/* + * ERROR HANDLING: + * + * The JPEG library's standard error handler (jerror.c) is divided into + * several "methods" which you can override individually. This lets you + * adjust the behavior without duplicating a lot of code, which you might + * have to update with each future release. + * + * Our example here shows how to override the "error_exit" method so that + * control is returned to the library's caller when a fatal error occurs, + * rather than calling exit() as the standard error_exit method does. + * + * We use C's setjmp/longjmp facility to return control. This means that the + * routine which calls the JPEG library must first execute a setjmp() call to + * establish the return point. We want the replacement error_exit to do a + * longjmp(). But we need to make the setjmp buffer accessible to the + * error_exit routine. To do this, we make a private extension of the + * standard JPEG error handler object. (If we were using C++, we'd say we + * were making a subclass of the regular error handler.) + * + * Here's the extended error handler struct: + */ + +struct my_error_mgr { + struct jpeg_error_mgr pub; /* "public" fields */ + + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +typedef struct my_error_mgr *my_error_ptr; + +/* + * Here's the routine that will replace the standard error_exit method: + */ + +METHODDEF(void) +my_error_exit (j_common_ptr cinfo) +{ + /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */ + my_error_ptr myerr = (my_error_ptr) cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose. */ + (*cinfo->err->output_message) (cinfo); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} + + +/* + * Sample routine for JPEG decompression. We assume that the source file name + * is passed in. We want to return 1 on success, 0 on error. + */ + + +GLOBAL(int) +read_JPEG_file (char *filename) +{ + /* This struct contains the JPEG decompression parameters and pointers to + * working space (which is allocated as needed by the JPEG library). + */ + struct jpeg_decompress_struct cinfo; + /* We use our private extension JPEG error handler. + * Note that this struct must live as long as the main JPEG parameter + * struct, to avoid dangling-pointer problems. + */ + struct my_error_mgr jerr; + /* More stuff */ + FILE *infile; /* source file */ + JSAMPARRAY buffer; /* Output row buffer */ + int row_stride; /* physical row width in output buffer */ + + /* In this example we want to open the input file before doing anything else, + * so that the setjmp() error recovery below can assume the file is open. + * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that + * requires it in order to read binary files. + */ + + if ((infile = fopen(filename, "rb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + return 0; + } + + /* Step 1: allocate and initialize JPEG decompression object */ + + /* We set up the normal JPEG error routines, then override error_exit. */ + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object, close the input file, and return. + */ + jpeg_destroy_decompress(&cinfo); + fclose(infile); + return 0; + } + /* Now we can initialize the JPEG decompression object. */ + jpeg_create_decompress(&cinfo); + + /* Step 2: specify data source (eg, a file) */ + + jpeg_stdio_src(&cinfo, infile); + + /* Step 3: read file parameters with jpeg_read_header() */ + + (void) jpeg_read_header(&cinfo, TRUE); + /* We can ignore the return value from jpeg_read_header since + * (a) suspension is not possible with the stdio data source, and + * (b) we passed TRUE to reject a tables-only JPEG file as an error. + * See libjpeg.txt for more info. + */ + + /* Step 4: set parameters for decompression */ + + /* In this example, we don't need to change any of the defaults set by + * jpeg_read_header(), so we do nothing here. + */ + + /* Step 5: Start decompressor */ + + (void) jpeg_start_decompress(&cinfo); + /* We can ignore the return value since suspension is not possible + * with the stdio data source. + */ + + /* We may need to do some setup of our own at this point before reading + * the data. After jpeg_start_decompress() we have the correct scaled + * output image dimensions available, as well as the output colormap + * if we asked for color quantization. + * In this example, we need to make an output work buffer of the right size. + */ + /* JSAMPLEs per row in output buffer */ + row_stride = cinfo.output_width * cinfo.output_components; + /* Make a one-row-high sample array that will go away when done with image */ + buffer = (*cinfo.mem->alloc_sarray) + ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1); + + /* Step 6: while (scan lines remain to be read) */ + /* jpeg_read_scanlines(...); */ + + /* Here we use the library's state variable cinfo.output_scanline as the + * loop counter, so that we don't have to keep track ourselves. + */ + while (cinfo.output_scanline < cinfo.output_height) { + /* jpeg_read_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could ask for + * more than one scanline at a time if that's more convenient. + */ + (void) jpeg_read_scanlines(&cinfo, buffer, 1); + /* Assume put_scanline_someplace wants a pointer and sample count. */ + put_scanline_someplace(buffer[0], row_stride); + } + + /* Step 7: Finish decompression */ + + (void) jpeg_finish_decompress(&cinfo); + /* We can ignore the return value since suspension is not possible + * with the stdio data source. + */ + + /* Step 8: Release JPEG decompression object */ + + /* This is an important step since it will release a good deal of memory. */ + jpeg_destroy_decompress(&cinfo); + + /* After finish_decompress, we can close the input file. + * Here we postpone it until after no more JPEG errors are possible, + * so as to simplify the setjmp error logic above. (Actually, I don't + * think that jpeg_destroy can do an error exit, but why assume anything...) + */ + fclose(infile); + + /* At this point you may want to check to see whether any corrupt-data + * warnings occurred (test whether jerr.pub.num_warnings is nonzero). + */ + + /* And we're done! */ + return 1; +} + + +/* + * SOME FINE POINTS: + * + * In the above code, we ignored the return value of jpeg_read_scanlines, + * which is the number of scanlines actually read. We could get away with + * this because we asked for only one line at a time and we weren't using + * a suspending data source. See libjpeg.txt for more info. + * + * We cheated a bit by calling alloc_sarray() after jpeg_start_decompress(); + * we should have done it beforehand to ensure that the space would be + * counted against the JPEG max_memory setting. In some systems the above + * code would risk an out-of-memory error. However, in general we don't + * know the output image dimensions before jpeg_start_decompress(), unless we + * call jpeg_calc_output_dimensions(). See libjpeg.txt for more about this. + * + * Scanlines are returned in the same order as they appear in the JPEG file, + * which is standardly top-to-bottom. If you must emit data bottom-to-top, + * you can use one of the virtual arrays provided by the JPEG memory manager + * to invert the data. See wrbmp.c for an example. + * + * As with compression, some operating modes may require temporary files. + * On some systems you may need to set up a signal handler to ensure that + * temporary files are deleted if the program is interrupted. See libjpeg.txt. + */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jaricom.c glmark2-2021.02/src/libjpeg-turbo/jaricom.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jaricom.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jaricom.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jaricom.c * + * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains probability estimation tables for common use in * arithmetic entropy encoding and decoding routines. @@ -18,7 +21,7 @@ #include "jpeglib.h" /* The following #define specifies the packing of the four components - * into the compact INT32 representation. + * into the compact JLONG representation. * Note that this formula must match the actual arithmetic encoder * and decoder implementation. The implementation has to be changed * if this formula is changed. @@ -26,9 +29,9 @@ * implementation (jbig_tab.c). */ -#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b) +#define V(i,a,b,c,d) (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b) -const INT32 jpeg_aritab[113+1] = { +const JLONG jpeg_aritab[113+1] = { /* * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcapimin.c glmark2-2021.02/src/libjpeg-turbo/jcapimin.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcapimin.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcapimin.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jcapimin.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the compression half * of the JPEG library. These are the "minimum" API routines that may be @@ -33,12 +36,12 @@ int i; /* Guard against version mismatches between library and caller. */ - cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ + cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ if (version != JPEG_LIB_VERSION) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); - if (structsize != SIZEOF(struct jpeg_compress_struct)) - ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) SIZEOF(struct jpeg_compress_struct), (int) structsize); + if (structsize != sizeof(struct jpeg_compress_struct)) + ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, + (int) sizeof(struct jpeg_compress_struct), (int) structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -47,9 +50,9 @@ * complain here. */ { - struct jpeg_error_mgr * err = cinfo->err; - void * client_data = cinfo->client_data; /* ignore Purify complaint here */ - MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct)); + struct jpeg_error_mgr *err = cinfo->err; + void *client_data = cinfo->client_data; /* ignore Purify complaint here */ + MEMZERO(cinfo, sizeof(struct jpeg_compress_struct)); cinfo->err = err; cinfo->client_data = client_data; } @@ -85,7 +88,7 @@ cinfo->script_space = NULL; - cinfo->input_gamma = 1.0; /* in case application forgets */ + cinfo->input_gamma = 1.0; /* in case application forgets */ /* OK, I'm ready */ cinfo->global_state = CSTATE_START; @@ -131,8 +134,8 @@ jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress) { int i; - JQUANT_TBL * qtbl; - JHUFF_TBL * htbl; + JQUANT_TBL *qtbl; + JHUFF_TBL *htbl; for (i = 0; i < NUM_QUANT_TBLS; i++) { if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL) @@ -173,15 +176,15 @@ (*cinfo->master->prepare_for_pass) (cinfo); for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) { if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) iMCU_row; - cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long) iMCU_row; + cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows; + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); } /* We bypass the main controller and invoke coef controller directly; * all work is being done from the coefficient buffer. */ if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL)) - ERREXIT(cinfo, JERR_CANT_SUSPEND); + ERREXIT(cinfo, JERR_CANT_SUSPEND); } (*cinfo->master->finish_pass) (cinfo); } @@ -202,9 +205,9 @@ GLOBAL(void) jpeg_write_marker (j_compress_ptr cinfo, int marker, - const JOCTET *dataptr, unsigned int datalen) + const JOCTET *dataptr, unsigned int datalen) { - JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val)); + void (*write_marker_byte) (j_compress_ptr info, int val); if (cinfo->next_scanline != 0 || (cinfo->global_state != CSTATE_SCANNING && @@ -213,7 +216,7 @@ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); (*cinfo->marker->write_marker_header) (cinfo, marker, datalen); - write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */ + write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */ while (datalen--) { (*write_marker_byte) (cinfo, *dataptr); dataptr++; @@ -248,14 +251,14 @@ * To produce a pair of files containing abbreviated tables and abbreviated * image data, one would proceed as follows: * - * initialize JPEG object - * set JPEG parameters - * set destination to table file - * jpeg_write_tables(cinfo); - * set destination to image file - * jpeg_start_compress(cinfo, FALSE); - * write data... - * jpeg_finish_compress(cinfo); + * initialize JPEG object + * set JPEG parameters + * set destination to table file + * jpeg_write_tables(cinfo); + * set destination to image file + * jpeg_start_compress(cinfo, FALSE); + * write data... + * jpeg_finish_compress(cinfo); * * jpeg_write_tables has the side effect of marking all tables written * (same as jpeg_suppress_tables(..., TRUE)). Thus a subsequent start_compress diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcapistd.c glmark2-2021.02/src/libjpeg-turbo/jcapistd.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcapistd.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcapistd.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1994-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the compression half * of the JPEG library. These are the "standard" API routines that are @@ -41,7 +42,7 @@ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); if (write_all_tables) - jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ + jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ /* (Re)initialize error mgr and destination modules */ (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); @@ -75,7 +76,7 @@ GLOBAL(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION num_lines) + JDIMENSION num_lines) { JDIMENSION row_ctr, rows_left; @@ -118,7 +119,7 @@ GLOBAL(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION num_lines) + JDIMENSION num_lines) { JDIMENSION lines_per_iMCU_row; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcarith.c glmark2-2021.02/src/libjpeg-turbo/jcarith.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcarith.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcarith.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcarith.c * + * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains portable arithmetic entropy encoding routines for JPEG * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). @@ -23,10 +26,10 @@ typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ - INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */ - INT32 a; /* A register, normalized size of coding interval */ - INT32 sc; /* counter for stacked 0xFF values which might overflow */ - INT32 zc; /* counter for pending 0x00 output values which might * + JLONG c; /* C register, base of coding interval, layout as in sec. D.1.3 */ + JLONG a; /* A register, normalized size of coding interval */ + JLONG sc; /* counter for stacked 0xFF values which might overflow */ + JLONG zc; /* counter for pending 0x00 output values which might * * be discarded at the end ("Pacman" termination) */ int ct; /* bit shift counter, determines when next byte will be written */ int buffer; /* buffer for most recent output byte != 0xFF */ @@ -34,18 +37,18 @@ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to statistics areas (these workspaces have image lifespan) */ - unsigned char * dc_stats[NUM_ARITH_TBLS]; - unsigned char * ac_stats[NUM_ARITH_TBLS]; + unsigned char *dc_stats[NUM_ARITH_TBLS]; + unsigned char *ac_stats[NUM_ARITH_TBLS]; /* Statistics bin for coding with fixed probability 0.5 */ unsigned char fixed_bin[4]; } arith_entropy_encoder; -typedef arith_entropy_encoder * arith_entropy_ptr; +typedef arith_entropy_encoder *arith_entropy_ptr; /* The following two definitions specify the allocation chunk size * for the statistics area. @@ -95,20 +98,20 @@ #define CALCULATE_SPECTRAL_CONDITIONING */ -/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. - * We assume that int right shift is unsigned if INT32 right shift is, +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. + * We assume that int right shift is unsigned if JLONG right shift is, * which should be safe. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS int ishift_temp; +#define ISHIFT_TEMPS int ishift_temp; #define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif @@ -116,7 +119,7 @@ emit_byte (int val, j_compress_ptr cinfo) /* Write next output byte; we do not support suspension in this module. */ { - struct jpeg_destination_mgr * dest = cinfo->dest; + struct jpeg_destination_mgr *dest = cinfo->dest; *dest->next_output_byte++ = (JOCTET) val; if (--dest->free_in_buffer == 0) @@ -133,7 +136,7 @@ finish_pass (j_compress_ptr cinfo) { arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; - INT32 temp; + JLONG temp; /* Section D.1.8: Termination of encoding */ @@ -149,11 +152,11 @@ /* One final overflow has to be handled */ if (e->buffer >= 0) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); emit_byte(e->buffer + 1, cinfo); if (e->buffer + 1 == 0xFF) - emit_byte(0x00, cinfo); + emit_byte(0x00, cinfo); } e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ e->sc = 0; @@ -162,17 +165,17 @@ ++e->zc; else if (e->buffer >= 0) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); emit_byte(e->buffer, cinfo); } if (e->sc) { if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); + do emit_byte(0x00, cinfo); + while (--e->zc); do { - emit_byte(0xFF, cinfo); - emit_byte(0x00, cinfo); + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); } while (--e->sc); } } @@ -187,7 +190,7 @@ if (e->c & 0x7F800L) { emit_byte((e->c >> 11) & 0xFF, cinfo); if (((e->c >> 11) & 0xFF) == 0xFF) - emit_byte(0x00, cinfo); + emit_byte(0x00, cinfo); } } } @@ -216,20 +219,20 @@ */ LOCAL(void) -arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) +arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) { register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; register unsigned char nl, nm; - register INT32 qe, temp; + register JLONG qe, temp; register int sv; /* Fetch values from our compact representation of Table D.2: * Qe values and probability estimation state machine */ sv = *st; - qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Encode & estimation procedures per sections D.1.4 & D.1.5 */ e->a -= qe; @@ -243,7 +246,7 @@ e->c += e->a; e->a = qe; } - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ } else { /* Encode the more probable symbol */ if (e->a >= 0x8000L) @@ -255,7 +258,7 @@ e->c += e->a; e->a = qe; } - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } /* Renormalization & data output per section D.1.6 */ @@ -266,43 +269,43 @@ /* Another byte is ready for output */ temp = e->c >> 19; if (temp > 0xFF) { - /* Handle overflow over all stacked 0xFF bytes */ - if (e->buffer >= 0) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - emit_byte(e->buffer + 1, cinfo); - if (e->buffer + 1 == 0xFF) - emit_byte(0x00, cinfo); - } - e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ - e->sc = 0; - /* Note: The 3 spacer bits in the C register guarantee - * that the new buffer byte can't be 0xFF here - * (see page 160 in the P&M JPEG book). */ - e->buffer = temp & 0xFF; /* new output byte, might overflow later */ + /* Handle overflow over all stacked 0xFF bytes */ + if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer + 1, cinfo); + if (e->buffer + 1 == 0xFF) + emit_byte(0x00, cinfo); + } + e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ + e->sc = 0; + /* Note: The 3 spacer bits in the C register guarantee + * that the new buffer byte can't be 0xFF here + * (see page 160 in the P&M JPEG book). */ + e->buffer = temp & 0xFF; /* new output byte, might overflow later */ } else if (temp == 0xFF) { - ++e->sc; /* stack 0xFF byte (which might overflow later) */ + ++e->sc; /* stack 0xFF byte (which might overflow later) */ } else { - /* Output all stacked 0xFF bytes, they will not overflow any more */ - if (e->buffer == 0) - ++e->zc; - else if (e->buffer >= 0) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - emit_byte(e->buffer, cinfo); - } - if (e->sc) { - if (e->zc) - do emit_byte(0x00, cinfo); - while (--e->zc); - do { - emit_byte(0xFF, cinfo); - emit_byte(0x00, cinfo); - } while (--e->sc); - } - e->buffer = temp & 0xFF; /* new output byte (can still overflow) */ + /* Output all stacked 0xFF bytes, they will not overflow any more */ + if (e->buffer == 0) + ++e->zc; + else if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer, cinfo); + } + if (e->sc) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + do { + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); + } while (--e->sc); + } + e->buffer = temp & 0xFF; /* new output byte (can still overflow) */ } e->c &= 0x7FFFFL; e->ct += 8; @@ -320,7 +323,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; finish_pass(cinfo); @@ -398,45 +401,45 @@ /* Figure F.4: Encode_DC_DIFF */ if ((v = m - entropy->last_dc_val[ci]) == 0) { arith_encode(cinfo, st, 0); - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ } else { entropy->last_dc_val[ci] = m; arith_encode(cinfo, st, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ - st += 2; /* Table F.4: SP = S0 + 2 */ - entropy->dc_context[ci] = 4; /* small positive diff category */ + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ } else { - v = -v; - arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ - st += 3; /* Table F.4: SN = S0 + 3 */ - entropy->dc_context[ci] = 8; /* small negative diff category */ + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ } /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] += 8; /* large diff category */ + entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } } @@ -491,21 +494,21 @@ /* Figure F.5: Encode_AC_Coefficients */ for (k = cinfo->Ss; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ for (;;) { if ((v = (*block)[jpeg_natural_order[k]]) >= 0) { - if (v >>= cinfo->Al) { - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 0); - break; - } + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + break; + } } else { - v = -v; - if (v >>= cinfo->Al) { - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 1); - break; - } + v = -v; + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + break; + } } arith_encode(cinfo, st + 1, 0); st += 3; k++; } @@ -517,15 +520,15 @@ m = 1; v2 = v; if (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } } arith_encode(cinfo, st, 0); @@ -566,7 +569,7 @@ entropy->restarts_to_go--; } - st = entropy->fixed_bin; /* use fixed probability estimation */ + st = entropy->fixed_bin; /* use fixed probability estimation */ Al = cinfo->Al; /* Encode the MCU data blocks */ @@ -635,29 +638,29 @@ for (k = cinfo->Ss; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); if (k > kex) - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ for (;;) { if ((v = (*block)[jpeg_natural_order[k]]) >= 0) { - if (v >>= cinfo->Al) { - if (v >> 1) /* previously nonzero coef */ - arith_encode(cinfo, st + 2, (v & 1)); - else { /* newly nonzero coef */ - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 0); - } - break; - } + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + } + break; + } } else { - v = -v; - if (v >>= cinfo->Al) { - if (v >> 1) /* previously nonzero coef */ - arith_encode(cinfo, st + 2, (v & 1)); - else { /* newly nonzero coef */ - arith_encode(cinfo, st + 1, 1); - arith_encode(cinfo, entropy->fixed_bin, 1); - } - break; - } + v = -v; + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + } + break; + } } arith_encode(cinfo, st + 1, 0); st += 3; k++; } @@ -680,7 +683,7 @@ encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl, k, ke; @@ -713,45 +716,45 @@ /* Figure F.4: Encode_DC_DIFF */ if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) { arith_encode(cinfo, st, 0); - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ } else { entropy->last_dc_val[ci] = (*block)[0]; arith_encode(cinfo, st, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ - st += 2; /* Table F.4: SP = S0 + 2 */ - entropy->dc_context[ci] = 4; /* small positive diff category */ + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ } else { - v = -v; - arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ - st += 3; /* Table F.4: SN = S0 + 3 */ - entropy->dc_context[ci] = 8; /* small negative diff category */ + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ } /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] += 8; /* large diff category */ + entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */ @@ -765,43 +768,43 @@ /* Figure F.5: Encode_AC_Coefficients */ for (k = 1; k <= ke; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - arith_encode(cinfo, st, 0); /* EOB decision */ + arith_encode(cinfo, st, 0); /* EOB decision */ while ((v = (*block)[jpeg_natural_order[k]]) == 0) { - arith_encode(cinfo, st + 1, 0); st += 3; k++; + arith_encode(cinfo, st + 1, 0); st += 3; k++; } arith_encode(cinfo, st + 1, 1); /* Figure F.6: Encoding nonzero value v */ /* Figure F.7: Encoding the sign of v */ if (v > 0) { - arith_encode(cinfo, entropy->fixed_bin, 0); + arith_encode(cinfo, entropy->fixed_bin, 0); } else { - v = -v; - arith_encode(cinfo, entropy->fixed_bin, 1); + v = -v; + arith_encode(cinfo, entropy->fixed_bin, 1); } st += 2; /* Figure F.8: Encoding the magnitude category of v */ m = 0; if (v -= 1) { - arith_encode(cinfo, st, 1); - m = 1; - v2 = v; - if (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (v2 >>= 1) { - arith_encode(cinfo, st, 1); - m <<= 1; - st += 1; - } - } + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + if (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } } arith_encode(cinfo, st, 0); /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - arith_encode(cinfo, st, (m & v) ? 1 : 0); + arith_encode(cinfo, st, (m & v) ? 1 : 0); } /* Encode EOB decision only if k <= DCTSIZE2 - 1 */ if (k <= DCTSIZE2 - 1) { @@ -823,7 +826,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (gather_statistics) /* Make sure to avoid that in the master control logic! @@ -838,14 +841,14 @@ if (cinfo->progressive_mode) { if (cinfo->Ah == 0) { if (cinfo->Ss == 0) - entropy->pub.encode_mcu = encode_mcu_DC_first; + entropy->pub.encode_mcu = encode_mcu_DC_first; else - entropy->pub.encode_mcu = encode_mcu_AC_first; + entropy->pub.encode_mcu = encode_mcu_AC_first; } else { if (cinfo->Ss == 0) - entropy->pub.encode_mcu = encode_mcu_DC_refine; + entropy->pub.encode_mcu = encode_mcu_DC_refine; else - entropy->pub.encode_mcu = encode_mcu_AC_refine; + entropy->pub.encode_mcu = encode_mcu_AC_refine; } } else entropy->pub.encode_mcu = encode_mcu; @@ -857,10 +860,10 @@ if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) { tbl = compptr->dc_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; @@ -870,15 +873,15 @@ if (cinfo->progressive_mode == 0 || cinfo->Se) { tbl = compptr->ac_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); #ifdef CALCULATE_SPECTRAL_CONDITIONING if (cinfo->progressive_mode) - /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ - cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); + /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ + cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); #endif } } @@ -909,7 +912,7 @@ entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(arith_entropy_encoder)); + sizeof(arith_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass; entropy->pub.finish_pass = finish_pass; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccoefct.c glmark2-2021.02/src/libjpeg-turbo/jccoefct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccoefct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jccoefct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jccoefct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the coefficient buffer controller for compression. * This controller is the top level of the JPEG compressor proper. @@ -34,19 +37,16 @@ typedef struct { struct jpeg_c_coef_controller pub; /* public fields */ - JDIMENSION iMCU_row_num; /* iMCU row # within image */ - JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ + JDIMENSION iMCU_row_num; /* iMCU row # within image */ + JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ /* For single-pass compression, it's sufficient to buffer just one MCU * (although this may prove a bit slow in practice). We allocate a * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each - * MCU constructed and sent. (On 80x86, the workspace is FAR even though - * it's not really very big; this is to keep the module interfaces unchanged - * when a large coefficient buffer is necessary.) - * In multi-pass modes, this array points to the current MCU's blocks - * within the virtual arrays. + * MCU constructed and sent. In multi-pass modes, this array points to the + * current MCU's blocks within the virtual arrays. */ JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU]; @@ -54,17 +54,17 @@ jvirt_barray_ptr whole_image[MAX_COMPONENTS]; } my_coef_controller; -typedef my_coef_controller * my_coef_ptr; +typedef my_coef_controller *my_coef_ptr; /* Forward declarations */ METHODDEF(boolean) compress_data - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); #ifdef FULL_COEF_BUFFER_SUPPORTED METHODDEF(boolean) compress_first_pass - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); METHODDEF(boolean) compress_output - JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); + (j_compress_ptr cinfo, JSAMPIMAGE input_buf); #endif @@ -143,7 +143,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, bi, ci, yindex, yoffset, blockcnt; @@ -154,7 +154,7 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col; - MCU_col_num++) { + MCU_col_num++) { /* Determine where data comes from in input_buf and do the DCT thing. * Each call on forward_DCT processes a horizontal row of DCT blocks * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks @@ -166,46 +166,46 @@ */ blkn = 0; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - xpos = MCU_col_num * compptr->MCU_sample_width; - ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (coef->iMCU_row_num < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { - (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[compptr->component_index], - coef->MCU_buffer[blkn], - ypos, xpos, (JDIMENSION) blockcnt); - if (blockcnt < compptr->MCU_width) { - /* Create some dummy blocks at the right edge of the image. */ - jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt], - (compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK)); - for (bi = blockcnt; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0]; - } - } - } else { - /* Create a row of dummy blocks at the bottom of the image. */ - jzero_far((void FAR *) coef->MCU_buffer[blkn], - compptr->MCU_width * SIZEOF(JBLOCK)); - for (bi = 0; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0]; - } - } - blkn += compptr->MCU_width; - ypos += DCTSIZE; - } + compptr = cinfo->cur_comp_info[ci]; + blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + xpos = MCU_col_num * compptr->MCU_sample_width; + ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (coef->iMCU_row_num < last_iMCU_row || + yoffset+yindex < compptr->last_row_height) { + (*cinfo->fdct->forward_DCT) (cinfo, compptr, + input_buf[compptr->component_index], + coef->MCU_buffer[blkn], + ypos, xpos, (JDIMENSION) blockcnt); + if (blockcnt < compptr->MCU_width) { + /* Create some dummy blocks at the right edge of the image. */ + jzero_far((void *) coef->MCU_buffer[blkn + blockcnt], + (compptr->MCU_width - blockcnt) * sizeof(JBLOCK)); + for (bi = blockcnt; bi < compptr->MCU_width; bi++) { + coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0]; + } + } + } else { + /* Create a row of dummy blocks at the bottom of the image. */ + jzero_far((void *) coef->MCU_buffer[blkn], + compptr->MCU_width * sizeof(JBLOCK)); + for (bi = 0; bi < compptr->MCU_width; bi++) { + coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0]; + } + } + blkn += compptr->MCU_width; + ypos += DCTSIZE; + } } /* Try to write the MCU. In event of a suspension failure, we will * re-DCT the MCU on restart (a bit inefficient, could be fixed...) */ if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -280,17 +280,17 @@ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[ci], thisblockrow, - (JDIMENSION) (block_row * DCTSIZE), - (JDIMENSION) 0, blocks_across); + input_buf[ci], thisblockrow, + (JDIMENSION) (block_row * DCTSIZE), + (JDIMENSION) 0, blocks_across); if (ndummy > 0) { - /* Create dummy blocks at the right edge of the image. */ - thisblockrow += blocks_across; /* => first dummy block */ - jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK)); - lastDC = thisblockrow[-1][0]; - for (bi = 0; bi < ndummy; bi++) { - thisblockrow[bi][0] = lastDC; - } + /* Create dummy blocks at the right edge of the image. */ + thisblockrow += blocks_across; /* => first dummy block */ + jzero_far((void *) thisblockrow, ndummy * sizeof(JBLOCK)); + lastDC = thisblockrow[-1][0]; + for (bi = 0; bi < ndummy; bi++) { + thisblockrow[bi][0] = lastDC; + } } } /* If at end of image, create dummy block rows as needed. @@ -299,22 +299,22 @@ * This squeezes a few more bytes out of the resulting file... */ if (coef->iMCU_row_num == last_iMCU_row) { - blocks_across += ndummy; /* include lower right corner */ + blocks_across += ndummy; /* include lower right corner */ MCUs_across = blocks_across / h_samp_factor; for (block_row = block_rows; block_row < compptr->v_samp_factor; - block_row++) { - thisblockrow = buffer[block_row]; - lastblockrow = buffer[block_row-1]; - jzero_far((void FAR *) thisblockrow, - (size_t) (blocks_across * SIZEOF(JBLOCK))); - for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { - lastDC = lastblockrow[h_samp_factor-1][0]; - for (bi = 0; bi < h_samp_factor; bi++) { - thisblockrow[bi][0] = lastDC; - } - thisblockrow += h_samp_factor; /* advance to next MCU in row */ - lastblockrow += h_samp_factor; - } + block_row++) { + thisblockrow = buffer[block_row]; + lastblockrow = buffer[block_row-1]; + jzero_far((void *) thisblockrow, + (size_t) (blocks_across * sizeof(JBLOCK))); + for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { + lastDC = lastblockrow[h_samp_factor-1][0]; + for (bi = 0; bi < h_samp_factor; bi++) { + thisblockrow[bi][0] = lastDC; + } + thisblockrow += h_samp_factor; /* advance to next MCU in row */ + lastblockrow += h_samp_factor; + } } } } @@ -341,7 +341,7 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN]; @@ -364,25 +364,25 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < compptr->MCU_width; xindex++) { - coef->MCU_buffer[blkn++] = buffer_ptr++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + coef->MCU_buffer[blkn++] = buffer_ptr++; + } + } } /* Try to write the MCU. */ if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -408,7 +408,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_c_coef_controller *) coef; coef->pub.start_pass = start_pass_coef; @@ -421,14 +421,14 @@ jpeg_component_info *compptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) compptr->v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) jround_up((long) compptr->width_in_blocks, + (long) compptr->h_samp_factor), + (JDIMENSION) jround_up((long) compptr->height_in_blocks, + (long) compptr->v_samp_factor), + (JDIMENSION) compptr->v_samp_factor); } #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -440,7 +440,7 @@ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolext.c glmark2-2021.02/src/libjpeg-turbo/jccolext.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jccolext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,148 @@ +/* + * jccolext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2012, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains input colorspace conversion routines. + */ + + +/* This file is included by jccolor.c */ + + +/* + * Convert some rows of samples to the JPEG colorspace. + * + * Note that we change from the application's interleaved-pixel format + * to our internal noninterleaved, one-plane-per-component format. + * The input buffer is therefore three times as wide as the output buffer. + * + * A starting row offset is provided only for the output buffer. The caller + * can easily adjust the passed input_buf value to accommodate any row + * offset required on that side. + */ + +INLINE +LOCAL(void) +rgb_ycc_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG * ctab = cconvert->rgb_ycc_tab; + register JSAMPROW inptr; + register JSAMPROW outptr0, outptr1, outptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr[RGB_RED]); + g = GETJSAMPLE(inptr[RGB_GREEN]); + b = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations + * must be too; we do not need an explicit range-limiting operation. + * Hence the value being shifted is never negative, and we don't + * need the general RIGHT_SHIFT macro. + */ + /* Y */ + outptr0[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + /* Cb */ + outptr1[col] = (JSAMPLE) + ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) + >> SCALEBITS); + /* Cr */ + outptr2[col] = (JSAMPLE) + ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) + >> SCALEBITS); + } + } +} + + +/**************** Cases other than RGB -> YCbCr **************/ + + +/* + * Convert some rows of samples to the JPEG colorspace. + * This version handles RGB->grayscale conversion, which is the same + * as the RGB->Y portion of RGB->YCbCr. + * We assume rgb_ycc_start has been called (we only use the Y tables). + */ + +INLINE +LOCAL(void) +rgb_gray_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG * ctab = cconvert->rgb_ycc_tab; + register JSAMPROW inptr; + register JSAMPROW outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr = output_buf[0][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr[RGB_RED]); + g = GETJSAMPLE(inptr[RGB_GREEN]); + b = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* Y */ + outptr[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + } + } +} + + +/* + * Convert some rows of samples to the JPEG colorspace. + * This version handles extended RGB->plain RGB conversion + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + register JSAMPROW inptr; + register JSAMPROW outptr0, outptr1, outptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = GETJSAMPLE(inptr[RGB_RED]); + outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]); + outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolext.c.inc glmark2-2021.02/src/libjpeg-turbo/jccolext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jccolext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,114 +0,0 @@ -/* - * jccolext.c - * - * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains input colorspace conversion routines. - */ - - -/* This file is included by jccolor.c */ - - -/* - * Convert some rows of samples to the JPEG colorspace. - * - * Note that we change from the application's interleaved-pixel format - * to our internal noninterleaved, one-plane-per-component format. - * The input buffer is therefore three times as wide as the output buffer. - * - * A starting row offset is provided only for the output buffer. The caller - * can easily adjust the passed input_buf value to accommodate any row - * offset required on that side. - */ - -INLINE -LOCAL(void) -rgb_ycc_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; - register JSAMPROW inptr; - register JSAMPROW outptr0, outptr1, outptr2; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->image_width; - - while (--num_rows >= 0) { - inptr = *input_buf++; - outptr0 = output_buf[0][output_row]; - outptr1 = output_buf[1][output_row]; - outptr2 = output_buf[2][output_row]; - output_row++; - for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); - inptr += RGB_PIXELSIZE; - /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations - * must be too; we do not need an explicit range-limiting operation. - * Hence the value being shifted is never negative, and we don't - * need the general RIGHT_SHIFT macro. - */ - /* Y */ - outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); - /* Cb */ - outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); - /* Cr */ - outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); - } - } -} - - -/**************** Cases other than RGB -> YCbCr **************/ - - -/* - * Convert some rows of samples to the JPEG colorspace. - * This version handles RGB->grayscale conversion, which is the same - * as the RGB->Y portion of RGB->YCbCr. - * We assume rgb_ycc_start has been called (we only use the Y tables). - */ - -INLINE -LOCAL(void) -rgb_gray_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; - register JSAMPROW inptr; - register JSAMPROW outptr; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->image_width; - - while (--num_rows >= 0) { - inptr = *input_buf++; - outptr = output_buf[0][output_row]; - output_row++; - for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); - inptr += RGB_PIXELSIZE; - /* Y */ - outptr[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); - } - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolor.c glmark2-2021.02/src/libjpeg-turbo/jccolor.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jccolor.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jccolor.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jccolor.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009-2012, 2015 D. R. Commander. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains input colorspace conversion routines. */ @@ -14,7 +17,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" /* Private subobject */ @@ -23,10 +26,10 @@ struct jpeg_color_converter pub; /* public fields */ /* Private state for RGB->YCC conversion */ - INT32 * rgb_ycc_tab; /* => table for RGB to YCbCr conversion */ + JLONG *rgb_ycc_tab; /* => table for RGB to YCbCr conversion */ } my_color_converter; -typedef my_color_converter * my_cconvert_ptr; +typedef my_color_converter *my_cconvert_ptr; /**************** RGB -> YCbCr conversion: most common case **************/ @@ -35,9 +38,9 @@ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore - * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2, * rather than CENTERJSAMPLE, for Cb and Cr. This gave equal positive and @@ -59,10 +62,10 @@ * in the tables to save adding them separately in the inner loop. */ -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define CBCR_OFFSET ((INT32) CENTERJSAMPLE << SCALEBITS) -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L< Y section */ -#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ -#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ -#define R_CB_OFF (3*(MAXJSAMPLE+1)) -#define G_CB_OFF (4*(MAXJSAMPLE+1)) -#define B_CB_OFF (5*(MAXJSAMPLE+1)) -#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ -#define G_CR_OFF (6*(MAXJSAMPLE+1)) -#define B_CR_OFF (7*(MAXJSAMPLE+1)) -#define TABLE_SIZE (8*(MAXJSAMPLE+1)) +#define R_Y_OFF 0 /* offset to R => Y section */ +#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ +#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ +#define R_CB_OFF (3*(MAXJSAMPLE+1)) +#define G_CB_OFF (4*(MAXJSAMPLE+1)) +#define B_CB_OFF (5*(MAXJSAMPLE+1)) +#define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ +#define G_CR_OFF (6*(MAXJSAMPLE+1)) +#define B_CR_OFF (7*(MAXJSAMPLE+1)) +#define TABLE_SIZE (8*(MAXJSAMPLE+1)) /* Include inline routines for colorspace extensions */ -#include "jccolext.c.inc" +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -96,13 +99,15 @@ #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define rgb_ycc_convert_internal extrgb_ycc_convert_internal #define rgb_gray_convert_internal extrgb_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extrgb_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -110,13 +115,15 @@ #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define rgb_ycc_convert_internal extrgbx_ycc_convert_internal #define rgb_gray_convert_internal extrgbx_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -124,13 +131,15 @@ #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define rgb_ycc_convert_internal extbgr_ycc_convert_internal #define rgb_gray_convert_internal extbgr_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extbgr_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -138,13 +147,15 @@ #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define rgb_ycc_convert_internal extbgrx_ycc_convert_internal #define rgb_gray_convert_internal extbgrx_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -152,13 +163,15 @@ #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define rgb_ycc_convert_internal extxbgr_ycc_convert_internal #define rgb_gray_convert_internal extxbgr_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -166,13 +179,15 @@ #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define rgb_ycc_convert_internal extxrgb_ycc_convert_internal #define rgb_gray_convert_internal extxrgb_gray_convert_internal -#include "jccolext.c.inc" +#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal +#include "jccolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal /* @@ -183,13 +198,13 @@ rgb_ycc_start (j_compress_ptr cinfo) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - INT32 * rgb_ycc_tab; - INT32 i; + JLONG *rgb_ycc_tab; + JLONG i; /* Allocate and fill in the conversion tables. */ - cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *) + cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (TABLE_SIZE * SIZEOF(INT32))); + (TABLE_SIZE * sizeof(JLONG))); for (i = 0; i <= MAXJSAMPLE; i++) { rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i; @@ -217,8 +232,8 @@ METHODDEF(void) rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { case JCS_EXT_RGB: @@ -266,8 +281,8 @@ METHODDEF(void) rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { case JCS_EXT_RGB: @@ -307,6 +322,52 @@ /* + * Extended RGB to plain RGB conversion + */ + +METHODDEF(void) +rgb_rgb_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + } +} + + +/* * Convert some rows of samples to the JPEG colorspace. * This version handles Adobe-style CMYK->YCCK conversion, * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same @@ -316,12 +377,12 @@ METHODDEF(void) cmyk_ycck_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; - register INT32 * ctab = cconvert->rgb_ycc_tab; + register JLONG *ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; @@ -339,7 +400,7 @@ g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); /* K passes through as-is */ - outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ + outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -348,16 +409,16 @@ */ /* Y */ outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); + ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) + >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); + ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) + >> SCALEBITS); } } } @@ -371,8 +432,8 @@ METHODDEF(void) grayscale_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr; @@ -385,7 +446,7 @@ outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ inptr += instride; } } @@ -400,28 +461,58 @@ METHODDEF(void) null_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; - register JSAMPROW outptr; + register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; register int ci; int nc = cinfo->num_components; JDIMENSION num_cols = cinfo->image_width; - while (--num_rows >= 0) { - /* It seems fastest to make a separate pass for each component. */ - for (ci = 0; ci < nc; ci++) { - inptr = *input_buf; - outptr = output_buf[ci][output_row]; + if (nc == 3) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ - inptr += nc; + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; } } - input_buf++; - output_row++; + } else if (nc == 4) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + outptr3 = output_buf[3][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; + outptr3[col] = *inptr++; + } + } + } else { + while (--num_rows >= 0) { + /* It seems fastest to make a separate pass for each component. */ + for (ci = 0; ci < nc; ci++) { + inptr = *input_buf; + outptr = output_buf[ci][output_row]; + for (col = 0; col < num_cols; col++) { + outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + inptr += nc; + } + } + input_buf++; + output_row++; + } } } @@ -448,7 +539,7 @@ cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_color_converter)); + sizeof(my_color_converter)); cinfo->cconvert = (struct jpeg_color_converter *) cconvert; /* set start_pass to null method until we find out differently */ cconvert->pub.start_pass = null_method; @@ -486,7 +577,7 @@ ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); break; - default: /* JCS_UNKNOWN can be anything */ + default: /* JCS_UNKNOWN can be anything */ if (cinfo->input_components < 1) ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); break; @@ -523,21 +614,30 @@ break; case JCS_RGB: - case JCS_EXT_RGB: - case JCS_EXT_RGBX: - case JCS_EXT_BGR: - case JCS_EXT_BGRX: - case JCS_EXT_XBGR: - case JCS_EXT_XRGB: - case JCS_EXT_RGBA: - case JCS_EXT_BGRA: - case JCS_EXT_ABGR: - case JCS_EXT_ARGB: if (cinfo->num_components != 3) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == cinfo->jpeg_color_space && - rgb_pixelsize[cinfo->in_color_space] == 3) - cconvert->pub.color_convert = null_convert; + if (rgb_red[cinfo->in_color_space] == 0 && + rgb_green[cinfo->in_color_space] == 1 && + rgb_blue[cinfo->in_color_space] == 2 && + rgb_pixelsize[cinfo->in_color_space] == 3) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else if (cinfo->in_color_space == JCS_RGB || + cinfo->in_color_space == JCS_EXT_RGB || + cinfo->in_color_space == JCS_EXT_RGBX || + cinfo->in_color_space == JCS_EXT_BGR || + cinfo->in_color_space == JCS_EXT_BGRX || + cinfo->in_color_space == JCS_EXT_XBGR || + cinfo->in_color_space == JCS_EXT_XRGB || + cinfo->in_color_space == JCS_EXT_RGBA || + cinfo->in_color_space == JCS_EXT_BGRA || + cinfo->in_color_space == JCS_EXT_ABGR || + cinfo->in_color_space == JCS_EXT_ARGB) + cconvert->pub.color_convert = rgb_rgb_convert; else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -562,18 +662,28 @@ cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = rgb_ycc_convert; } - } else if (cinfo->in_color_space == JCS_YCbCr) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCbCr) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; case JCS_CMYK: if (cinfo->num_components != 4) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == JCS_CMYK) - cconvert->pub.color_convert = null_convert; - else + if (cinfo->in_color_space == JCS_CMYK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -583,17 +693,27 @@ if (cinfo->in_color_space == JCS_CMYK) { cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = cmyk_ycck_convert; - } else if (cinfo->in_color_space == JCS_YCCK) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCCK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; - default: /* allow null conversion of JCS_UNKNOWN */ + default: /* allow null conversion of JCS_UNKNOWN */ if (cinfo->jpeg_color_space != cinfo->in_color_space || - cinfo->num_components != cinfo->input_components) + cinfo->num_components != cinfo->input_components) ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); - cconvert->pub.color_convert = null_convert; +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; break; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcdctmgr.c glmark2-2021.02/src/libjpeg-turbo/jcdctmgr.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcdctmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcdctmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,14 @@ /* * jcdctmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2011 D. R. Commander - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2011, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the forward-DCT management logic. * This code selects a particular DCT implementation to be used, @@ -17,33 +19,32 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" /* Private subobject for this module */ -typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); -typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); +typedef void (*forward_DCT_method_ptr) (DCTELEM *data); +typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data); -typedef JMETHOD(void, convsamp_method_ptr, - (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM * workspace)); -typedef JMETHOD(void, float_convsamp_method_ptr, - (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace)); - -typedef JMETHOD(void, quantize_method_ptr, - (JCOEFPTR coef_block, DCTELEM * divisors, - DCTELEM * workspace)); -typedef JMETHOD(void, float_quantize_method_ptr, - (JCOEFPTR coef_block, FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); +typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data, + JDIMENSION start_col, + DCTELEM *workspace); +typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data, + JDIMENSION start_col, + FAST_FLOAT *workspace); + +typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace); +typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block, + FAST_FLOAT *divisors, + FAST_FLOAT *workspace); METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); typedef struct { - struct jpeg_forward_dct pub; /* public fields */ + struct jpeg_forward_dct pub; /* public fields */ /* Pointer to the DCT routine actually in use */ forward_DCT_method_ptr dct; @@ -54,27 +55,30 @@ * entries, because of scaling (especially for an unnormalized DCT). * Each table is given in normal array order. */ - DCTELEM * divisors[NUM_QUANT_TBLS]; + DCTELEM *divisors[NUM_QUANT_TBLS]; /* work area for FDCT subroutine */ - DCTELEM * workspace; + DCTELEM *workspace; #ifdef DCT_FLOAT_SUPPORTED /* Same as above for the floating-point case. */ float_DCT_method_ptr float_dct; float_convsamp_method_ptr float_convsamp; float_quantize_method_ptr float_quantize; - FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; - FAST_FLOAT * float_workspace; + FAST_FLOAT *float_divisors[NUM_QUANT_TBLS]; + FAST_FLOAT *float_workspace; #endif } my_fdct_controller; -typedef my_fdct_controller * my_fdct_ptr; +typedef my_fdct_controller *my_fdct_ptr; +#if BITS_IN_JSAMPLE == 8 + /* * Find the highest bit in an integer through binary search. */ + LOCAL(int) flss (UINT16 val) { @@ -105,6 +109,7 @@ return bit; } + /* * Compute values to do a division using reciprocal. * @@ -146,7 +151,7 @@ * * In order to allow SIMD implementations we also tweak the values to * allow the same calculation to be made at all times: - * + * * dctbl[0] = f rounded to nearest integer * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5) * dctbl[2] = 1 << ((word size) * 2 - r) @@ -163,13 +168,27 @@ * of in a consecutive manner, yet again in order to allow SIMD * routines. */ + LOCAL(int) -compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) +compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) { UDCTELEM2 fq, fr; UDCTELEM c; int b, r; + if (divisor == 1) { + /* divisor == 1 means unquantized, so these reciprocal/correction/shift + * values will cause the C quantization algorithm to act like the + * identity function. Since only the C quantization algorithm is used in + * these cases, the scale value is irrelevant. + */ + dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */ + dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */ + dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */ + dtbl[DCTSIZE2 * 3] = -(DCTELEM) (sizeof(DCTELEM) * 8); /* shift */ + return 0; + } + b = flss(divisor) - 1; r = sizeof(DCTELEM) * 8 + b; @@ -190,13 +209,20 @@ dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ +#ifdef WITH_SIMD dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ +#else + dtbl[DCTSIZE2 * 2] = 1; +#endif dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ if(r <= 16) return 0; else return 1; } +#endif + + /* * Initialize for a processing pass. * Verify that all referenced Q-tables are present, and set up @@ -212,15 +238,15 @@ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; int ci, qtblno, i; jpeg_component_info *compptr; - JQUANT_TBL * qtbl; - DCTELEM * dtbl; + JQUANT_TBL *qtbl; + DCTELEM *dtbl; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { qtblno = compptr->quant_tbl_no; /* Make sure specified quantization table is present */ if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) + cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); qtbl = cinfo->quant_tbl_ptrs[qtblno]; /* Compute divisors for this quant table */ @@ -232,91 +258,102 @@ * coefficients multiplied by 8 (to counteract scaling). */ if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * sizeof(DCTELEM)); } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { - if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) - && fdct->quantize == jsimd_quantize) - fdct->quantize = quantize; +#if BITS_IN_JSAMPLE == 8 + if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) && + fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; +#else + dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; +#endif } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { - /* For AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - */ + /* For AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + */ #define CONST_BITS 14 - static const INT16 aanscales[DCTSIZE2] = { - /* precomputed values scaled up by 14 bits */ - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, - 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, - 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 - }; - SHIFT_TEMPS - - if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); - } - dtbl = fdct->divisors[qtblno]; - for (i = 0; i < DCTSIZE2; i++) { - if(!compute_reciprocal( - DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], - (INT32) aanscales[i]), - CONST_BITS-3), &dtbl[i]) - && fdct->quantize == jsimd_quantize) - fdct->quantize = quantize; - } + static const INT16 aanscales[DCTSIZE2] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + }; + SHIFT_TEMPS + + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * sizeof(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { +#if BITS_IN_JSAMPLE == 8 + if (!compute_reciprocal( + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-3), &dtbl[i]) && + fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; +#else + dtbl[i] = (DCTELEM) + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-3); +#endif + } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { - /* For float AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - * What's actually stored is 1/divisor so that the inner loop can - * use a multiplication rather than a division. - */ - FAST_FLOAT * fdtbl; - int row, col; - static const double aanscalefactor[DCTSIZE] = { - 1.0, 1.387039845, 1.306562965, 1.175875602, - 1.0, 0.785694958, 0.541196100, 0.275899379 - }; - - if (fdct->float_divisors[qtblno] == NULL) { - fdct->float_divisors[qtblno] = (FAST_FLOAT *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - DCTSIZE2 * SIZEOF(FAST_FLOAT)); - } - fdtbl = fdct->float_divisors[qtblno]; - i = 0; - for (row = 0; row < DCTSIZE; row++) { - for (col = 0; col < DCTSIZE; col++) { - fdtbl[i] = (FAST_FLOAT) - (1.0 / (((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col] * 8.0))); - i++; - } - } + /* For float AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + * What's actually stored is 1/divisor so that the inner loop can + * use a multiplication rather than a division. + */ + FAST_FLOAT *fdtbl; + int row, col; + static const double aanscalefactor[DCTSIZE] = { + 1.0, 1.387039845, 1.306562965, 1.175875602, + 1.0, 0.785694958, 0.541196100, 0.275899379 + }; + + if (fdct->float_divisors[qtblno] == NULL) { + fdct->float_divisors[qtblno] = (FAST_FLOAT *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + DCTSIZE2 * sizeof(FAST_FLOAT)); + } + fdtbl = fdct->float_divisors[qtblno]; + i = 0; + for (row = 0; row < DCTSIZE; row++) { + for (col = 0; col < DCTSIZE; col++) { + fdtbl[i] = (FAST_FLOAT) + (1.0 / (((double) qtbl->quantval[i] * + aanscalefactor[row] * aanscalefactor[col] * 8.0))); + i++; + } + } } break; #endif @@ -333,7 +370,7 @@ */ METHODDEF(void) -convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) +convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) { register DCTELEM *workspaceptr; register JSAMPROW elemptr; @@ -343,7 +380,7 @@ for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ +#if DCTSIZE == 8 /* unroll the inner loop */ *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; @@ -368,14 +405,18 @@ */ METHODDEF(void) -quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) +quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { int i; DCTELEM temp; - UDCTELEM recip, corr, shift; - UDCTELEM2 product; JCOEFPTR output_ptr = coef_block; +#if BITS_IN_JSAMPLE == 8 + + UDCTELEM recip, corr; + int shift; + UDCTELEM2 product; + for (i = 0; i < DCTSIZE2; i++) { temp = workspace[i]; recip = divisors[i + DCTSIZE2 * 0]; @@ -386,16 +427,54 @@ temp = -temp; product = (UDCTELEM2)(temp + corr) * recip; product >>= shift + sizeof(DCTELEM)*8; - temp = product; + temp = (DCTELEM)product; temp = -temp; } else { product = (UDCTELEM2)(temp + corr) * recip; product >>= shift + sizeof(DCTELEM)*8; - temp = product; + temp = (DCTELEM)product; } + output_ptr[i] = (JCOEF) temp; + } + +#else + + register DCTELEM qval; + for (i = 0; i < DCTSIZE2; i++) { + qval = divisors[i]; + temp = workspace[i]; + /* Divide the coefficient value by qval, ensuring proper rounding. + * Since C does not specify the direction of rounding for negative + * quotients, we have to force the dividend positive for portability. + * + * In most files, at least half of the output values will be zero + * (at default quantization settings, more like three-quarters...) + * so we should ensure that this case is fast. On many machines, + * a comparison is enough cheaper than a divide to make a special test + * a win. Since both inputs will be nonnegative, we need only test + * for a < b to discover whether a/b is 0. + * If your machine's division is fast enough, define FAST_DIVIDE. + */ +#ifdef FAST_DIVIDE +#define DIVIDE_BY(a,b) a /= b +#else +#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 +#endif + if (temp < 0) { + temp = -temp; + temp += qval>>1; /* for rounding */ + DIVIDE_BY(temp, qval); + temp = -temp; + } else { + temp += qval>>1; /* for rounding */ + DIVIDE_BY(temp, qval); + } output_ptr[i] = (JCOEF) temp; } + +#endif + } @@ -408,16 +487,16 @@ */ METHODDEF(void) -forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks) /* This version is used for integer DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; - DCTELEM * workspace; + DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no]; + DCTELEM *workspace; JDIMENSION bi; /* Make sure the compiler doesn't look up these every pass */ @@ -426,7 +505,7 @@ quantize_method_ptr do_quantize = fdct->quantize; workspace = fdct->workspace; - sample_data += start_row; /* fold in the vertical offset once */ + sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ @@ -445,7 +524,7 @@ METHODDEF(void) -convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace) +convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace) { register FAST_FLOAT *workspaceptr; register JSAMPROW elemptr; @@ -454,7 +533,7 @@ workspaceptr = workspace; for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ +#if DCTSIZE == 8 /* unroll the inner loop */ *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); @@ -476,7 +555,7 @@ METHODDEF(void) -quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace) +quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace) { register FAST_FLOAT temp; register int i; @@ -498,16 +577,16 @@ METHODDEF(void) -forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks) /* This version is used for floating-point DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; - FAST_FLOAT * workspace; + FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no]; + FAST_FLOAT *workspace; JDIMENSION bi; @@ -517,7 +596,7 @@ float_quantize_method_ptr do_quantize = fdct->float_quantize; workspace = fdct->float_workspace; - sample_data += start_row; /* fold in the vertical offset once */ + sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ @@ -546,7 +625,7 @@ fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_fdct_controller)); + sizeof(my_fdct_controller)); cinfo->fdct = (struct jpeg_forward_dct *) fdct; fdct->pub.start_pass = start_pass_fdctmgr; @@ -625,12 +704,12 @@ if (cinfo->dct_method == JDCT_FLOAT) fdct->float_workspace = (FAST_FLOAT *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(FAST_FLOAT) * DCTSIZE2); + sizeof(FAST_FLOAT) * DCTSIZE2); else #endif fdct->workspace = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(DCTELEM) * DCTSIZE2); + sizeof(DCTELEM) * DCTSIZE2); /* Mark divisor tables unallocated */ for (i = 0; i < NUM_QUANT_TBLS; i++) { diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jchuff.c glmark2-2021.02/src/libjpeg-turbo/jchuff.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jchuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jchuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jchuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2014-2016 D. R. Commander. + * Copyright (C) 2015 Matthieu Darbois. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy encoding routines. * @@ -18,11 +21,39 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jcphuff.c */ +#include "jsimd.h" +#include "jconfigint.h" #include -static unsigned char jpeg_nbits_table[65536]; -static int jpeg_nbits_table_init = 0; +/* + * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be + * used for bit counting rather than the lookup table. This will reduce the + * memory footprint by 64k, which is important for some mobile applications + * that create many isolated instances of libjpeg-turbo (web browsers, for + * instance.) This may improve performance on some mobile platforms as well. + * This feature is enabled by default only on ARM processors, because some x86 + * chips have a slow implementation of bsr, and the use of clz/bsr cannot be + * shown to have a significant performance impact even on the x86 chips that + * have a fast implementation of it. When building for ARMv6, you can + * explicitly disable the use of clz/bsr by adding -mthumb to the compiler + * flags (this defines __thumb__). + */ + +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if defined __GNUC__ && (defined __arm__ || defined __aarch64__) +#if !defined __thumb__ || defined __thumb2__ +#define USE_CLZ_INTRINSIC +#endif +#endif + +#ifdef USE_CLZ_INTRINSIC +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) +#else +#include "jpeg_nbits_table.h" +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) +#endif #ifndef min #define min(a,b) ((a)<(b)?(a):(b)) @@ -36,8 +67,8 @@ */ typedef struct { - size_t put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; @@ -51,12 +82,12 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).put_buffer = (src).put_buffer, \ + (dest).put_bits = (src).put_bits, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -64,44 +95,45 @@ typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ - savable_state saved; /* Bit buffer & DC state at start of MCU */ + savable_state saved; /* Bit buffer & DC state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to derived tables (these workspaces have image lifespan) */ - c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; -#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ - long * dc_count_ptrs[NUM_HUFF_TBLS]; - long * ac_count_ptrs[NUM_HUFF_TBLS]; +#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ + long *dc_count_ptrs[NUM_HUFF_TBLS]; + long *ac_count_ptrs[NUM_HUFF_TBLS]; #endif + + int simd; } huff_entropy_encoder; -typedef huff_entropy_encoder * huff_entropy_ptr; +typedef huff_entropy_encoder *huff_entropy_ptr; /* Working state while writing an MCU. * This struct contains all the fields that are needed by subroutines. */ typedef struct { - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - savable_state cur; /* Current bit buffer & DC state */ - j_compress_ptr cinfo; /* dump_buffer needs access to this */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + savable_state cur; /* Current bit buffer & DC state */ + j_compress_ptr cinfo; /* dump_buffer needs access to this */ } working_state; /* Forward declarations */ -METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo); #ifdef ENTROPY_OPT_SUPPORTED -METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); #endif @@ -116,7 +148,7 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, dctbl, actbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (gather_statistics) { #ifdef ENTROPY_OPT_SUPPORTED @@ -130,6 +162,8 @@ entropy->pub.finish_pass = finish_pass_huff; } + entropy->simd = jsimd_can_huff_encode_one_block(); + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; dctbl = compptr->dc_tbl_no; @@ -139,29 +173,29 @@ /* Check for invalid table indexes */ /* (make_c_derived_tbl does this in the other path) */ if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); if (actbl < 0 || actbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); /* Allocate and zero the statistics tables */ /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ if (entropy->dc_count_ptrs[dctbl] == NULL) - entropy->dc_count_ptrs[dctbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long)); + entropy->dc_count_ptrs[dctbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); if (entropy->ac_count_ptrs[actbl] == NULL) - entropy->ac_count_ptrs[actbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long)); + entropy->ac_count_ptrs[actbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); #endif } else { /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); + & entropy->dc_derived_tbls[dctbl]); jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); + & entropy->ac_derived_tbls[actbl]); } /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; @@ -186,7 +220,7 @@ GLOBAL(void) jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl) + c_derived_tbl **pdtbl) { JHUFF_TBL *htbl; c_derived_tbl *dtbl; @@ -211,22 +245,22 @@ if (*pdtbl == NULL) *pdtbl = (c_derived_tbl *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(c_derived_tbl)); + sizeof(c_derived_tbl)); dtbl = *pdtbl; - + /* Figure C.1: make table of Huffman code length for each symbol */ p = 0; for (l = 1; l <= 16; l++) { i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ + if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) huffsize[p++] = (char) l; } huffsize[p] = 0; lastp = p; - + /* Figure C.2: generate the codes themselves */ /* We also validate that the counts represent a legal Huffman code tree. */ @@ -241,12 +275,12 @@ /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((INT32) code) >= (((INT32) 1) << si)) + if (((JLONG) code) >= (((JLONG) 1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; } - + /* Figure C.3: generate encoding tables */ /* These are code and size indexed by symbol value */ @@ -254,7 +288,7 @@ * this lets us detect duplicate VAL entries here, and later * allows emit_bits to detect any attempt to emit such symbols. */ - MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); + MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); /* This is also a convenient place to check for out-of-range * and duplicated VAL entries. We allow 0..255 for AC symbols @@ -270,15 +304,6 @@ dtbl->ehufco[i] = huffcode[p]; dtbl->ehufsi[i] = huffsize[p]; } - - if(!jpeg_nbits_table_init) { - for(i = 0; i < 65536; i++) { - int nbits = 0, temp = i; - while (temp) {temp >>= 1; nbits++;} - jpeg_nbits_table[i] = nbits; - } - jpeg_nbits_table_init = 1; - } } @@ -286,19 +311,17 @@ /* Emit a byte, taking 'action' if must suspend. */ #define emit_byte(state,val,action) \ - { *(state)->next_output_byte++ = (JOCTET) (val); \ - if (--(state)->free_in_buffer == 0) \ - if (! dump_buffer(state)) \ - { action; } } + { *(state)->next_output_byte++ = (JOCTET) (val); \ + if (--(state)->free_in_buffer == 0) \ + if (! dump_buffer(state)) \ + { action; } } LOCAL(boolean) -dump_buffer (working_state * state) +dump_buffer (working_state *state) /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ { - struct jpeg_destination_mgr * dest = state->cinfo->dest; - - dest->free_in_buffer = state->free_in_buffer; + struct jpeg_destination_mgr *dest = state->cinfo->dest; if (! (*dest->empty_output_buffer) (state->cinfo)) return FALSE; @@ -360,7 +383,11 @@ } \ } -#if __WORDSIZE==64 || defined(_WIN64) +#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) +#error Cannot determine word size +#endif + +#if SIZEOF_SIZE_T==8 || defined(_WIN64) #define EMIT_BITS(code, size) { \ CHECKBUF47() \ @@ -368,7 +395,7 @@ } #define EMIT_CODE(code, size) { \ - temp2 &= (((INT32) 1)<free_in_buffer < BUFSIZE) { \ @@ -425,7 +461,7 @@ LOCAL(boolean) -flush_bits (working_state * state) +flush_bits (working_state *state) { JOCTET _buffer[BUFSIZE], *buffer; size_t put_buffer; int put_bits; @@ -439,7 +475,7 @@ PUT_BITS(0x7F, 7) while (put_bits >= 8) EMIT_BYTE() - state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ + state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ state->cur.put_bits = 0; STORE_BUFFER() @@ -450,8 +486,25 @@ /* Encode a single block's worth of coefficients */ LOCAL(boolean) -encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl) +encode_one_block_simd (working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) +{ + JOCTET _buffer[BUFSIZE], *buffer; + size_t bytes, bytestocopy; int localbuf = 0; + + LOAD_BUFFER() + + buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val, + dctbl, actbl); + + STORE_BUFFER() + + return TRUE; +} + +LOCAL(boolean) +encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) { int temp, temp2, temp3; int nbits; @@ -466,7 +519,7 @@ LOAD_BUFFER() /* Encode the DC coefficient difference per section F.1.2.1 */ - + temp = temp2 = block[0] - last_dc_val; /* This is a well-known technique for obtaining the absolute value without a @@ -483,25 +536,23 @@ temp2 += temp3; /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = jpeg_nbits_table[temp]; + nbits = JPEG_NBITS(temp); /* Emit the Huffman-coded symbol for the number of bits */ code = dctbl->ehufco[nbits]; size = dctbl->ehufsi[nbits]; - PUT_BITS(code, size) - CHECKBUF15() + EMIT_BITS(code, size) /* Mask off any extra bits in code */ - temp2 &= (((INT32) 1)< 15, must emit special run-length-16 codes (0xF0) */ \ while (r > 15) { \ EMIT_BITS(code_0xf0, size_0xf0) \ @@ -565,7 +616,7 @@ */ LOCAL(boolean) -emit_restart (working_state * state, int restart_num) +emit_restart (working_state *state, int restart_num) { int ci; @@ -595,7 +646,7 @@ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; working_state state; int blkn, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Load up working state */ state.next_output_byte = cinfo->dest->next_output_byte; @@ -607,20 +658,34 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! emit_restart(&state, entropy->next_restart_num)) - return FALSE; + return FALSE; } /* Encode the MCU data blocks */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - if (! encode_one_block(&state, - MCU_data[blkn][0], state.cur.last_dc_val[ci], - entropy->dc_derived_tbls[compptr->dc_tbl_no], - entropy->ac_derived_tbls[compptr->ac_tbl_no])) - return FALSE; - /* Update last_dc_val */ - state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + if (entropy->simd) { + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + if (! encode_one_block_simd(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) + return FALSE; + /* Update last_dc_val */ + state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + } + } else { + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + if (! encode_one_block(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) + return FALSE; + /* Update last_dc_val */ + state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + } } /* Completed MCU, so update state */ @@ -687,18 +752,18 @@ LOCAL(void) htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, - long dc_counts[], long ac_counts[]) + long dc_counts[], long ac_counts[]) { register int temp; register int nbits; register int k, r; - + /* Encode the DC coefficient difference per section F.1.2.1 */ - + temp = block[0] - last_dc_val; if (temp < 0) temp = -temp; - + /* Find the number of bits needed for the magnitude of the coefficient */ nbits = 0; while (temp) { @@ -713,36 +778,36 @@ /* Count the Huffman symbol for the number of bits */ dc_counts[nbits]++; - + /* Encode the AC coefficients per section F.1.2.2 */ - - r = 0; /* r = run length of zeros */ - + + r = 0; /* r = run length of zeros */ + for (k = 1; k < DCTSIZE2; k++) { if ((temp = block[jpeg_natural_order[k]]) == 0) { r++; } else { /* if run length > 15, must emit special run-length-16 codes (0xF0) */ while (r > 15) { - ac_counts[0xF0]++; - r -= 16; + ac_counts[0xF0]++; + r -= 16; } - + /* Find the number of bits needed for the magnitude of the coefficient */ if (temp < 0) - temp = -temp; - + temp = -temp; + /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ + nbits = 1; /* there must be at least one 1 bit */ while ((temp >>= 1)) - nbits++; + nbits++; /* Check for out-of-range coefficient values */ if (nbits > MAX_COEF_BITS) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + /* Count Huffman symbol for run length / number of bits */ ac_counts[(r << 4) + nbits]++; - + r = 0; } } @@ -763,14 +828,14 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int blkn, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Take care of restart intervals if needed */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) { /* Re-initialize DC predictions to 0 */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) - entropy->saved.last_dc_val[ci] = 0; + entropy->saved.last_dc_val[ci] = 0; /* Update restart state */ entropy->restarts_to_go = cinfo->restart_interval; } @@ -781,8 +846,8 @@ ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], - entropy->dc_count_ptrs[compptr->dc_tbl_no], - entropy->ac_count_ptrs[compptr->ac_tbl_no]); + entropy->dc_count_ptrs[compptr->dc_tbl_no], + entropy->ac_count_ptrs[compptr->ac_tbl_no]); entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; } @@ -819,24 +884,24 @@ */ GLOBAL(void) -jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) +jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) { -#define MAX_CLEN 32 /* assumed maximum initial code length */ - UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ - int codesize[257]; /* codesize[k] = code length of symbol k */ - int others[257]; /* next symbol in current branch of tree */ +#define MAX_CLEN 32 /* assumed maximum initial code length */ + UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ + int codesize[257]; /* codesize[k] = code length of symbol k */ + int others[257]; /* next symbol in current branch of tree */ int c1, c2; int p, i, j; long v; /* This algorithm is explained in section K.2 of the JPEG standard */ - MEMZERO(bits, SIZEOF(bits)); - MEMZERO(codesize, SIZEOF(codesize)); + MEMZERO(bits, sizeof(bits)); + MEMZERO(codesize, sizeof(codesize)); for (i = 0; i < 257; i++) - others[i] = -1; /* init links to empty */ - - freq[256] = 1; /* make sure 256 has a nonzero count */ + others[i] = -1; /* init links to empty */ + + freq[256] = 1; /* make sure 256 has a nonzero count */ /* Including the pseudo-symbol 256 in the Huffman procedure guarantees * that no real symbol is given code-value of all ones, because 256 * will be placed last in the largest codeword category. @@ -851,8 +916,8 @@ v = 1000000000L; for (i = 0; i <= 256; i++) { if (freq[i] && freq[i] <= v) { - v = freq[i]; - c1 = i; + v = freq[i]; + c1 = i; } } @@ -862,15 +927,15 @@ v = 1000000000L; for (i = 0; i <= 256; i++) { if (freq[i] && freq[i] <= v && i != c1) { - v = freq[i]; - c2 = i; + v = freq[i]; + c2 = i; } } /* Done if we've merged everything into one frequency */ if (c2 < 0) break; - + /* Else merge the two counts/trees */ freq[c1] += freq[c2]; freq[c2] = 0; @@ -881,9 +946,9 @@ c1 = others[c1]; codesize[c1]++; } - - others[c1] = c2; /* chain c2 onto c1's tree branch */ - + + others[c1] = c2; /* chain c2 onto c1's tree branch */ + /* Increment the codesize of everything in c2's tree branch */ codesize[c2]++; while (others[c2] >= 0) { @@ -898,7 +963,7 @@ /* The JPEG standard seems to think that this can't happen, */ /* but I'm paranoid... */ if (codesize[i] > MAX_CLEN) - ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); + ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); bits[codesize[i]]++; } @@ -914,28 +979,28 @@ * shortest nonzero BITS entry is converted into a prefix for two code words * one bit longer. */ - + for (i = MAX_CLEN; i > 16; i--) { while (bits[i] > 0) { - j = i - 2; /* find length of new prefix to be used */ + j = i - 2; /* find length of new prefix to be used */ while (bits[j] == 0) - j--; - - bits[i] -= 2; /* remove two symbols */ - bits[i-1]++; /* one goes in this length */ - bits[j+1] += 2; /* two new symbols in this length */ - bits[j]--; /* symbol of this length is now a prefix */ + j--; + + bits[i] -= 2; /* remove two symbols */ + bits[i-1]++; /* one goes in this length */ + bits[j+1] += 2; /* two new symbols in this length */ + bits[j]--; /* symbol of this length is now a prefix */ } } /* Remove the count for the pseudo-symbol 256 from the largest codelength */ - while (bits[i] == 0) /* find largest codelength still in use */ + while (bits[i] == 0) /* find largest codelength still in use */ i--; bits[i]--; - + /* Return final symbol counts (only for lengths 0..16) */ - MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); - + MEMCOPY(htbl->bits, bits, sizeof(htbl->bits)); + /* Return a list of the symbols sorted by code length */ /* It's not real clear to me why we don't need to consider the codelength * changes made above, but the JPEG spec seems to think this works. @@ -944,8 +1009,8 @@ for (i = 1; i <= MAX_CLEN; i++) { for (j = 0; j <= 255; j++) { if (codesize[j] == i) { - htbl->huffval[p] = (UINT8) j; - p++; + htbl->huffval[p] = (UINT8) j; + p++; } } } @@ -964,7 +1029,7 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, dctbl, actbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JHUFF_TBL **htblptr; boolean did_dc[NUM_HUFF_TBLS]; boolean did_ac[NUM_HUFF_TBLS]; @@ -972,8 +1037,8 @@ /* It's important not to apply jpeg_gen_optimal_table more than once * per table, because it clobbers the input frequency counts! */ - MEMZERO(did_dc, SIZEOF(did_dc)); - MEMZERO(did_ac, SIZEOF(did_ac)); + MEMZERO(did_dc, sizeof(did_dc)); + MEMZERO(did_ac, sizeof(did_ac)); for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; @@ -982,14 +1047,14 @@ if (! did_dc[dctbl]) { htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); did_dc[dctbl] = TRUE; } if (! did_ac[actbl]) { htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); did_ac[actbl] = TRUE; } @@ -1012,7 +1077,7 @@ entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_encoder)); + sizeof(huff_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass_huff; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jchuff.h glmark2-2021.02/src/libjpeg-turbo/jchuff.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jchuff.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jchuff.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jchuff.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains declarations for Huffman entropy encoding routines * that are shared between the sequential encoder (jchuff.c) and the @@ -25,23 +28,16 @@ /* Derived data constructed for each Huffman table */ typedef struct { - unsigned int ehufco[256]; /* code for each symbol */ - char ehufsi[256]; /* length of code for each symbol */ + unsigned int ehufco[256]; /* code for each symbol */ + char ehufsi[256]; /* length of code for each symbol */ /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */ } c_derived_tbl; -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_c_derived_tbl jMkCDerived -#define jpeg_gen_optimal_table jGenOptTbl -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - /* Expand a Huffman table definition into the derived format */ EXTERN(void) jpeg_make_c_derived_tbl - JPP((j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl)); + (j_compress_ptr cinfo, boolean isDC, int tblno, + c_derived_tbl ** pdtbl); /* Generate an optimal table definition given the specified counts */ EXTERN(void) jpeg_gen_optimal_table - JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])); + (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcinit.c glmark2-2021.02/src/libjpeg-turbo/jcinit.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcinit.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcinit.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1991-1997, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains initialization logic for the JPEG compressor. * This routine is in charge of selecting the modules to be executed and @@ -60,7 +61,7 @@ /* Need a full-image coefficient buffer in any multi-pass mode. */ jinit_c_coef_controller(cinfo, - (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding)); + (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding)); jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */); jinit_marker_writer(cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmainct.c glmark2-2021.02/src/libjpeg-turbo/jcmainct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmainct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcmainct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcmainct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the main buffer controller for compression. * The main buffer lies between the pre-processor and the JPEG @@ -15,50 +18,30 @@ #include "jpeglib.h" -/* Note: currently, there is no operating mode in which a full-image buffer - * is needed at this step. If there were, that mode could not be used with - * "raw data" input, since this module is bypassed in that case. However, - * we've left the code here for possible use in special applications. - */ -#undef FULL_MAIN_BUFFER_SUPPORTED - - /* Private buffer controller object */ typedef struct { struct jpeg_c_main_controller pub; /* public fields */ - JDIMENSION cur_iMCU_row; /* number of current iMCU row */ - JDIMENSION rowgroup_ctr; /* counts row groups received in iMCU row */ - boolean suspended; /* remember if we suspended output */ - J_BUF_MODE pass_mode; /* current operating mode */ + JDIMENSION cur_iMCU_row; /* number of current iMCU row */ + JDIMENSION rowgroup_ctr; /* counts row groups received in iMCU row */ + boolean suspended; /* remember if we suspended output */ + J_BUF_MODE pass_mode; /* current operating mode */ /* If using just a strip buffer, this points to the entire set of buffers * (we allocate one for each component). In the full-image case, this * points to the currently accessible strips of the virtual arrays. */ JSAMPARRAY buffer[MAX_COMPONENTS]; - -#ifdef FULL_MAIN_BUFFER_SUPPORTED - /* If using full-image storage, this array holds pointers to virtual-array - * control blocks for each component. Unused if not full-image storage. - */ - jvirt_sarray_ptr whole_image[MAX_COMPONENTS]; -#endif } my_main_controller; -typedef my_main_controller * my_main_ptr; +typedef my_main_controller *my_main_ptr; /* Forward declarations */ METHODDEF(void) process_data_simple_main - JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)); -#ifdef FULL_MAIN_BUFFER_SUPPORTED -METHODDEF(void) process_data_buffer_main - JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)); -#endif + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail); /* @@ -74,32 +57,14 @@ if (cinfo->raw_data_in) return; - main_ptr->cur_iMCU_row = 0; /* initialize counters */ + if (pass_mode != JBUF_PASS_THRU) + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); + + main_ptr->cur_iMCU_row = 0; /* initialize counters */ main_ptr->rowgroup_ctr = 0; main_ptr->suspended = FALSE; - main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */ - - switch (pass_mode) { - case JBUF_PASS_THRU: -#ifdef FULL_MAIN_BUFFER_SUPPORTED - if (main_ptr->whole_image[0] != NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -#endif - main_ptr->pub.process_data = process_data_simple_main; - break; -#ifdef FULL_MAIN_BUFFER_SUPPORTED - case JBUF_SAVE_SOURCE: - case JBUF_CRANK_DEST: - case JBUF_SAVE_AND_PASS: - if (main_ptr->whole_image[0] == NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - main_ptr->pub.process_data = process_data_buffer_main; - break; -#endif - default: - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - break; - } + main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */ + main_ptr->pub.process_data = process_data_simple_main; } @@ -111,8 +76,8 @@ METHODDEF(void) process_data_simple_main (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; @@ -120,9 +85,9 @@ /* Read input data if we haven't filled the main buffer yet */ if (main_ptr->rowgroup_ctr < DCTSIZE) (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, - main_ptr->buffer, &main_ptr->rowgroup_ctr, - (JDIMENSION) DCTSIZE); + input_buf, in_row_ctr, in_rows_avail, + main_ptr->buffer, &main_ptr->rowgroup_ctr, + (JDIMENSION) DCTSIZE); /* If we don't have a full iMCU row buffered, return to application for * more data. Note that preprocessor will always pad to fill the iMCU row @@ -140,8 +105,8 @@ * think we were done. */ if (! main_ptr->suspended) { - (*in_row_ctr)--; - main_ptr->suspended = TRUE; + (*in_row_ctr)--; + main_ptr->suspended = TRUE; } return; } @@ -158,85 +123,6 @@ } -#ifdef FULL_MAIN_BUFFER_SUPPORTED - -/* - * Process some data. - * This routine handles all of the modes that use a full-size buffer. - */ - -METHODDEF(void) -process_data_buffer_main (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) -{ - my_main_ptr main = (my_main_ptr) cinfo->main; - int ci; - jpeg_component_info *compptr; - boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST); - - while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) { - /* Realign the virtual buffers if at the start of an iMCU row. */ - if (main_ptr->rowgroup_ctr == 0) { - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, main_ptr->whole_image[ci], - main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing); - } - /* In a read pass, pretend we just read some source data. */ - if (! writing) { - *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE; - main_ptr->rowgroup_ctr = DCTSIZE; - } - } - - /* If a write pass, read input data until the current iMCU row is full. */ - /* Note: preprocessor will pad if necessary to fill the last iMCU row. */ - if (writing) { - (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, - main_ptr->buffer, &main_ptr->rowgroup_ctr, - (JDIMENSION) DCTSIZE); - /* Return to application if we need more data to fill the iMCU row. */ - if (main_ptr->rowgroup_ctr < DCTSIZE) - return; - } - - /* Emit data, unless this is a sink-only pass. */ - if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) { - if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { - /* If compressor did not consume the whole row, then we must need to - * suspend processing and return to the application. In this situation - * we pretend we didn't yet consume the last input row; otherwise, if - * it happened to be the last row of the image, the application would - * think we were done. - */ - if (! main_ptr->suspended) { - (*in_row_ctr)--; - main_ptr->suspended = TRUE; - } - return; - } - /* We did finish the row. Undo our little suspension hack if a previous - * call suspended; then mark the main buffer empty. - */ - if (main_ptr->suspended) { - (*in_row_ctr)++; - main_ptr->suspended = FALSE; - } - } - - /* If get here, we are done with this iMCU row. Mark buffer empty. */ - main_ptr->rowgroup_ctr = 0; - main_ptr->cur_iMCU_row++; - } -} - -#endif /* FULL_MAIN_BUFFER_SUPPORTED */ - - /* * Initialize main buffer controller. */ @@ -250,7 +136,7 @@ main_ptr = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); + sizeof(my_main_controller)); cinfo->main = (struct jpeg_c_main_controller *) main_ptr; main_ptr->pub.start_pass = start_pass_main; @@ -262,32 +148,15 @@ * may be of a different size. */ if (need_full_buffer) { -#ifdef FULL_MAIN_BUFFER_SUPPORTED - /* Allocate a full-image virtual array for each component */ - /* Note we pad the bottom to a multiple of the iMCU height */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor) * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); - } -#else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -#endif } else { -#ifdef FULL_MAIN_BUFFER_SUPPORTED - main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */ -#endif /* Allocate a strip buffer for each component */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + compptr->width_in_blocks * DCTSIZE, + (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmarker.c glmark2-2021.02/src/libjpeg-turbo/jcmarker.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmarker.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcmarker.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jcmarker.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains routines to write JPEG datastream markers. */ @@ -15,29 +18,29 @@ #include "jpegcomp.h" -typedef enum { /* JPEG marker codes */ +typedef enum { /* JPEG marker codes */ M_SOF0 = 0xc0, M_SOF1 = 0xc1, M_SOF2 = 0xc2, M_SOF3 = 0xc3, - + M_SOF5 = 0xc5, M_SOF6 = 0xc6, M_SOF7 = 0xc7, - + M_JPG = 0xc8, M_SOF9 = 0xc9, M_SOF10 = 0xca, M_SOF11 = 0xcb, - + M_SOF13 = 0xcd, M_SOF14 = 0xce, M_SOF15 = 0xcf, - + M_DHT = 0xc4, - + M_DAC = 0xcc, - + M_RST0 = 0xd0, M_RST1 = 0xd1, M_RST2 = 0xd2, @@ -46,7 +49,7 @@ M_RST5 = 0xd5, M_RST6 = 0xd6, M_RST7 = 0xd7, - + M_SOI = 0xd8, M_EOI = 0xd9, M_SOS = 0xda, @@ -55,7 +58,7 @@ M_DRI = 0xdd, M_DHP = 0xde, M_EXP = 0xdf, - + M_APP0 = 0xe0, M_APP1 = 0xe1, M_APP2 = 0xe2, @@ -72,13 +75,13 @@ M_APP13 = 0xed, M_APP14 = 0xee, M_APP15 = 0xef, - + M_JPG0 = 0xf0, M_JPG13 = 0xfd, M_COM = 0xfe, - + M_TEM = 0x01, - + M_ERROR = 0x100 } JPEG_MARKER; @@ -91,7 +94,7 @@ unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */ } my_marker_writer; -typedef my_marker_writer * my_marker_ptr; +typedef my_marker_writer *my_marker_ptr; /* @@ -110,7 +113,7 @@ emit_byte (j_compress_ptr cinfo, int val) /* Emit a byte */ { - struct jpeg_destination_mgr * dest = cinfo->dest; + struct jpeg_destination_mgr *dest = cinfo->dest; *(dest->next_output_byte)++ = (JOCTET) val; if (--dest->free_in_buffer == 0) { @@ -147,7 +150,7 @@ /* Emit a DQT marker */ /* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */ { - JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index]; + JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[index]; int prec; int i; @@ -171,7 +174,7 @@ /* The table entries must be emitted in zigzag order. */ unsigned int qval = qtbl->quantval[jpeg_natural_order[i]]; if (prec) - emit_byte(cinfo, (int) (qval >> 8)); + emit_byte(cinfo, (int) (qval >> 8)); emit_byte(cinfo, (int) (qval & 0xFF)); } @@ -186,35 +189,35 @@ emit_dht (j_compress_ptr cinfo, int index, boolean is_ac) /* Emit a DHT marker */ { - JHUFF_TBL * htbl; + JHUFF_TBL *htbl; int length, i; - + if (is_ac) { htbl = cinfo->ac_huff_tbl_ptrs[index]; - index += 0x10; /* output index has AC bit set */ + index += 0x10; /* output index has AC bit set */ } else { htbl = cinfo->dc_huff_tbl_ptrs[index]; } if (htbl == NULL) ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index); - + if (! htbl->sent_table) { emit_marker(cinfo, M_DHT); - + length = 0; for (i = 1; i <= 16; i++) length += htbl->bits[i]; - + emit_2bytes(cinfo, length + 2 + 1 + 16); emit_byte(cinfo, index); - + for (i = 1; i <= 16; i++) emit_byte(cinfo, htbl->bits[i]); - + for (i = 0; i < length; i++) emit_byte(cinfo, htbl->huffval[i]); - + htbl->sent_table = TRUE; } } @@ -231,32 +234,38 @@ char ac_in_use[NUM_ARITH_TBLS]; int length, i; jpeg_component_info *compptr; - + for (i = 0; i < NUM_ARITH_TBLS; i++) dc_in_use[i] = ac_in_use[i] = 0; - + for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - dc_in_use[compptr->dc_tbl_no] = 1; - ac_in_use[compptr->ac_tbl_no] = 1; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) + dc_in_use[compptr->dc_tbl_no] = 1; + /* AC needs no table when not present */ + if (cinfo->Se) + ac_in_use[compptr->ac_tbl_no] = 1; } - + length = 0; for (i = 0; i < NUM_ARITH_TBLS; i++) length += dc_in_use[i] + ac_in_use[i]; - - emit_marker(cinfo, M_DAC); - - emit_2bytes(cinfo, length*2 + 2); - - for (i = 0; i < NUM_ARITH_TBLS; i++) { - if (dc_in_use[i]) { - emit_byte(cinfo, i); - emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); - } - if (ac_in_use[i]) { - emit_byte(cinfo, i + 0x10); - emit_byte(cinfo, cinfo->arith_ac_K[i]); + + if (length) { + emit_marker(cinfo, M_DAC); + + emit_2bytes(cinfo, length*2 + 2); + + for (i = 0; i < NUM_ARITH_TBLS; i++) { + if (dc_in_use[i]) { + emit_byte(cinfo, i); + emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); + } + if (ac_in_use[i]) { + emit_byte(cinfo, i + 0x10); + emit_byte(cinfo, cinfo->arith_ac_K[i]); + } } } #endif /* C_ARITH_CODING_SUPPORTED */ @@ -268,8 +277,8 @@ /* Emit a DRI marker */ { emit_marker(cinfo, M_DRI); - - emit_2bytes(cinfo, 4); /* fixed length */ + + emit_2bytes(cinfo, 4); /* fixed length */ emit_2bytes(cinfo, (int) cinfo->restart_interval); } @@ -281,9 +290,9 @@ { int ci; jpeg_component_info *compptr; - + emit_marker(cinfo, code); - + emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */ /* Make sure image isn't bigger than SOF field can handle */ @@ -312,32 +321,26 @@ { int i, td, ta; jpeg_component_info *compptr; - + emit_marker(cinfo, M_SOS); - + emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */ - + emit_byte(cinfo, cinfo->comps_in_scan); - + for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; emit_byte(cinfo, compptr->component_id); - td = compptr->dc_tbl_no; - ta = compptr->ac_tbl_no; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan; - * furthermore, Huffman coding of DC refinement uses no table at all. - * We emit 0 for unused field(s); this is recommended by the P&M text - * but does not seem to be specified in the standard. - */ - if (cinfo->Ss == 0) { - ta = 0; /* DC scan */ - if (cinfo->Ah != 0 && !cinfo->arith_code) - td = 0; /* no DC table either */ - } else { - td = 0; /* AC scan */ - } - } + + /* We emit 0 for unused field(s); this is recommended by the P&M text + * but does not seem to be specified in the standard. + */ + + /* DC needs no table for refinement scan */ + td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0; + /* AC needs no table when not present */ + ta = cinfo->Se ? compptr->ac_tbl_no : 0; + emit_byte(cinfo, (td << 4) + ta); } @@ -352,22 +355,22 @@ /* Emit a JFIF-compliant APP0 marker */ { /* - * Length of APP0 block (2 bytes) - * Block ID (4 bytes - ASCII "JFIF") - * Zero byte (1 byte to terminate the ID string) - * Version Major, Minor (2 bytes - major first) - * Units (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm) - * Xdpu (2 bytes - dots per unit horizontal) - * Ydpu (2 bytes - dots per unit vertical) - * Thumbnail X size (1 byte) - * Thumbnail Y size (1 byte) + * Length of APP0 block (2 bytes) + * Block ID (4 bytes - ASCII "JFIF") + * Zero byte (1 byte to terminate the ID string) + * Version Major, Minor (2 bytes - major first) + * Units (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm) + * Xdpu (2 bytes - dots per unit horizontal) + * Ydpu (2 bytes - dots per unit vertical) + * Thumbnail X size (1 byte) + * Thumbnail Y size (1 byte) */ - + emit_marker(cinfo, M_APP0); - + emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */ - emit_byte(cinfo, 0x4A); /* Identifier: ASCII "JFIF" */ + emit_byte(cinfo, 0x4A); /* Identifier: ASCII "JFIF" */ emit_byte(cinfo, 0x46); emit_byte(cinfo, 0x49); emit_byte(cinfo, 0x46); @@ -377,7 +380,7 @@ emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */ emit_2bytes(cinfo, (int) cinfo->X_density); emit_2bytes(cinfo, (int) cinfo->Y_density); - emit_byte(cinfo, 0); /* No thumbnail image */ + emit_byte(cinfo, 0); /* No thumbnail image */ emit_byte(cinfo, 0); } @@ -387,12 +390,12 @@ /* Emit an Adobe APP14 marker */ { /* - * Length of APP14 block (2 bytes) - * Block ID (5 bytes - ASCII "Adobe") - * Version Number (2 bytes - currently 100) - * Flags0 (2 bytes - currently 0) - * Flags1 (2 bytes - currently 0) - * Color transform (1 byte) + * Length of APP14 block (2 bytes) + * Block ID (5 bytes - ASCII "Adobe") + * Version Number (2 bytes - currently 100) + * Flags0 (2 bytes - currently 0) + * Flags1 (2 bytes - currently 0) + * Color transform (1 byte) * * Although Adobe TN 5116 mentions Version = 101, all the Adobe files * now in circulation seem to use Version = 100, so that's what we write. @@ -401,28 +404,28 @@ * YCbCr, 2 if it's YCCK, 0 otherwise. Adobe's definition has to do with * whether the encoder performed a transformation, which is pretty useless. */ - + emit_marker(cinfo, M_APP14); - + emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */ - emit_byte(cinfo, 0x41); /* Identifier: ASCII "Adobe" */ + emit_byte(cinfo, 0x41); /* Identifier: ASCII "Adobe" */ emit_byte(cinfo, 0x64); emit_byte(cinfo, 0x6F); emit_byte(cinfo, 0x62); emit_byte(cinfo, 0x65); - emit_2bytes(cinfo, 100); /* Version */ - emit_2bytes(cinfo, 0); /* Flags0 */ - emit_2bytes(cinfo, 0); /* Flags1 */ + emit_2bytes(cinfo, 100); /* Version */ + emit_2bytes(cinfo, 0); /* Flags0 */ + emit_2bytes(cinfo, 0); /* Flags1 */ switch (cinfo->jpeg_color_space) { case JCS_YCbCr: - emit_byte(cinfo, 1); /* Color transform = 1 */ + emit_byte(cinfo, 1); /* Color transform = 1 */ break; case JCS_YCCK: - emit_byte(cinfo, 2); /* Color transform = 2 */ + emit_byte(cinfo, 2); /* Color transform = 2 */ break; default: - emit_byte(cinfo, 0); /* Color transform = 0 */ + emit_byte(cinfo, 0); /* Color transform = 0 */ break; } } @@ -440,12 +443,12 @@ write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen) /* Emit an arbitrary marker header */ { - if (datalen > (unsigned int) 65533) /* safety check */ + if (datalen > (unsigned int) 65533) /* safety check */ ERREXIT(cinfo, JERR_BAD_LENGTH); emit_marker(cinfo, (JPEG_MARKER) marker); - emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */ + emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */ } METHODDEF(void) @@ -472,12 +475,12 @@ { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; - emit_marker(cinfo, M_SOI); /* first the SOI */ + emit_marker(cinfo, M_SOI); /* first the SOI */ /* SOI is defined to reset restart interval to 0 */ marker->last_restart_interval = 0; - if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */ + if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */ emit_jfif_app0(cinfo); if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */ emit_adobe_app14(cinfo); @@ -498,7 +501,7 @@ int ci, prec; boolean is_baseline; jpeg_component_info *compptr; - + /* Emit DQT for each quantization table. * Note that emit_dqt() suppresses any duplicate tables. */ @@ -518,9 +521,9 @@ } else { is_baseline = TRUE; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1) - is_baseline = FALSE; + is_baseline = FALSE; } if (prec && is_baseline) { is_baseline = FALSE; @@ -531,14 +534,17 @@ /* Emit the proper SOF marker */ if (cinfo->arith_code) { - emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */ + if (cinfo->progressive_mode) + emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */ + else + emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */ } else { if (cinfo->progressive_mode) - emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ + emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ else if (is_baseline) - emit_sof(cinfo, M_SOF0); /* SOF code for baseline implementation */ + emit_sof(cinfo, M_SOF0); /* SOF code for baseline implementation */ else - emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */ + emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */ } } @@ -568,19 +574,12 @@ */ for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan */ - if (cinfo->Ss == 0) { - if (cinfo->Ah == 0) /* DC needs no table for refinement scan */ - emit_dht(cinfo, compptr->dc_tbl_no, FALSE); - } else { - emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } - } else { - /* Sequential mode: need both DC and AC tables */ - emit_dht(cinfo, compptr->dc_tbl_no, FALSE); - emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) + emit_dht(cinfo, compptr->dc_tbl_no, FALSE); + /* AC needs no table when not present */ + if (cinfo->Se) + emit_dht(cinfo, compptr->ac_tbl_no, TRUE); } } @@ -629,9 +628,9 @@ if (! cinfo->arith_code) { for (i = 0; i < NUM_HUFF_TBLS; i++) { if (cinfo->dc_huff_tbl_ptrs[i] != NULL) - emit_dht(cinfo, i, FALSE); + emit_dht(cinfo, i, FALSE); if (cinfo->ac_huff_tbl_ptrs[i] != NULL) - emit_dht(cinfo, i, TRUE); + emit_dht(cinfo, i, TRUE); } } @@ -651,7 +650,7 @@ /* Create the subobject */ marker = (my_marker_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_marker_writer)); + sizeof(my_marker_writer)); cinfo->marker = (struct jpeg_marker_writer *) marker; /* Initialize method pointers */ marker->pub.write_file_header = write_file_header; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmaster.c glmark2-2021.02/src/libjpeg-turbo/jcmaster.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcmaster.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcmaster.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,17 @@ /* * jcmaster.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains master control logic for the JPEG compressor. * These routines are concerned with parameter validation, initial setup, - * and inter-pass control (determining the number of passes and the work + * and inter-pass control (determining the number of passes and the work * to be done in each pass). */ @@ -22,23 +24,23 @@ /* Private state */ typedef enum { - main_pass, /* input data, also do first output step */ - huff_opt_pass, /* Huffman code optimization pass */ - output_pass /* data output pass */ + main_pass, /* input data, also do first output step */ + huff_opt_pass, /* Huffman code optimization pass */ + output_pass /* data output pass */ } c_pass_type; typedef struct { - struct jpeg_comp_master pub; /* public fields */ + struct jpeg_comp_master pub; /* public fields */ - c_pass_type pass_type; /* the type of the current pass */ + c_pass_type pass_type; /* the type of the current pass */ - int pass_number; /* # of passes completed */ - int total_passes; /* total # of passes needed */ + int pass_number; /* # of passes completed */ + int total_passes; /* total # of passes needed */ - int scan_number; /* current index in scan_info[] */ + int scan_number; /* current index in scan_info[] */ } my_comp_master; -typedef my_comp_master * my_master_ptr; +typedef my_comp_master *my_master_ptr; /* @@ -104,7 +106,7 @@ /* Check that number of components won't exceed internal array sizes */ if (cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); /* Compute maximum sampling factors; check factor validity */ cinfo->max_h_samp_factor = 1; @@ -112,12 +114,12 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, - compptr->h_samp_factor); + compptr->h_samp_factor); cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, - compptr->v_samp_factor); + compptr->v_samp_factor); } /* Compute dimensions of components */ @@ -134,17 +136,17 @@ /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (cinfo->max_v_samp_factor * DCTSIZE)); /* Size in samples */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + (long) cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + (long) cinfo->max_v_samp_factor); /* Mark component needed (this flag isn't actually used for compression) */ compptr->component_needed = TRUE; } @@ -154,7 +156,7 @@ */ cinfo->total_iMCU_rows = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + (long) (cinfo->max_v_samp_factor*DCTSIZE)); } @@ -166,12 +168,12 @@ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode. */ { - const jpeg_scan_info * scanptr; + const jpeg_scan_info *scanptr; int scanno, ncomps, ci, coefi, thisi; int Ss, Se, Ah, Al; boolean component_sent[MAX_COMPONENTS]; #ifdef C_PROGRESSIVE_SUPPORTED - int * last_bitpos_ptr; + int *last_bitpos_ptr; int last_bitpos[MAX_COMPONENTS][DCTSIZE2]; /* -1 until that coefficient has been seen; then last Al for it */ #endif @@ -187,15 +189,15 @@ #ifdef C_PROGRESSIVE_SUPPORTED cinfo->progressive_mode = TRUE; last_bitpos_ptr = & last_bitpos[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (coefi = 0; coefi < DCTSIZE2; coefi++) - *last_bitpos_ptr++ = -1; + *last_bitpos_ptr++ = -1; #else ERREXIT(cinfo, JERR_NOT_COMPILED); #endif } else { cinfo->progressive_mode = FALSE; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) component_sent[ci] = FALSE; } @@ -207,10 +209,10 @@ for (ci = 0; ci < ncomps; ci++) { thisi = scanptr->component_index[ci]; if (thisi < 0 || thisi >= cinfo->num_components) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); /* Components must appear in SOF order within each scan */ if (ci > 0 && thisi <= scanptr->component_index[ci-1]) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); } /* Validate progression parameters */ Ss = scanptr->Ss; @@ -232,43 +234,43 @@ #define MAX_AH_AL 13 #endif if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 || - Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); if (Ss == 0) { - if (Se != 0) /* DC and AC together not OK */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + if (Se != 0) /* DC and AC together not OK */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } else { - if (ncomps != 1) /* AC scans must be for only one component */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + if (ncomps != 1) /* AC scans must be for only one component */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } for (ci = 0; ci < ncomps; ci++) { - last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0]; - if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */ - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - for (coefi = Ss; coefi <= Se; coefi++) { - if (last_bitpos_ptr[coefi] < 0) { - /* first scan of this coefficient */ - if (Ah != 0) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - } else { - /* not first scan */ - if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); - } - last_bitpos_ptr[coefi] = Al; - } + last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0]; + if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */ + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + for (coefi = Ss; coefi <= Se; coefi++) { + if (last_bitpos_ptr[coefi] < 0) { + /* first scan of this coefficient */ + if (Ah != 0) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + } else { + /* not first scan */ + if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1) + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + } + last_bitpos_ptr[coefi] = Al; + } } #endif } else { /* For sequential JPEG, all progression parameters must be these: */ if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0) - ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); + ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); /* Make sure components are not sent twice */ for (ci = 0; ci < ncomps; ci++) { - thisi = scanptr->component_index[ci]; - if (component_sent[thisi]) - ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); - component_sent[thisi] = TRUE; + thisi = scanptr->component_index[ci]; + if (component_sent[thisi]) + ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); + component_sent[thisi] = TRUE; } } } @@ -283,13 +285,13 @@ */ for (ci = 0; ci < cinfo->num_components; ci++) { if (last_bitpos[ci][0] < 0) - ERREXIT(cinfo, JERR_MISSING_DATA); + ERREXIT(cinfo, JERR_MISSING_DATA); } #endif } else { for (ci = 0; ci < cinfo->num_components; ci++) { if (! component_sent[ci]) - ERREXIT(cinfo, JERR_MISSING_DATA); + ERREXIT(cinfo, JERR_MISSING_DATA); } } } @@ -307,12 +309,12 @@ if (cinfo->scan_info != NULL) { /* Prepare for current scan --- the script is already validated */ my_master_ptr master = (my_master_ptr) cinfo->master; - const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number; + const jpeg_scan_info *scanptr = cinfo->scan_info + master->scan_number; cinfo->comps_in_scan = scanptr->comps_in_scan; for (ci = 0; ci < scanptr->comps_in_scan; ci++) { cinfo->cur_comp_info[ci] = - &cinfo->comp_info[scanptr->component_index[ci]]; + &cinfo->comp_info[scanptr->component_index[ci]]; } cinfo->Ss = scanptr->Ss; cinfo->Se = scanptr->Se; @@ -325,7 +327,7 @@ /* Prepare for single sequential-JPEG scan containing all components */ if (cinfo->num_components > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPS_IN_SCAN); + MAX_COMPS_IN_SCAN); cinfo->comps_in_scan = cinfo->num_components; for (ci = 0; ci < cinfo->num_components; ci++) { cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci]; @@ -345,16 +347,16 @@ { int ci, mcublks, tmp; jpeg_component_info *compptr; - + if (cinfo->comps_in_scan == 1) { - + /* Noninterleaved (single-component) scan */ compptr = cinfo->cur_comp_info[0]; - + /* Overall image size in MCUs */ cinfo->MCUs_per_row = compptr->width_in_blocks; cinfo->MCU_rows_in_scan = compptr->height_in_blocks; - + /* For noninterleaved scan, always one block per MCU */ compptr->MCU_width = 1; compptr->MCU_height = 1; @@ -367,28 +369,28 @@ tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; - + /* Prepare array describing MCU composition */ cinfo->blocks_in_MCU = 1; cinfo->MCU_membership[0] = 0; - + } else { - + /* Interleaved (multi-component) scan */ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, - MAX_COMPS_IN_SCAN); - + MAX_COMPS_IN_SCAN); + /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + (long) (cinfo->max_h_samp_factor*DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - + (long) (cinfo->max_v_samp_factor*DCTSIZE)); + cinfo->blocks_in_MCU = 0; - + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; /* Sampling factors give # of blocks of component in each MCU */ @@ -406,12 +408,12 @@ /* Prepare array describing MCU composition */ mcublks = compptr->MCU_blocks; if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU) - ERREXIT(cinfo, JERR_BAD_MCU_SIZE); + ERREXIT(cinfo, JERR_BAD_MCU_SIZE); while (mcublks-- > 0) { - cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; } } - + } /* Convert restart specified in rows to actual MCU count. */ @@ -451,8 +453,8 @@ (*cinfo->fdct->start_pass) (cinfo); (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding); (*cinfo->coef->start_pass) (cinfo, - (master->total_passes > 1 ? - JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); + (master->total_passes > 1 ? + JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU); if (cinfo->optimize_coding) { /* No immediate data output; postpone writing frame/scan headers */ @@ -580,7 +582,7 @@ master = (my_master_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_comp_master)); + sizeof(my_comp_master)); cinfo->master = (struct jpeg_comp_master *) master; master->pub.prepare_for_pass = prepare_for_pass; master->pub.pass_startup = pass_startup; @@ -601,7 +603,7 @@ cinfo->num_scans = 1; } - if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */ + if (cinfo->progressive_mode && !cinfo->arith_code) /* TEMPORARY HACK ??? */ cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */ /* Initialize my private state */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcomapi.c glmark2-2021.02/src/libjpeg-turbo/jcomapi.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcomapi.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcomapi.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcomapi.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface routines that are used for both * compression and decompression. @@ -72,8 +75,8 @@ /* NB: mem pointer is NULL if memory mgr failed to initialize. */ if (cinfo->mem != NULL) (*cinfo->mem->self_destruct) (cinfo); - cinfo->mem = NULL; /* be safe if jpeg_destroy is called twice */ - cinfo->global_state = 0; /* mark it destroyed */ + cinfo->mem = NULL; /* be safe if jpeg_destroy is called twice */ + cinfo->global_state = 0; /* mark it destroyed */ } @@ -88,8 +91,8 @@ JQUANT_TBL *tbl; tbl = (JQUANT_TBL *) - (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL)); - tbl->sent_table = FALSE; /* make sure this is false in any new table */ + (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JQUANT_TBL)); + tbl->sent_table = FALSE; /* make sure this is false in any new table */ return tbl; } @@ -100,7 +103,7 @@ JHUFF_TBL *tbl; tbl = (JHUFF_TBL *) - (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL)); - tbl->sent_table = FALSE; /* make sure this is false in any new table */ + (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JHUFF_TBL)); + tbl->sent_table = FALSE; /* make sure this is false in any new table */ return tbl; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.h glmark2-2021.02/src/libjpeg-turbo/jconfig.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jconfig.h 2021-04-25 01:47:22.000000000 +0800 @@ -5,16 +5,27 @@ #define JPEG_LIB_VERSION 62 /* libjpeg-turbo version */ -#define LIBJPEG_TURBO_VERSION 1.2.0 +#define LIBJPEG_TURBO_VERSION 1.4.2 /* Support arithmetic encoding */ -#define C_ARITH_CODING_SUPPORTED 1 +/* #define C_ARITH_CODING_SUPPORTED 1 */ /* Support arithmetic decoding */ -#define D_ARITH_CODING_SUPPORTED 1 +/* #define D_ARITH_CODING_SUPPORTED 1 */ -/* Compiler supports function prototypes. */ -#define HAVE_PROTOTYPES 1 +/* + * Define BITS_IN_JSAMPLE as either + * 8 for 8-bit sample values (the usual setting) + * 12 for 12-bit sample values + * Only 8 and 12 are legal data precisions for lossy JPEG according to the + * JPEG standard, and the IJG code does not support anything else! + * We do not support run-time selection of data precision, sorry. + */ + +#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDDEF_H 1 @@ -22,25 +33,27 @@ /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 -/* Compiler supports 'unsigned char'. */ +/* Define to 1 if the system has the type `unsigned char'. */ #define HAVE_UNSIGNED_CHAR 1 -/* Compiler supports 'unsigned short'. */ +/* Define to 1 if the system has the type `unsigned short'. */ #define HAVE_UNSIGNED_SHORT 1 -/* Compiler does not support pointers to unspecified structures. */ +/* Compiler does not support pointers to undefined structures. */ /* #undef INCOMPLETE_TYPES_BROKEN */ -/* Compiler has rather than standard . */ -/* #undef NEED_BSD_STRINGS */ +/* Support in-memory source/destination managers */ +/* #undef MEM_SRCDST_SUPPORTED */ -/* Linker requires that global names be unique in first 15 characters. */ -/* #undef NEED_SHORT_EXTERNAL_NAMES */ +/* Define if you have BSD-like bzero and bcopy in rather than + memset/memcpy in . */ +/* #undef NEED_BSD_STRINGS */ -/* Need to include in order to obtain size_t. */ -#define NEED_SYS_TYPES_H 1 +/* Define if you need to include to get size_t. */ +/* #undef NEED_SYS_TYPES_H 1 */ -/* Broken compiler shifts signed values as an unsigned shift. */ +/* Define if your (broken) compiler shifts signed values as if they were + unsigned. */ /* #undef RIGHT_SHIFT_IS_UNSIGNED */ /* Use accelerated SIMD routines. */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.h.in glmark2-2021.02/src/libjpeg-turbo/jconfig.h.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.h.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jconfig.h.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,70 @@ +/* Version ID for the JPEG library. + * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60". + */ +#define JPEG_LIB_VERSION 62 /* Version 6b */ + +/* libjpeg-turbo version */ +#define LIBJPEG_TURBO_VERSION 0 + +/* Support arithmetic encoding */ +#undef C_ARITH_CODING_SUPPORTED + +/* Support arithmetic decoding */ +#undef D_ARITH_CODING_SUPPORTED + +/* + * Define BITS_IN_JSAMPLE as either + * 8 for 8-bit sample values (the usual setting) + * 12 for 12-bit sample values + * Only 8 and 12 are legal data precisions for lossy JPEG according to the + * JPEG standard, and the IJG code does not support anything else! + * We do not support run-time selection of data precision, sorry. + */ + +#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ + +/* Define to 1 if you have the header file. */ +#undef HAVE_LOCALE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDDEF_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if the system has the type `unsigned char'. */ +#undef HAVE_UNSIGNED_CHAR + +/* Define to 1 if the system has the type `unsigned short'. */ +#undef HAVE_UNSIGNED_SHORT + +/* Compiler does not support pointers to undefined structures. */ +#undef INCOMPLETE_TYPES_BROKEN + +/* Support in-memory source/destination managers */ +#undef MEM_SRCDST_SUPPORTED + +/* Define if you have BSD-like bzero and bcopy in rather than + memset/memcpy in . */ +#undef NEED_BSD_STRINGS + +/* Define if you need to include to get size_t. */ +#undef NEED_SYS_TYPES_H + +/* Define if your (broken) compiler shifts signed values as if they were + unsigned. */ +#undef RIGHT_SHIFT_IS_UNSIGNED + +/* Use accelerated SIMD routines. */ +#undef WITH_SIMD + +/* Define to 1 if type `char' is unsigned and you are not using gcc. */ +#ifndef __CHAR_UNSIGNED__ +# undef __CHAR_UNSIGNED__ +#endif + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `unsigned int' if does not define. */ +#undef size_t diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfigint.h glmark2-2021.02/src/libjpeg-turbo/jconfigint.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfigint.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jconfigint.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,27 @@ +/* jconfigint.h. Generated from jconfigint.h.in by configure. */ +/* libjpeg-turbo build number */ +#define BUILD "" + +/* How to obtain function inlining. */ +#ifndef INLINE +#if defined(__GNUC__) +#define INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __forceinline +#else +#define INLINE +#endif +#endif + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "libjpeg-turbo" + +/* Version number of package */ +#define VERSION "1.4.90" + +/* The size of `size_t', as computed by sizeof. */ +#if __WORDSIZE==64 || defined(_WIN64) +#define SIZEOF_SIZE_T 8 +#else +#define SIZEOF_SIZE_T 4 +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfigint.h.in glmark2-2021.02/src/libjpeg-turbo/jconfigint.h.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfigint.h.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jconfigint.h.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,14 @@ +/* libjpeg-turbo build number */ +#undef BUILD + +/* How to obtain function inlining. */ +#undef INLINE + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Version number of package */ +#undef VERSION + +/* The size of `size_t', as computed by sizeof. */ +#undef SIZEOF_SIZE_T diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.txt glmark2-2021.02/src/libjpeg-turbo/jconfig.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jconfig.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jconfig.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,143 @@ +/* + * jconfig.txt + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1994, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file documents the configuration options that are required to + * customize the JPEG software for a particular system. + * + * The actual configuration options for a particular installation are stored + * in jconfig.h. On many machines, jconfig.h can be generated automatically + * or copied from one of the "canned" jconfig files that we supply. But if + * you need to generate a jconfig.h file by hand, this file tells you how. + * + * DO NOT EDIT THIS FILE --- IT WON'T ACCOMPLISH ANYTHING. + * EDIT A COPY NAMED JCONFIG.H. + */ + + +/* + * These symbols indicate the properties of your machine or compiler. + * #define the symbol if yes, #undef it if no. + */ + +/* Does your compiler support the declaration "unsigned char" ? + * How about "unsigned short" ? + */ +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT + +/* Define "void" as "char" if your compiler doesn't know about type void. + * NOTE: be sure to define void such that "void *" represents the most general + * pointer type, e.g., that returned by malloc(). + */ +/* #define void char */ + +/* Define "const" as empty if your compiler doesn't know the "const" keyword. + */ +/* #define const */ + +/* Define this if an ordinary "char" type is unsigned. + * If you're not sure, leaving it undefined will work at some cost in speed. + * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal. + */ +#undef __CHAR_UNSIGNED__ + +/* Define this if your system has an ANSI-conforming file. + */ +#define HAVE_STDDEF_H + +/* Define this if your system has an ANSI-conforming file. + */ +#define HAVE_STDLIB_H + +/* Define this if your system does not have an ANSI/SysV , + * but does have a BSD-style . + */ +#undef NEED_BSD_STRINGS + +/* Define this if your system does not provide typedef size_t in any of the + * ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in + * instead. + */ +#undef NEED_SYS_TYPES_H + +/* Although a real ANSI C compiler can deal perfectly well with pointers to + * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI + * and pseudo-ANSI compilers get confused. To keep one of these bozos happy, + * define INCOMPLETE_TYPES_BROKEN. This is not recommended unless you + * actually get "missing structure definition" warnings or errors while + * compiling the JPEG code. + */ +#undef INCOMPLETE_TYPES_BROKEN + +/* Define "boolean" as unsigned char, not int, on Windows systems. + */ +#ifdef _WIN32 +#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ +typedef unsigned char boolean; +#endif +#define HAVE_BOOLEAN /* prevent jmorecfg.h from redefining it */ +#endif + + +/* + * The following options affect code selection within the JPEG library, + * but they don't need to be visible to applications using the library. + * To minimize application namespace pollution, the symbols won't be + * defined unless JPEG_INTERNALS has been defined. + */ + +#ifdef JPEG_INTERNALS + +/* Define this if your compiler implements ">>" on signed values as a logical + * (unsigned) shift; leave it undefined if ">>" is a signed (arithmetic) shift, + * which is the normal and rational definition. + */ +#undef RIGHT_SHIFT_IS_UNSIGNED + + +#endif /* JPEG_INTERNALS */ + + +/* + * The remaining options do not affect the JPEG library proper, + * but only the sample applications cjpeg/djpeg (see cjpeg.c, djpeg.c). + * Other applications can ignore these. + */ + +#ifdef JPEG_CJPEG_DJPEG + +/* These defines indicate which image (non-JPEG) file formats are allowed. */ + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +/* Define this if you want to name both input and output files on the command + * line, rather than using stdout and optionally stdin. You MUST do this if + * your system can't cope with binary I/O to stdin/stdout. See comments at + * head of cjpeg.c or djpeg.c. + */ +#undef TWO_FILE_COMMANDLINE + +/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb"). + * This is necessary on systems that distinguish text files from binary files, + * and is harmless on most systems that don't. If you have one of the rare + * systems that complains about the "b" spec, define this symbol. + */ +#undef DONT_USE_B_MODE + +/* Define this if you want percent-done progress reports from cjpeg/djpeg. + */ +#undef PROGRESS_REPORT + + +#endif /* JPEG_CJPEG_DJPEG */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcparam.c glmark2-2021.02/src/libjpeg-turbo/jcparam.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcparam.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcparam.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,13 @@ /* * jcparam.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2003-2008 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains optional default-setting code for the JPEG compressor. * Applications do not have to use this file, but those that don't use it @@ -15,6 +17,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jstdhuff.c" /* @@ -23,15 +26,15 @@ GLOBAL(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, boolean force_baseline) + const unsigned int *basic_table, + int scale_factor, boolean force_baseline) /* Define a quantization table equal to the basic_table times * a scale factor (given as a percentage). * If force_baseline is TRUE, the computed quantization table entries * are limited to 1..255 for JPEG baseline compatibility. */ { - JQUANT_TBL ** qtblptr; + JQUANT_TBL **qtblptr; int i; long temp; @@ -53,7 +56,7 @@ if (temp <= 0L) temp = 1L; if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */ if (force_baseline && temp > 255L) - temp = 255L; /* limit to baseline range if requested */ + temp = 255L; /* limit to baseline range if requested */ (*qtblptr)->quantval[i] = (UINT16) temp; } @@ -98,16 +101,16 @@ { /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - cinfo->q_scale_factor[0], force_baseline); + cinfo->q_scale_factor[0], force_baseline); jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - cinfo->q_scale_factor[1], force_baseline); + cinfo->q_scale_factor[1], force_baseline); } #endif GLOBAL(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, - boolean force_baseline) + boolean force_baseline) /* Set or change the 'quality' (quantization) setting, using default tables * and a straight percentage-scaling quality scale. In most cases it's better * to use jpeg_set_quality (below); this entry point is provided for @@ -116,9 +119,9 @@ { /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - scale_factor, force_baseline); + scale_factor, force_baseline); jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - scale_factor, force_baseline); + scale_factor, force_baseline); } @@ -165,116 +168,6 @@ /* - * Huffman table setup routines - */ - -LOCAL(void) -add_huff_table (j_compress_ptr cinfo, - JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val) -/* Define a Huffman table */ -{ - int nsymbols, len; - - if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - - /* Copy the number-of-symbols-of-each-code-length counts */ - MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits)); - - /* Validate the counts. We do this here mainly so we can copy the right - * number of symbols from the val[] array, without risking marching off - * the end of memory. jchuff.c will do a more thorough test later. - */ - nsymbols = 0; - for (len = 1; len <= 16; len++) - nsymbols += bits[len]; - if (nsymbols < 1 || nsymbols > 256) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - - MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8)); - - /* Initialize sent_table FALSE so table will be written to JPEG file. */ - (*htblptr)->sent_table = FALSE; -} - - -LOCAL(void) -std_huff_tables (j_compress_ptr cinfo) -/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ -/* IMPORTANT: these are only valid for 8-bit data precision! */ -{ - static const UINT8 bits_dc_luminance[17] = - { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_luminance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - - static const UINT8 bits_dc_chrominance[17] = - { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_chrominance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - - static const UINT8 bits_ac_luminance[17] = - { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; - static const UINT8 val_ac_luminance[] = - { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, - 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, - 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, - 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, - 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, - 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, - 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, - 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, - 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, - 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, - 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; - - static const UINT8 bits_ac_chrominance[17] = - { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; - static const UINT8 val_ac_chrominance[] = - { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, - 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, - 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, - 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, - 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, - 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, - 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, - 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, - 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, - 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, - 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, - 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, - 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; - - add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0], - bits_dc_luminance, val_dc_luminance); - add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0], - bits_ac_luminance, val_ac_luminance); - add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1], - bits_dc_chrominance, val_dc_chrominance); - add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1], - bits_ac_chrominance, val_ac_chrominance); -} - - -/* * Default parameter setup for compression. * * Applications that don't choose to use this routine must do their @@ -300,19 +193,19 @@ if (cinfo->comp_info == NULL) cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - MAX_COMPONENTS * SIZEOF(jpeg_component_info)); + MAX_COMPONENTS * sizeof(jpeg_component_info)); /* Initialize everything not dependent on the color space */ #if JPEG_LIB_VERSION >= 70 - cinfo->scale_num = 1; /* 1:1 scaling */ + cinfo->scale_num = 1; /* 1:1 scaling */ cinfo->scale_denom = 1; #endif cinfo->data_precision = BITS_IN_JSAMPLE; /* Set up two quantization tables using default quality of 75 */ jpeg_set_quality(cinfo, 75, TRUE); /* Set up two Huffman tables */ - std_huff_tables(cinfo); + std_huff_tables((j_common_ptr) cinfo); /* Initialize default arithmetic coding conditioning */ for (i = 0; i < NUM_ARITH_TBLS; i++) { @@ -370,8 +263,8 @@ */ cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */ cinfo->JFIF_minor_version = 1; - cinfo->density_unit = 0; /* Pixel size is unknown by default */ - cinfo->X_density = 1; /* Pixel aspect ratio is square by default */ + cinfo->density_unit = 0; /* Pixel size is unknown by default */ + cinfo->X_density = 1; /* Pixel aspect ratio is square by default */ cinfo->Y_density = 1; /* Choose JPEG colorspace based on input space, set defaults accordingly */ @@ -429,7 +322,7 @@ GLOBAL(void) jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { - jpeg_component_info * compptr; + jpeg_component_info *compptr; int ci; #define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl) \ @@ -497,7 +390,7 @@ cinfo->num_components = cinfo->input_components; if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); for (ci = 0; ci < cinfo->num_components; ci++) { SET_COMP(ci, ci, 1,1, 0, 0,0); } @@ -511,8 +404,8 @@ #ifdef C_PROGRESSIVE_SUPPORTED LOCAL(jpeg_scan_info *) -fill_a_scan (jpeg_scan_info * scanptr, int ci, - int Ss, int Se, int Ah, int Al) +fill_a_scan (jpeg_scan_info *scanptr, int ci, + int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for specified component */ { scanptr->comps_in_scan = 1; @@ -526,8 +419,8 @@ } LOCAL(jpeg_scan_info *) -fill_scans (jpeg_scan_info * scanptr, int ncomps, - int Ss, int Se, int Ah, int Al) +fill_scans (jpeg_scan_info *scanptr, int ncomps, + int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for each component */ { int ci; @@ -545,7 +438,7 @@ } LOCAL(jpeg_scan_info *) -fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al) +fill_dc_scans (jpeg_scan_info *scanptr, int ncomps, int Ah, int Al) /* Support routine: generate interleaved DC scan if possible, else N scans */ { int ci; @@ -577,7 +470,7 @@ { int ncomps = cinfo->num_components; int nscans; - jpeg_scan_info * scanptr; + jpeg_scan_info *scanptr; /* Safety check to ensure start_compress not called yet. */ if (cinfo->global_state != CSTATE_START) @@ -590,9 +483,9 @@ } else { /* All-purpose script for other color spaces. */ if (ncomps > MAX_COMPS_IN_SCAN) - nscans = 6 * ncomps; /* 2 DC + 4 AC scans per component */ + nscans = 6 * ncomps; /* 2 DC + 4 AC scans per component */ else - nscans = 2 + 4 * ncomps; /* 2 DC scans; 4 AC scans per component */ + nscans = 2 + 4 * ncomps; /* 2 DC scans; 4 AC scans per component */ } /* Allocate space for script. @@ -606,7 +499,7 @@ cinfo->script_space_size = MAX(nscans, 10); cinfo->script_space = (jpeg_scan_info *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - cinfo->script_space_size * SIZEOF(jpeg_scan_info)); + cinfo->script_space_size * sizeof(jpeg_scan_info)); } scanptr = cinfo->script_space; cinfo->scan_info = scanptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcphuff.c glmark2-2021.02/src/libjpeg-turbo/jcphuff.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcphuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcphuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcphuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy encoding routines for progressive JPEG. * @@ -15,7 +18,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jchuff.c */ +#include "jchuff.h" /* Declarations shared with jchuff.c */ #ifdef C_PROGRESSIVE_SUPPORTED @@ -30,36 +33,36 @@ /* Bit-level coding status. * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. */ - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - INT32 put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ + j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ /* Coding status for DC components */ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ /* Coding status for AC components */ - int ac_tbl_no; /* the table number of the single component */ - unsigned int EOBRUN; /* run length of EOBs */ - unsigned int BE; /* # of buffered correction bits before MCU */ - char * bit_buffer; /* buffer for correction bits (1 per char) */ + int ac_tbl_no; /* the table number of the single component */ + unsigned int EOBRUN; /* run length of EOBs */ + unsigned int BE; /* # of buffered correction bits before MCU */ + char *bit_buffer; /* buffer for correction bits (1 per char) */ /* packing correction bits tightly would save some space but cost time... */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ /* Pointers to derived tables (these workspaces have image lifespan). * Since any one scan codes only DC or only AC, we only need one set * of tables, not one for DC and one for AC. */ - c_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; /* Statistics tables for optimization; again, one set is enough */ - long * count_ptrs[NUM_HUFF_TBLS]; + long *count_ptrs[NUM_HUFF_TBLS]; } phuff_entropy_encoder; -typedef phuff_entropy_encoder * phuff_entropy_ptr; +typedef phuff_entropy_encoder *phuff_entropy_ptr; /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit * buffer can hold. Larger sizes may slightly improve compression, but @@ -67,35 +70,35 @@ * The minimum safe size is 64 bits. */ -#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ +#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ -/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. - * We assume that int right shift is unsigned if INT32 right shift is, +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. + * We assume that int right shift is unsigned if JLONG right shift is, * which should be safe. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS int ishift_temp; +#define ISHIFT_TEMPS int ishift_temp; #define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif /* Forward declarations */ -METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo)); -METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo)); +METHODDEF(boolean) encode_mcu_DC_first (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_AC_first (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_DC_refine (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_AC_refine (j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo); +METHODDEF(void) finish_pass_gather_phuff (j_compress_ptr cinfo); /* @@ -104,11 +107,11 @@ METHODDEF(void) start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; entropy->cinfo = cinfo; entropy->gather_statistics = gather_statistics; @@ -130,9 +133,9 @@ entropy->pub.encode_mcu = encode_mcu_AC_refine; /* AC refinement needs a correction bit buffer */ if (entropy->bit_buffer == NULL) - entropy->bit_buffer = (char *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - MAX_CORR_BITS * SIZEOF(char)); + entropy->bit_buffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + MAX_CORR_BITS * sizeof(char)); } } if (gather_statistics) @@ -149,8 +152,8 @@ entropy->last_dc_val[ci] = 0; /* Get table index */ if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; tbl = compptr->dc_tbl_no; } else { entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; @@ -163,15 +166,15 @@ /* Allocate and zero the statistics tables */ /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ if (entropy->count_ptrs[tbl] == NULL) - entropy->count_ptrs[tbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long)); + entropy->count_ptrs[tbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); } else { /* Compute derived values for Huffman table */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, - & entropy->derived_tbls[tbl]); + & entropy->derived_tbls[tbl]); } } @@ -196,16 +199,16 @@ /* Emit a byte */ #define emit_byte(entropy,val) \ - { *(entropy)->next_output_byte++ = (JOCTET) (val); \ - if (--(entropy)->free_in_buffer == 0) \ - dump_buffer(entropy); } + { *(entropy)->next_output_byte++ = (JOCTET) (val); \ + if (--(entropy)->free_in_buffer == 0) \ + dump_buffer(entropy); } LOCAL(void) dump_buffer (phuff_entropy_ptr entropy) /* Empty the output buffer; we do not support suspension in this module. */ { - struct jpeg_destination_mgr * dest = entropy->cinfo->dest; + struct jpeg_destination_mgr *dest = entropy->cinfo->dest; if (! (*dest->empty_output_buffer) (entropy->cinfo)) ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); @@ -228,7 +231,7 @@ /* Emit some bits, unless we are in gather mode */ { /* This routine is heavily used, so it's worth coding tightly. */ - register INT32 put_buffer = (INT32) code; + register size_t put_buffer = (size_t) code; register int put_bits = entropy->put_bits; /* if size is 0, caller used an invalid Huffman table entry */ @@ -236,21 +239,21 @@ ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); if (entropy->gather_statistics) - return; /* do nothing if we're only getting stats */ + return; /* do nothing if we're only getting stats */ + + put_buffer &= (((size_t) 1)<put_buffer; /* and merge with old buffer contents */ while (put_bits >= 8) { int c = (int) ((put_buffer >> 16) & 0xFF); - + emit_byte(entropy, c); - if (c == 0xFF) { /* need to stuff a zero byte? */ + if (c == 0xFF) { /* need to stuff a zero byte? */ emit_byte(entropy, 0); } put_buffer <<= 8; @@ -281,7 +284,7 @@ if (entropy->gather_statistics) entropy->count_ptrs[tbl_no][symbol]++; else { - c_derived_tbl * tbl = entropy->derived_tbls[tbl_no]; + c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); } } @@ -292,11 +295,11 @@ */ LOCAL(void) -emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart, - unsigned int nbits) +emit_buffered_bits (phuff_entropy_ptr entropy, char *bufstart, + unsigned int nbits) { if (entropy->gather_statistics) - return; /* no real work */ + return; /* no real work */ while (nbits > 0) { emit_bits(entropy, (unsigned int) (*bufstart), 1); @@ -315,7 +318,7 @@ { register int temp, nbits; - if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ + if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ temp = entropy->EOBRUN; nbits = 0; while ((temp >>= 1)) @@ -380,7 +383,7 @@ int blkn, ci; int Al = cinfo->Al; JBLOCKROW block; - jpeg_component_info * compptr; + jpeg_component_info *compptr; ISHIFT_TEMPS entropy->next_output_byte = cinfo->dest->next_output_byte; @@ -409,12 +412,12 @@ /* Encode the DC coefficient difference per section G.1.2.1 */ temp2 = temp; if (temp < 0) { - temp = -temp; /* temp is abs value of input */ + temp = -temp; /* temp is abs value of input */ /* For a negative input, want temp2 = bitwise complement of abs(input) */ /* This code assumes we are on a two's complement machine */ temp2--; } - + /* Find the number of bits needed for the magnitude of the coefficient */ nbits = 0; while (temp) { @@ -426,13 +429,13 @@ */ if (nbits > MAX_COEF_BITS+1) ERREXIT(cinfo, JERR_BAD_DCT_COEF); - + /* Count/emit the Huffman-coded symbol for the number of bits */ emit_symbol(entropy, compptr->dc_tbl_no, nbits); - + /* Emit that number of bits of the value, if positive, */ /* or the complement of its magnitude, if negative. */ - if (nbits) /* emit_bits rejects calls with size 0 */ + if (nbits) /* emit_bits rejects calls with size 0 */ emit_bits(entropy, (unsigned int) temp2, nbits); } @@ -481,9 +484,9 @@ block = MCU_data[0]; /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ - - r = 0; /* r = run length of zeros */ - + + r = 0; /* r = run length of zeros */ + for (k = cinfo->Ss; k <= Se; k++) { if ((temp = (*block)[jpeg_natural_order[k]]) == 0) { r++; @@ -495,12 +498,12 @@ * interwoven with finding the abs value (temp) and output bits (temp2). */ if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ temp2 = ~temp; } else { - temp >>= Al; /* apply the point transform */ + temp >>= Al; /* apply the point transform */ temp2 = temp; } /* Watch out for case that nonzero coef is zero after point transform */ @@ -519,7 +522,7 @@ } /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ + nbits = 1; /* there must be at least one 1 bit */ while ((temp >>= 1)) nbits++; /* Check for out-of-range coefficient values */ @@ -533,13 +536,13 @@ /* or the complement of its magnitude, if negative. */ emit_bits(entropy, (unsigned int) temp2, nbits); - r = 0; /* reset zero run length */ + r = 0; /* reset zero run length */ } - if (r > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ + if (r > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ if (entropy->EOBRUN == 0x7FFF) - emit_eobrun(entropy); /* force it out to avoid overflow */ + emit_eobrun(entropy); /* force it out to avoid overflow */ } cinfo->dest->next_output_byte = entropy->next_output_byte; @@ -648,17 +651,17 @@ * in C, we shift after obtaining the absolute value. */ if (temp < 0) - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - absvalues[k] = temp; /* save abs value for main pass */ + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ + absvalues[k] = temp; /* save abs value for main pass */ if (temp == 1) - EOB = k; /* EOB = index of last newly-nonzero coef */ + EOB = k; /* EOB = index of last newly-nonzero coef */ } /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ - - r = 0; /* r = run length of zeros */ - BR = 0; /* BR = count of buffered bits added now */ + + r = 0; /* r = run length of zeros */ + BR = 0; /* BR = count of buffered bits added now */ BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ for (k = cinfo->Ss; k <= Se; k++) { @@ -705,12 +708,12 @@ emit_buffered_bits(entropy, BR_buffer, BR); BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ BR = 0; - r = 0; /* reset zero run length */ + r = 0; /* reset zero run length */ } - if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ - entropy->BE += BR; /* concat my correction bits to older ones */ + if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + entropy->BE += BR; /* concat my correction bits to older ones */ /* We force out the EOB if we risk either: * 1. overflow of the EOB counter; * 2. overflow of the correction bit buffer during the next MCU. @@ -742,7 +745,7 @@ METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; entropy->next_output_byte = cinfo->dest->next_output_byte; @@ -767,7 +770,7 @@ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JHUFF_TBL **htblptr; boolean did[NUM_HUFF_TBLS]; @@ -779,13 +782,13 @@ /* It's important not to apply jpeg_gen_optimal_table more than once * per table, because it clobbers the input frequency counts! */ - MEMZERO(did, SIZEOF(did)); + MEMZERO(did, sizeof(did)); for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; tbl = compptr->dc_tbl_no; } else { tbl = compptr->ac_tbl_no; @@ -816,7 +819,7 @@ entropy = (phuff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_encoder)); + sizeof(phuff_entropy_encoder)); cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; entropy->pub.start_pass = start_pass_phuff; @@ -825,7 +828,7 @@ entropy->derived_tbls[i] = NULL; entropy->count_ptrs[i] = NULL; } - entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ + entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ } #endif /* C_PROGRESSIVE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcprepct.c glmark2-2021.02/src/libjpeg-turbo/jcprepct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcprepct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcprepct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jcprepct.c * + * This file is part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the compression preprocessing controller. * This controller manages the color conversion, downsampling, @@ -58,16 +61,16 @@ */ JSAMPARRAY color_buf[MAX_COMPONENTS]; - JDIMENSION rows_to_go; /* counts rows remaining in source image */ - int next_buf_row; /* index of next row to store in color_buf */ + JDIMENSION rows_to_go; /* counts rows remaining in source image */ + int next_buf_row; /* index of next row to store in color_buf */ -#ifdef CONTEXT_ROWS_SUPPORTED /* only needed for context case */ - int this_row_group; /* starting row index of group to process */ - int next_buf_stop; /* downsample when we reach this index */ +#ifdef CONTEXT_ROWS_SUPPORTED /* only needed for context case */ + int this_row_group; /* starting row index of group to process */ + int next_buf_stop; /* downsample when we reach this index */ #endif } my_prep_controller; -typedef my_prep_controller * my_prep_ptr; +typedef my_prep_controller *my_prep_ptr; /* @@ -104,13 +107,13 @@ LOCAL(void) expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols, - int input_rows, int output_rows) + int input_rows, int output_rows) { register int row; for (row = input_rows; row < output_rows; row++) { jcopy_sample_rows(image_data, input_rows-1, image_data, row, - 1, num_cols); + 1, num_cols); } } @@ -126,43 +129,43 @@ METHODDEF(void) pre_process_data (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int numrows, ci; JDIMENSION inrows; - jpeg_component_info * compptr; + jpeg_component_info *compptr; while (*in_row_ctr < in_rows_avail && - *out_row_group_ctr < out_row_groups_avail) { + *out_row_group_ctr < out_row_groups_avail) { /* Do color conversion to fill the conversion buffer. */ inrows = in_rows_avail - *in_row_ctr; numrows = cinfo->max_v_samp_factor - prep->next_buf_row; numrows = (int) MIN((JDIMENSION) numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, - prep->color_buf, - (JDIMENSION) prep->next_buf_row, - numrows); + prep->color_buf, + (JDIMENSION) prep->next_buf_row, + numrows); *in_row_ctr += numrows; prep->next_buf_row += numrows; prep->rows_to_go -= numrows; /* If at bottom of image, pad to fill the conversion buffer. */ if (prep->rows_to_go == 0 && - prep->next_buf_row < cinfo->max_v_samp_factor) { + prep->next_buf_row < cinfo->max_v_samp_factor) { for (ci = 0; ci < cinfo->num_components; ci++) { - expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, - prep->next_buf_row, cinfo->max_v_samp_factor); + expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, + prep->next_buf_row, cinfo->max_v_samp_factor); } prep->next_buf_row = cinfo->max_v_samp_factor; } /* If we've filled the conversion buffer, empty it. */ if (prep->next_buf_row == cinfo->max_v_samp_factor) { (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, (JDIMENSION) 0, - output_buf, *out_row_group_ctr); + prep->color_buf, (JDIMENSION) 0, + output_buf, *out_row_group_ctr); prep->next_buf_row = 0; (*out_row_group_ctr)++; } @@ -170,16 +173,16 @@ * Note we assume the caller is providing a one-iMCU-height output buffer! */ if (prep->rows_to_go == 0 && - *out_row_group_ctr < out_row_groups_avail) { + *out_row_group_ctr < out_row_groups_avail) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - expand_bottom_edge(output_buf[ci], - compptr->width_in_blocks * DCTSIZE, - (int) (*out_row_group_ctr * compptr->v_samp_factor), - (int) (out_row_groups_avail * compptr->v_samp_factor)); + ci++, compptr++) { + expand_bottom_edge(output_buf[ci], + compptr->width_in_blocks * DCTSIZE, + (int) (*out_row_group_ctr * compptr->v_samp_factor), + (int) (out_row_groups_avail * compptr->v_samp_factor)); } *out_row_group_ctr = out_row_groups_avail; - break; /* can exit outer loop without test */ + break; /* can exit outer loop without test */ } } } @@ -193,10 +196,10 @@ METHODDEF(void) pre_process_context (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int numrows, ci; @@ -210,19 +213,19 @@ numrows = prep->next_buf_stop - prep->next_buf_row; numrows = (int) MIN((JDIMENSION) numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, - prep->color_buf, - (JDIMENSION) prep->next_buf_row, - numrows); + prep->color_buf, + (JDIMENSION) prep->next_buf_row, + numrows); /* Pad at top of image, if first time through */ if (prep->rows_to_go == cinfo->image_height) { - for (ci = 0; ci < cinfo->num_components; ci++) { - int row; - for (row = 1; row <= cinfo->max_v_samp_factor; row++) { - jcopy_sample_rows(prep->color_buf[ci], 0, - prep->color_buf[ci], -row, - 1, cinfo->image_width); - } - } + for (ci = 0; ci < cinfo->num_components; ci++) { + int row; + for (row = 1; row <= cinfo->max_v_samp_factor; row++) { + jcopy_sample_rows(prep->color_buf[ci], 0, + prep->color_buf[ci], -row, + 1, cinfo->image_width); + } + } } *in_row_ctr += numrows; prep->next_buf_row += numrows; @@ -230,29 +233,29 @@ } else { /* Return for more data, unless we are at the bottom of the image. */ if (prep->rows_to_go != 0) - break; + break; /* When at bottom of image, pad to fill the conversion buffer. */ if (prep->next_buf_row < prep->next_buf_stop) { - for (ci = 0; ci < cinfo->num_components; ci++) { - expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, - prep->next_buf_row, prep->next_buf_stop); - } - prep->next_buf_row = prep->next_buf_stop; + for (ci = 0; ci < cinfo->num_components; ci++) { + expand_bottom_edge(prep->color_buf[ci], cinfo->image_width, + prep->next_buf_row, prep->next_buf_stop); + } + prep->next_buf_row = prep->next_buf_stop; } } /* If we've gotten enough data, downsample a row group. */ if (prep->next_buf_row == prep->next_buf_stop) { (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, - (JDIMENSION) prep->this_row_group, - output_buf, *out_row_group_ctr); + prep->color_buf, + (JDIMENSION) prep->this_row_group, + output_buf, *out_row_group_ctr); (*out_row_group_ctr)++; /* Advance pointers with wraparound as necessary. */ prep->this_row_group += cinfo->max_v_samp_factor; if (prep->this_row_group >= buf_height) - prep->this_row_group = 0; + prep->this_row_group = 0; if (prep->next_buf_row >= buf_height) - prep->next_buf_row = 0; + prep->next_buf_row = 0; prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor; } } @@ -269,7 +272,7 @@ my_prep_ptr prep = (my_prep_ptr) cinfo->prep; int rgroup_height = cinfo->max_v_samp_factor; int ci, i; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JSAMPARRAY true_buffer, fake_buffer; /* Grab enough space for fake row pointers for all the components; @@ -277,8 +280,8 @@ */ fake_buffer = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (cinfo->num_components * 5 * rgroup_height) * - SIZEOF(JSAMPROW)); + (cinfo->num_components * 5 * rgroup_height) * + sizeof(JSAMPROW)); for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -289,11 +292,11 @@ true_buffer = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), + cinfo->max_h_samp_factor) / compptr->h_samp_factor), (JDIMENSION) (3 * rgroup_height)); /* Copy true buffer row pointers into the middle of the fake row array */ MEMCOPY(fake_buffer + rgroup_height, true_buffer, - 3 * rgroup_height * SIZEOF(JSAMPROW)); + 3 * rgroup_height * sizeof(JSAMPROW)); /* Fill in the above and below wraparound pointers */ for (i = 0; i < rgroup_height; i++) { fake_buffer[i] = true_buffer[2 * rgroup_height + i]; @@ -316,14 +319,14 @@ { my_prep_ptr prep; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; - if (need_full_buffer) /* safety check */ + if (need_full_buffer) /* safety check */ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); prep = (my_prep_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_prep_controller)); + sizeof(my_prep_controller)); cinfo->prep = (struct jpeg_c_prep_controller *) prep; prep->pub.start_pass = start_pass_prep; @@ -343,12 +346,12 @@ /* No context, just make it tall enough for one row group */ prep->pub.pre_process_data = pre_process_data; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { prep->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * + cinfo->max_h_samp_factor) / compptr->h_samp_factor), + (JDIMENSION) cinfo->max_v_samp_factor); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcsample.c glmark2-2021.02/src/libjpeg-turbo/jcsample.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcsample.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcsample.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jcsample.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains downsampling routines. * @@ -53,20 +57,21 @@ /* Pointer to routine to downsample a single component */ -typedef JMETHOD(void, downsample1_ptr, - (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); +typedef void (*downsample1_ptr) (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY output_data); /* Private subobject */ typedef struct { - struct jpeg_downsampler pub; /* public fields */ + struct jpeg_downsampler pub; /* public fields */ /* Downsampling method pointers, one per component */ downsample1_ptr methods[MAX_COMPONENTS]; } my_downsampler; -typedef my_downsampler * my_downsample_ptr; +typedef my_downsampler *my_downsample_ptr; /* @@ -87,7 +92,7 @@ LOCAL(void) expand_right_edge (JSAMPARRAY image_data, int num_rows, - JDIMENSION input_cols, JDIMENSION output_cols) + JDIMENSION input_cols, JDIMENSION output_cols) { register JSAMPROW ptr; register JSAMPLE pixval; @@ -98,9 +103,9 @@ if (numcols > 0) { for (row = 0; row < num_rows; row++) { ptr = image_data[row] + input_cols; - pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ for (count = numcols; count > 0; count--) - *ptr++ = pixval; + *ptr++ = pixval; } } } @@ -114,12 +119,12 @@ METHODDEF(void) sep_downsample (j_compress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_index, - JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) + JSAMPIMAGE input_buf, JDIMENSION in_row_index, + JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) { my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JSAMPARRAY in_ptr, out_ptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -139,14 +144,14 @@ */ METHODDEF(void) -int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v; - JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ + JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; JSAMPROW inptr, outptr; - INT32 outvalue; + JLONG outvalue; h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor; v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor; @@ -158,19 +163,19 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * h_expand); + cinfo->image_width, output_cols * h_expand); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; for (outcol = 0, outcol_h = 0; outcol < output_cols; - outcol++, outcol_h += h_expand) { + outcol++, outcol_h += h_expand) { outvalue = 0; for (v = 0; v < v_expand; v++) { - inptr = input_data[inrow+v] + outcol_h; - for (h = 0; h < h_expand; h++) { - outvalue += (INT32) GETJSAMPLE(*inptr++); - } + inptr = input_data[inrow+v] + outcol_h; + for (h = 0; h < h_expand; h++) { + outvalue += (JLONG) GETJSAMPLE(*inptr++); + } } *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix); } @@ -186,15 +191,15 @@ */ METHODDEF(void) -fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { /* Copy the data */ jcopy_sample_rows(input_data, 0, output_data, 0, - cinfo->max_v_samp_factor, cinfo->image_width); + cinfo->max_v_samp_factor, cinfo->image_width); /* Edge-expand */ expand_right_edge(output_data, cinfo->max_v_samp_factor, - cinfo->image_width, compptr->width_in_blocks * DCTSIZE); + cinfo->image_width, compptr->width_in_blocks * DCTSIZE); } @@ -211,8 +216,8 @@ */ METHODDEF(void) -h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION outcol; @@ -225,16 +230,16 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr = input_data[outrow]; - bias = 0; /* bias = 0,1,0,1,... for successive samples */ + bias = 0; /* bias = 0,1,0,1,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) - + bias) >> 1); - bias ^= 1; /* 0=>1, 1=>0 */ + + bias) >> 1); + bias ^= 1; /* 0=>1, 1=>0 */ inptr += 2; } } @@ -248,8 +253,8 @@ */ METHODDEF(void) -h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION outcol; @@ -262,19 +267,19 @@ * efficient. */ expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow+1]; - bias = 1; /* bias = 1,2,1,2,... for successive samples */ + bias = 1; /* bias = 1,2,1,2,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) - + bias) >> 2); - bias ^= 3; /* 1=>2, 2=>1 */ + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + + bias) >> 2); + bias ^= 3; /* 1=>2, 2=>1 */ inptr0 += 2; inptr1 += 2; } inrow += 2; @@ -291,21 +296,21 @@ */ METHODDEF(void) -h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION colctr; JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr; - INT32 membersum, neighsum, memberscale, neighscale; + JLONG membersum, neighsum, memberscale, neighscale; /* Expand input data enough to let all the output samples be generated * by the standard loop. Special-casing padded output would be more * efficient. */ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, - cinfo->image_width, output_cols * 2); + cinfo->image_width, output_cols * 2); /* We don't bother to form the individual "smoothed" input pixel values; * we can directly compute the output which is the average of the four @@ -333,14 +338,14 @@ /* Special case for first column: pretend column -1 is same as column 0 */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); neighsum += neighsum; neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; @@ -348,17 +353,17 @@ for (colctr = output_cols - 2; colctr > 0; colctr--) { /* sum of pixels directly mapped to this output element */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); /* sum of edge-neighbor pixels */ neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + + GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); /* The edge-neighbors count twice as much as corner-neighbors */ neighsum += neighsum; /* Add in the corner-neighbors */ neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); + GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); /* form final output scaled up by 2^16 */ membersum = membersum * memberscale + neighsum * neighscale; /* round, descale and output it */ @@ -368,14 +373,14 @@ /* Special case for last column */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + + GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + + GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); neighsum += neighsum; neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); + GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE) ((membersum + 32768) >> 16); @@ -392,13 +397,13 @@ METHODDEF(void) fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION colctr; JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; register JSAMPROW inptr, above_ptr, below_ptr, outptr; - INT32 membersum, neighsum, memberscale, neighscale; + JLONG membersum, neighsum, memberscale, neighscale; int colsum, lastcolsum, nextcolsum; /* Expand input data enough to let all the output samples be generated @@ -406,7 +411,7 @@ * efficient. */ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, - cinfo->image_width, output_cols); + cinfo->image_width, output_cols); /* Each of the eight neighbor pixels contributes a fraction SF to the * smoothed pixel, while the main pixel contributes (1-8*SF). In order @@ -425,10 +430,10 @@ /* Special case for first column */ colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); membersum = GETJSAMPLE(*inptr++); nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); neighsum = colsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); @@ -438,7 +443,7 @@ membersum = GETJSAMPLE(*inptr++); above_ptr++; below_ptr++; nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + GETJSAMPLE(*inptr); neighsum = lastcolsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); @@ -467,12 +472,12 @@ { my_downsample_ptr downsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; boolean smoothok = TRUE; downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_downsampler)); + sizeof(my_downsampler)); cinfo->downsample = (struct jpeg_downsampler *) downsample; downsample->pub.start_pass = start_pass_downsample; downsample->pub.downsample = sep_downsample; @@ -485,35 +490,42 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + compptr->v_samp_factor == cinfo->max_v_samp_factor) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { - downsample->methods[ci] = fullsize_smooth_downsample; - downsample->pub.need_context_rows = TRUE; + downsample->methods[ci] = fullsize_smooth_downsample; + downsample->pub.need_context_rows = TRUE; } else #endif - downsample->methods[ci] = fullsize_downsample; + downsample->methods[ci] = fullsize_downsample; } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + compptr->v_samp_factor == cinfo->max_v_samp_factor) { smoothok = FALSE; if (jsimd_can_h2v1_downsample()) downsample->methods[ci] = jsimd_h2v1_downsample; else downsample->methods[ci] = h2v1_downsample; } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { + compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { - downsample->methods[ci] = h2v2_smooth_downsample; - downsample->pub.need_context_rows = TRUE; +#if defined(__mips__) + if (jsimd_can_h2v2_smooth_downsample()) + downsample->methods[ci] = jsimd_h2v2_smooth_downsample; + else +#endif + downsample->methods[ci] = h2v2_smooth_downsample; + downsample->pub.need_context_rows = TRUE; } else #endif - if (jsimd_can_h2v2_downsample()) - downsample->methods[ci] = jsimd_h2v2_downsample; - else - downsample->methods[ci] = h2v2_downsample; + { + if (jsimd_can_h2v2_downsample()) + downsample->methods[ci] = jsimd_h2v2_downsample; + else + downsample->methods[ci] = h2v2_downsample; + } } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 && - (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { + (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { smoothok = FALSE; downsample->methods[ci] = int_downsample; } else diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcstest.c glmark2-2021.02/src/libjpeg-turbo/jcstest.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jcstest.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jcstest.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* This program demonstrates how to check for the colorspace extension + capabilities of libjpeg-turbo at both compile time and run time. */ + +#include +#include +#include +#include + +#ifndef JCS_EXTENSIONS +#define JCS_EXT_RGB 6 +#endif +#if !defined(JCS_EXTENSIONS) || !defined(JCS_ALPHA_EXTENSIONS) +#define JCS_EXT_RGBA 12 +#endif + +static char lasterror[JMSG_LENGTH_MAX] = "No error"; + +typedef struct _error_mgr { + struct jpeg_error_mgr pub; + jmp_buf jb; +} error_mgr; + +static void my_error_exit(j_common_ptr cinfo) +{ + error_mgr *myerr = (error_mgr *)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->jb, 1); +} + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, lasterror); +} + +int main(void) +{ + int jcs_valid = -1, jcs_alpha_valid = -1; + struct jpeg_compress_struct cinfo; + error_mgr jerr; + + printf("libjpeg-turbo colorspace extensions:\n"); + #if JCS_EXTENSIONS + printf(" Present at compile time\n"); + #else + printf(" Not present at compile time\n"); + #endif + + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + jerr.pub.output_message = my_output_message; + + if(setjmp(jerr.jb)) { + /* this will execute if libjpeg has an error */ + jcs_valid = 0; + goto done; + } + + jpeg_create_compress(&cinfo); + cinfo.input_components = 3; + jpeg_set_defaults(&cinfo); + cinfo.in_color_space = JCS_EXT_RGB; + jpeg_default_colorspace(&cinfo); + jcs_valid = 1; + + done: + if (jcs_valid) + printf(" Working properly\n"); + else + printf(" Not working properly. Error returned was:\n %s\n", + lasterror); + + printf("libjpeg-turbo alpha colorspace extensions:\n"); + #if JCS_ALPHA_EXTENSIONS + printf(" Present at compile time\n"); + #else + printf(" Not present at compile time\n"); + #endif + + if(setjmp(jerr.jb)) { + /* this will execute if libjpeg has an error */ + jcs_alpha_valid = 0; + goto done2; + } + + cinfo.in_color_space = JCS_EXT_RGBA; + jpeg_default_colorspace(&cinfo); + jcs_alpha_valid = 1; + + done2: + if (jcs_alpha_valid) + printf(" Working properly\n"); + else + printf(" Not working properly. Error returned was:\n %s\n", + lasterror); + + jpeg_destroy_compress(&cinfo); + return 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jctrans.c glmark2-2021.02/src/libjpeg-turbo/jctrans.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jctrans.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jctrans.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jctrans.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1998, Thomas G. Lane. * Modified 2000-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains library routines for transcoding compression, * that is, writing raw DCT coefficient arrays to an output JPEG file. @@ -18,9 +21,9 @@ /* Forward declarations */ LOCAL(void) transencode_master_selection - JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)); + (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); LOCAL(void) transencode_coef_controller - JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)); + (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); /* @@ -36,7 +39,7 @@ */ GLOBAL(void) -jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays) +jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays) { if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); @@ -48,7 +51,7 @@ /* Perform master selection of active modules */ transencode_master_selection(cinfo, coef_arrays); /* Wait for jpeg_finish_compress() call */ - cinfo->next_scanline = 0; /* so jpeg_write_marker works */ + cinfo->next_scanline = 0; /* so jpeg_write_marker works */ cinfo->global_state = CSTATE_WRCOEFS; } @@ -62,9 +65,9 @@ GLOBAL(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo) + j_compress_ptr dstinfo) { - JQUANT_TBL ** qtblptr; + JQUANT_TBL **qtblptr; jpeg_component_info *incomp, *outcomp; JQUANT_TBL *c_quant, *slot_quant; int tblno, ci, coefi; @@ -96,10 +99,10 @@ if (srcinfo->quant_tbl_ptrs[tblno] != NULL) { qtblptr = & dstinfo->quant_tbl_ptrs[tblno]; if (*qtblptr == NULL) - *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo); + *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo); MEMCOPY((*qtblptr)->quantval, - srcinfo->quant_tbl_ptrs[tblno]->quantval, - SIZEOF((*qtblptr)->quantval)); + srcinfo->quant_tbl_ptrs[tblno]->quantval, + sizeof((*qtblptr)->quantval)); (*qtblptr)->sent_table = FALSE; } } @@ -109,7 +112,7 @@ dstinfo->num_components = srcinfo->num_components; if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS) ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info; ci < dstinfo->num_components; ci++, incomp++, outcomp++) { outcomp->component_id = incomp->component_id; @@ -122,14 +125,14 @@ */ tblno = outcomp->quant_tbl_no; if (tblno < 0 || tblno >= NUM_QUANT_TBLS || - srcinfo->quant_tbl_ptrs[tblno] == NULL) + srcinfo->quant_tbl_ptrs[tblno] == NULL) ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno); slot_quant = srcinfo->quant_tbl_ptrs[tblno]; c_quant = incomp->quant_table; if (c_quant != NULL) { for (coefi = 0; coefi < DCTSIZE2; coefi++) { - if (c_quant->quantval[coefi] != slot_quant->quantval[coefi]) - ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno); + if (c_quant->quantval[coefi] != slot_quant->quantval[coefi]) + ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno); } } /* Note: we do not copy the source's Huffman table assignments; @@ -163,7 +166,7 @@ LOCAL(void) transencode_master_selection (j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays) + jvirt_barray_ptr *coef_arrays) { /* Although we don't actually use input_components for transcoding, * jcmaster.c's initial_setup will complain if input_components is 0. @@ -219,19 +222,19 @@ typedef struct { struct jpeg_c_coef_controller pub; /* public fields */ - JDIMENSION iMCU_row_num; /* iMCU row # within image */ - JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ + JDIMENSION iMCU_row_num; /* iMCU row # within image */ + JDIMENSION mcu_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ /* Virtual block array for each component. */ - jvirt_barray_ptr * whole_image; + jvirt_barray_ptr *whole_image; /* Workspace for constructing dummy blocks at right/bottom edges. */ JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU]; } my_coef_controller; -typedef my_coef_controller * my_coef_ptr; +typedef my_coef_controller *my_coef_ptr; LOCAL(void) @@ -289,7 +292,7 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, ci, xindex, yindex, yoffset, blockcnt; @@ -312,44 +315,44 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (coef->iMCU_row_num < last_iMCU_row || - yindex+yoffset < compptr->last_row_height) { - /* Fill in pointers to real blocks in this row */ - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < blockcnt; xindex++) - MCU_buffer[blkn++] = buffer_ptr++; - } else { - /* At bottom of image, need a whole row of dummy blocks */ - xindex = 0; - } - /* Fill in any dummy blocks needed in this row. - * Dummy blocks are filled in the same way as in jccoefct.c: - * all zeroes in the AC entries, DC entries equal to previous - * block's DC value. The init routine has already zeroed the - * AC entries, so we need only set the DC entries correctly. - */ - for (; xindex < compptr->MCU_width; xindex++) { - MCU_buffer[blkn] = coef->dummy_buffer[blkn]; - MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0]; - blkn++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (coef->iMCU_row_num < last_iMCU_row || + yindex+yoffset < compptr->last_row_height) { + /* Fill in pointers to real blocks in this row */ + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < blockcnt; xindex++) + MCU_buffer[blkn++] = buffer_ptr++; + } else { + /* At bottom of image, need a whole row of dummy blocks */ + xindex = 0; + } + /* Fill in any dummy blocks needed in this row. + * Dummy blocks are filled in the same way as in jccoefct.c: + * all zeroes in the AC entries, DC entries equal to previous + * block's DC value. The init routine has already zeroed the + * AC entries, so we need only set the DC entries correctly. + */ + for (; xindex < compptr->MCU_width; xindex++) { + MCU_buffer[blkn] = coef->dummy_buffer[blkn]; + MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0]; + blkn++; + } + } } /* Try to write the MCU. */ if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->mcu_ctr = MCU_col_num; - return FALSE; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->mcu_ctr = MCU_col_num; + return FALSE; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -372,7 +375,7 @@ LOCAL(void) transencode_coef_controller (j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays) + jvirt_barray_ptr *coef_arrays) { my_coef_ptr coef; JBLOCKROW buffer; @@ -380,7 +383,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_c_coef_controller *) coef; coef->pub.start_pass = start_pass_coef; coef->pub.compress_data = compress_output; @@ -391,8 +394,8 @@ /* Allocate and pre-zero space for dummy DCT blocks. */ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); - jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); + jzero_far((void *) buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->dummy_buffer[i] = buffer + i; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdapimin.c glmark2-2021.02/src/libjpeg-turbo/jdapimin.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdapimin.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdapimin.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdapimin.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the decompression half * of the JPEG library. These are the "minimum" API routines that may be @@ -19,6 +22,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jdmaster.h" /* @@ -32,12 +36,12 @@ int i; /* Guard against version mismatches between library and caller. */ - cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ + cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */ if (version != JPEG_LIB_VERSION) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); - if (structsize != SIZEOF(struct jpeg_decompress_struct)) - ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize); + if (structsize != sizeof(struct jpeg_decompress_struct)) + ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, + (int) sizeof(struct jpeg_decompress_struct), (int) structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -48,7 +52,7 @@ { struct jpeg_error_mgr * err = cinfo->err; void * client_data = cinfo->client_data; /* ignore Purify complaint here */ - MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct)); + MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct)); cinfo->err = err; cinfo->client_data = client_data; } @@ -80,6 +84,14 @@ /* OK, I'm ready */ cinfo->global_state = DSTATE_START; + + /* The master struct is used to store extension parameters, so we allocate it + * here. + */ + cinfo->master = (struct jpeg_decomp_master *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_decomp_master)); + MEMZERO(cinfo->master, sizeof(my_decomp_master)); } @@ -121,22 +133,22 @@ cinfo->jpeg_color_space = JCS_GRAYSCALE; cinfo->out_color_space = JCS_GRAYSCALE; break; - + case 3: if (cinfo->saw_JFIF_marker) { cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */ } else if (cinfo->saw_Adobe_marker) { switch (cinfo->Adobe_transform) { case 0: - cinfo->jpeg_color_space = JCS_RGB; - break; + cinfo->jpeg_color_space = JCS_RGB; + break; case 1: - cinfo->jpeg_color_space = JCS_YCbCr; - break; + cinfo->jpeg_color_space = JCS_YCbCr; + break; default: - WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); - cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ - break; + WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); + cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ + break; } } else { /* Saw no special markers, try to guess from the component IDs */ @@ -145,31 +157,31 @@ int cid2 = cinfo->comp_info[2].component_id; if (cid0 == 1 && cid1 == 2 && cid2 == 3) - cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */ + cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */ else if (cid0 == 82 && cid1 == 71 && cid2 == 66) - cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */ + cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */ else { - TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2); - cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ + TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2); + cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */ } } /* Always guess RGB is proper output colorspace. */ cinfo->out_color_space = JCS_RGB; break; - + case 4: if (cinfo->saw_Adobe_marker) { switch (cinfo->Adobe_transform) { case 0: - cinfo->jpeg_color_space = JCS_CMYK; - break; + cinfo->jpeg_color_space = JCS_CMYK; + break; case 2: - cinfo->jpeg_color_space = JCS_YCCK; - break; + cinfo->jpeg_color_space = JCS_YCCK; + break; default: - WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); - cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */ - break; + WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform); + cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */ + break; } } else { /* No special markers, assume straight CMYK. */ @@ -177,7 +189,7 @@ } cinfo->out_color_space = JCS_CMYK; break; - + default: cinfo->jpeg_color_space = JCS_UNKNOWN; cinfo->out_color_space = JCS_UNKNOWN; @@ -185,7 +197,7 @@ } /* Set defaults for other decompression parameters. */ - cinfo->scale_num = 1; /* 1:1 scaling */ + cinfo->scale_num = 1; /* 1:1 scaling */ cinfo->scale_denom = 1; cinfo->output_gamma = 1.0; cinfo->buffered_image = FALSE; @@ -253,7 +265,7 @@ retcode = JPEG_HEADER_OK; break; case JPEG_REACHED_EOI: - if (require_image) /* Complain if application wanted an image */ + if (require_image) /* Complain if application wanted an image */ ERREXIT(cinfo, JERR_NO_IMAGE); /* Reset to start state; it would be safer to require the application to * call jpeg_abort, but we can't change it now for compatibility reasons. @@ -385,7 +397,7 @@ /* Read until EOI */ while (! cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) - return FALSE; /* Suspend, come back later */ + return FALSE; /* Suspend, come back later */ } /* Do final cleanup */ (*cinfo->src->term_source) (cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdapistd.c glmark2-2021.02/src/libjpeg-turbo/jdapistd.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdapistd.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdapistd.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jdapistd.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains application interface code for the decompression half * of the JPEG library. These are the "standard" API routines that are @@ -15,14 +18,14 @@ * whole decompression library into a transcoder. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" -#include "jpegcomp.h" - +#include "jdmainct.h" +#include "jdcoefct.h" +#include "jdsample.h" +#include "jmemsys.h" /* Forward declarations */ -LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo)); +LOCAL(boolean) output_pass_setup (j_decompress_ptr cinfo); /* @@ -54,24 +57,24 @@ if (cinfo->inputctl->has_multiple_scans) { #ifdef D_MULTISCAN_FILES_SUPPORTED for (;;) { - int retcode; - /* Call progress monitor hook if present */ - if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); - /* Absorb some more input */ - retcode = (*cinfo->inputctl->consume_input) (cinfo); - if (retcode == JPEG_SUSPENDED) - return FALSE; - if (retcode == JPEG_REACHED_EOI) - break; - /* Advance progress counter if appropriate */ - if (cinfo->progress != NULL && - (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { - if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { - /* jdmaster underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; - } - } + int retcode; + /* Call progress monitor hook if present */ + if (cinfo->progress != NULL) + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + /* Absorb some more input */ + retcode = (*cinfo->inputctl->consume_input) (cinfo); + if (retcode == JPEG_SUSPENDED) + return FALSE; + if (retcode == JPEG_REACHED_EOI) + break; + /* Advance progress counter if appropriate */ + if (cinfo->progress != NULL && + (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { + if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { + /* jdmaster underestimated number of scans; ratchet up one scan */ + cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + } + } } #else ERREXIT(cinfo, JERR_NOT_COMPILED); @@ -110,16 +113,16 @@ JDIMENSION last_scanline; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->output_scanline; - cinfo->progress->pass_limit = (long) cinfo->output_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long) cinfo->output_scanline; + cinfo->progress->pass_limit = (long) cinfo->output_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); } /* Process some data */ last_scanline = cinfo->output_scanline; (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL, - &cinfo->output_scanline, (JDIMENSION) 0); + &cinfo->output_scanline, (JDIMENSION) 0); if (cinfo->output_scanline == last_scanline) - return FALSE; /* No progress made, must suspend */ + return FALSE; /* No progress made, must suspend */ } /* Finish up dummy pass, and set up for another one */ (*cinfo->master->finish_output_pass) (cinfo); @@ -138,6 +141,110 @@ /* + * Enable partial scanline decompression + * + * Must be called after jpeg_start_decompress() and before any calls to + * jpeg_read_scanlines() or jpeg_skip_scanlines(). + * + * Refer to libjpeg.txt for more information. + */ + +GLOBAL(void) +jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) +{ + int ci, align, orig_downsampled_width; + JDIMENSION input_xoffset; + boolean reinit_upsampler = FALSE; + jpeg_component_info *compptr; + + if (cinfo->global_state != DSTATE_SCANNING || cinfo->output_scanline != 0) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + if (!xoffset || !width) + ERREXIT(cinfo, JERR_BAD_CROP_SPEC); + + /* xoffset and width must fall within the output image dimensions. */ + if (*width == 0 || *xoffset + *width > cinfo->output_width) + ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); + + /* No need to do anything if the caller wants the entire width. */ + if (*width == cinfo->output_width) + return; + + /* Ensuring the proper alignment of xoffset is tricky. At minimum, it + * must align with an MCU boundary, because: + * + * (1) The IDCT is performed in blocks, and it is not feasible to modify + * the algorithm so that it can transform partial blocks. + * (2) Because of the SIMD extensions, any input buffer passed to the + * upsampling and color conversion routines must be aligned to the + * SIMD word size (for instance, 128-bit in the case of SSE2.) The + * easiest way to accomplish this without copying data is to ensure + * that upsampling and color conversion begin at the start of the + * first MCU column that will be inverse transformed. + * + * In practice, we actually impose a stricter alignment requirement. We + * require that xoffset be a multiple of the maximum MCU column width of all + * of the components (the "iMCU column width.") This is to simplify the + * single-pass decompression case, allowing us to use the same MCU column + * width for all of the components. + */ + align = cinfo->_min_DCT_scaled_size * cinfo->max_h_samp_factor; + + /* Adjust xoffset to the nearest iMCU boundary <= the requested value */ + input_xoffset = *xoffset; + *xoffset = (input_xoffset / align) * align; + + /* Adjust the width so that the right edge of the output image is as + * requested (only the left edge is altered.) It is important that calling + * programs check this value after this function returns, so that they can + * allocate an output buffer with the appropriate size. + */ + *width = *width + input_xoffset - *xoffset; + cinfo->output_width = *width; + + /* Set the first and last iMCU columns that we must decompress. These values + * will be used in single-scan decompressions. + */ + cinfo->master->first_iMCU_col = + (JDIMENSION) (long) (*xoffset) / (long) align; + cinfo->master->last_iMCU_col = + (JDIMENSION) jdiv_round_up((long) (*xoffset + cinfo->output_width), + (long) align) - 1; + + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + /* Set downsampled_width to the new output width. */ + orig_downsampled_width = compptr->downsampled_width; + compptr->downsampled_width = + (JDIMENSION) jdiv_round_up((long) (cinfo->output_width * + compptr->h_samp_factor), + (long) cinfo->max_h_samp_factor); + if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2) + reinit_upsampler = TRUE; + + /* Set the first and last iMCU columns that we must decompress. These + * values will be used in multi-scan decompressions. + */ + cinfo->master->first_MCU_col[ci] = + (JDIMENSION) (long) (*xoffset * compptr->h_samp_factor) / + (long) align; + cinfo->master->last_MCU_col[ci] = + (JDIMENSION) jdiv_round_up((long) ((*xoffset + cinfo->output_width) * + compptr->h_samp_factor), + (long) align) - 1; + } + + if (reinit_upsampler) { + cinfo->master->jinit_upsampler_no_alloc = TRUE; + jinit_upsampler(cinfo); + cinfo->master->jinit_upsampler_no_alloc = FALSE; + } +} + + +/* * Read some scanlines of data from the JPEG decompressor. * * The return value will be the number of lines actually read. @@ -152,7 +259,7 @@ GLOBAL(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION max_lines) + JDIMENSION max_lines) { JDIMENSION row_ctr; @@ -178,6 +285,236 @@ } +/* Dummy color convert function used by jpeg_skip_scanlines() */ +LOCAL(void) +noop_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) +{ +} + + +/* + * In some cases, it is best to call jpeg_read_scanlines() and discard the + * output, rather than skipping the scanlines, because this allows us to + * maintain the internal state of the context-based upsampler. In these cases, + * we set up and tear down a dummy color converter in order to avoid valgrind + * errors and to achieve the best possible performance. + */ + +LOCAL(void) +read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +{ + JDIMENSION n; + void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows); + + color_convert = cinfo->cconvert->color_convert; + cinfo->cconvert->color_convert = noop_convert; + + for (n = 0; n < num_lines; n++) + jpeg_read_scanlines(cinfo, NULL, 1); + + cinfo->cconvert->color_convert = color_convert; +} + + +/* + * Called by jpeg_skip_scanlines(). This partially skips a decompress block by + * incrementing the rowgroup counter. + */ + +LOCAL(void) +increment_simple_rowgroup_ctr (j_decompress_ptr cinfo, JDIMENSION rows) +{ + JDIMENSION rows_left; + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + + /* Increment the counter to the next row group after the skipped rows. */ + main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor; + + /* Partially skipping a row group would involve modifying the internal state + * of the upsampler, so read the remaining rows into a dummy buffer instead. + */ + rows_left = rows % cinfo->max_v_samp_factor; + cinfo->output_scanline += rows - rows_left; + + read_and_discard_scanlines(cinfo, rows_left); +} + +/* + * Skips some scanlines of data from the JPEG decompressor. + * + * The return value will be the number of lines actually skipped. If skipping + * num_lines would move beyond the end of the image, then the actual number of + * lines remaining in the image is returned. Otherwise, the return value will + * be equal to num_lines. + * + * Refer to libjpeg.txt for more information. + */ + +GLOBAL(JDIMENSION) +jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +{ + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + JDIMENSION i, x; + int y; + JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row; + JDIMENSION lines_to_skip, lines_to_read; + + if (cinfo->global_state != DSTATE_SCANNING) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + /* Do not skip past the bottom of the image. */ + if (cinfo->output_scanline + num_lines >= cinfo->output_height) { + cinfo->output_scanline = cinfo->output_height; + return cinfo->output_height - cinfo->output_scanline; + } + + if (num_lines == 0) + return 0; + + lines_per_iMCU_row = cinfo->_min_DCT_scaled_size * cinfo->max_v_samp_factor; + lines_left_in_iMCU_row = + (lines_per_iMCU_row - (cinfo->output_scanline % lines_per_iMCU_row)) % + lines_per_iMCU_row; + lines_after_iMCU_row = num_lines - lines_left_in_iMCU_row; + + /* Skip the lines remaining in the current iMCU row. When upsampling + * requires context rows, we need the previous and next rows in order to read + * the current row. This adds some complexity. + */ + if (cinfo->upsample->need_context_rows) { + /* If the skipped lines would not move us past the current iMCU row, we + * read the lines and ignore them. There might be a faster way of doing + * this, but we are facing increasing complexity for diminishing returns. + * The increasing complexity would be a by-product of meddling with the + * state machine used to skip context rows. Near the end of an iMCU row, + * the next iMCU row may have already been entropy-decoded. In this unique + * case, we will read the next iMCU row if we cannot skip past it as well. + */ + if ((num_lines < lines_left_in_iMCU_row + 1) || + (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full && + lines_after_iMCU_row < lines_per_iMCU_row + 1)) { + read_and_discard_scanlines(cinfo, num_lines); + return num_lines; + } + + /* If the next iMCU row has already been entropy-decoded, make sure that + * we do not skip too far. + */ + if (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full) { + cinfo->output_scanline += lines_left_in_iMCU_row + lines_per_iMCU_row; + lines_after_iMCU_row -= lines_per_iMCU_row; + } else { + cinfo->output_scanline += lines_left_in_iMCU_row; + } + + /* If we have just completed the first block, adjust the buffer pointers */ + if (main_ptr->iMCU_row_ctr == 0 || + (main_ptr->iMCU_row_ctr == 1 && lines_left_in_iMCU_row > 2)) + set_wraparound_pointers(cinfo); + main_ptr->buffer_full = FALSE; + main_ptr->rowgroup_ctr = 0; + main_ptr->context_state = CTX_PREPARE_FOR_IMCU; + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } + + /* Skipping is much simpler when context rows are not required. */ + else { + if (num_lines < lines_left_in_iMCU_row) { + increment_simple_rowgroup_ctr(cinfo, num_lines); + return num_lines; + } else { + cinfo->output_scanline += lines_left_in_iMCU_row; + main_ptr->buffer_full = FALSE; + main_ptr->rowgroup_ctr = 0; + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } + } + + /* Calculate how many full iMCU rows we can skip. */ + if (cinfo->upsample->need_context_rows) + lines_to_skip = ((lines_after_iMCU_row - 1) / lines_per_iMCU_row) * + lines_per_iMCU_row; + else + lines_to_skip = (lines_after_iMCU_row / lines_per_iMCU_row) * + lines_per_iMCU_row; + /* Calculate the number of lines that remain to be skipped after skipping all + * of the full iMCU rows that we can. We will not read these lines unless we + * have to. + */ + lines_to_read = lines_after_iMCU_row - lines_to_skip; + + /* For images requiring multiple scans (progressive, non-interleaved, etc.), + * all of the entropy decoding occurs in jpeg_start_decompress(), assuming + * that the input data source is non-suspending. This makes skipping easy. + */ + if (cinfo->inputctl->has_multiple_scans) { + if (cinfo->upsample->need_context_rows) { + cinfo->output_scanline += lines_to_skip; + cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; + main_ptr->iMCU_row_ctr += lines_after_iMCU_row / lines_per_iMCU_row; + /* It is complex to properly move to the middle of a context block, so + * read the remaining lines instead of skipping them. + */ + read_and_discard_scanlines(cinfo, lines_to_read); + } else { + cinfo->output_scanline += lines_to_skip; + cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; + increment_simple_rowgroup_ctr(cinfo, lines_to_read); + } + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + return num_lines; + } + + /* Skip the iMCU rows that we can safely skip. */ + for (i = 0; i < lines_to_skip; i += lines_per_iMCU_row) { + for (y = 0; y < coef->MCU_rows_per_iMCU_row; y++) { + for (x = 0; x < cinfo->MCUs_per_row; x++) { + /* Calling decode_mcu() with a NULL pointer causes it to discard the + * decoded coefficients. This is ~5% faster for large subsets, but + * it's tough to tell a difference for smaller images. + */ + (*cinfo->entropy->decode_mcu) (cinfo, NULL); + } + } + cinfo->input_iMCU_row++; + cinfo->output_iMCU_row++; + if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) + start_iMCU_row(cinfo); + else + (*cinfo->inputctl->finish_input_pass) (cinfo); + } + cinfo->output_scanline += lines_to_skip; + + if (cinfo->upsample->need_context_rows) { + /* Context-based upsampling keeps track of iMCU rows. */ + main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row; + + /* It is complex to properly move to the middle of a context block, so + * read the remaining lines instead of skipping them. + */ + read_and_discard_scanlines(cinfo, lines_to_read); + } else { + increment_simple_rowgroup_ctr(cinfo, lines_to_read); + } + + /* Since skipping lines involves skipping the upsampling step, the value of + * "rows_to_go" will become invalid unless we set it here. NOTE: This is a + * bit odd, since "rows_to_go" seems to be redundantly keeping track of + * output_scanline. + */ + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + + /* Always skip the requested number of lines. */ + return num_lines; +} + /* * Alternate entry point to read raw data. * Processes exactly one iMCU row per call, unless suspended. @@ -185,7 +522,7 @@ GLOBAL(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION max_lines) + JDIMENSION max_lines) { JDIMENSION lines_per_iMCU_row; @@ -210,7 +547,7 @@ /* Decompress directly into user's buffer. */ if (! (*cinfo->coef->decompress_data) (cinfo, data)) - return 0; /* suspension forced, can do nothing more */ + return 0; /* suspension forced, can do nothing more */ /* OK, we processed one iMCU row. */ cinfo->output_scanline += lines_per_iMCU_row; @@ -266,9 +603,9 @@ } /* Read markers looking for SOS or EOI */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + ! cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) - return FALSE; /* Suspend, come back later */ + return FALSE; /* Suspend, come back later */ } cinfo->global_state = DSTATE_BUFIMAGE; return TRUE; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdarith.c glmark2-2021.02/src/libjpeg-turbo/jdarith.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdarith.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdarith.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdarith.c * - * Developed 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * This file was part of the Independent JPEG Group's software: + * Developed 1997-2015 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains portable arithmetic entropy decoding routines for JPEG * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). @@ -23,8 +26,8 @@ typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ - INT32 c; /* C register, base of coding interval + input bit buffer */ - INT32 a; /* A register, normalized size of coding interval */ + JLONG c; /* C register, base of coding interval + input bit buffer */ + JLONG a; /* A register, normalized size of coding interval */ int ct; /* bit shift counter, # of bits left in bit buffer part of C */ /* init: ct = -16 */ /* run: ct = 0..7 */ @@ -32,17 +35,17 @@ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to statistics areas (these workspaces have image lifespan) */ - unsigned char * dc_stats[NUM_ARITH_TBLS]; - unsigned char * ac_stats[NUM_ARITH_TBLS]; + unsigned char *dc_stats[NUM_ARITH_TBLS]; + unsigned char *ac_stats[NUM_ARITH_TBLS]; /* Statistics bin for coding with fixed probability 0.5 */ unsigned char fixed_bin[4]; } arith_entropy_decoder; -typedef arith_entropy_decoder * arith_entropy_ptr; +typedef arith_entropy_decoder *arith_entropy_ptr; /* The following two definitions specify the allocation chunk size * for the statistics area. @@ -65,7 +68,7 @@ get_byte (j_decompress_ptr cinfo) /* Read next input byte; we do not support suspension in this module. */ { - struct jpeg_source_mgr * src = cinfo->src; + struct jpeg_source_mgr *src = cinfo->src; if (src->bytes_in_buffer == 0) if (! (*src->fill_input_buffer) (cinfo)) @@ -94,7 +97,7 @@ * (instead of fixed) with the bit shift counter CT. * Thus, we also need only one (variable instead of * fixed size) shift for the LPS/MPS decision, and - * we can get away with any renormalization update + * we can do away with any renormalization update * of C (except for new data insertion, of course). * * I've also introduced a new scheme for accessing @@ -107,7 +110,7 @@ { register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; register unsigned char nl, nm; - register INT32 qe, temp; + register JLONG qe, temp; register int sv, data; /* Renormalization & data input per section D.2.6 */ @@ -115,32 +118,32 @@ if (--e->ct < 0) { /* Need to fetch next data byte */ if (cinfo->unread_marker) - data = 0; /* stuff zero data */ + data = 0; /* stuff zero data */ else { - data = get_byte(cinfo); /* read next input byte */ - if (data == 0xFF) { /* zero stuff or marker code */ - do data = get_byte(cinfo); - while (data == 0xFF); /* swallow extra 0xFF bytes */ - if (data == 0) - data = 0xFF; /* discard stuffed zero byte */ - else { - /* Note: Different from the Huffman decoder, hitting - * a marker while processing the compressed data - * segment is legal in arithmetic coding. - * The convention is to supply zero data - * then until decoding is complete. - */ - cinfo->unread_marker = data; - data = 0; - } - } + data = get_byte(cinfo); /* read next input byte */ + if (data == 0xFF) { /* zero stuff or marker code */ + do data = get_byte(cinfo); + while (data == 0xFF); /* swallow extra 0xFF bytes */ + if (data == 0) + data = 0xFF; /* discard stuffed zero byte */ + else { + /* Note: Different from the Huffman decoder, hitting + * a marker while processing the compressed data + * segment is legal in arithmetic coding. + * The convention is to supply zero data + * then until decoding is complete. + */ + cinfo->unread_marker = data; + data = 0; + } + } } e->c = (e->c << 8) | data; /* insert data into C register */ - if ((e->ct += 8) < 0) /* update bit shift counter */ - /* Need more initial bytes */ - if (++e->ct == 0) - /* Got 2 initial bytes -> re-init A and exit loop */ - e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */ + if ((e->ct += 8) < 0) /* update bit shift counter */ + /* Need more initial bytes */ + if (++e->ct == 0) + /* Got 2 initial bytes -> re-init A and exit loop */ + e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */ } e->a <<= 1; } @@ -149,9 +152,9 @@ * Qe values and probability estimation state machine */ sv = *st; - qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ temp = e->a - qe; @@ -162,19 +165,19 @@ /* Conditional LPS (less probable symbol) exchange */ if (e->a < qe) { e->a = qe; - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } else { e->a = qe; - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ - sv ^= 0x80; /* Exchange LPS/MPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ } } else if (e->a < 0x8000L) { /* Conditional MPS (more probable symbol) exchange */ if (e->a < qe) { - *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ - sv ^= 0x80; /* Exchange LPS/MPS */ + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ } else { - *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ } } @@ -191,7 +194,7 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; /* Advance past the RSTn marker */ if (! (*cinfo->marker->read_restart_marker) (cinfo)) @@ -200,13 +203,13 @@ /* Re-initialize statistics areas */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; - if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS); /* Reset DC predictions to 0 */ entropy->last_dc_val[ci] = 0; entropy->dc_context[ci] = 0; } - if (! cinfo->progressive_mode || cinfo->Ss) { + if (!cinfo->progressive_mode || cinfo->Ss) { MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS); } } @@ -214,7 +217,7 @@ /* Reset arithmetic decoding variables */ entropy->c = 0; entropy->a = 0; - entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ /* Reset restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -253,7 +256,7 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* Outer loop handles each block in the MCU */ @@ -277,34 +280,34 @@ st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else - entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; entropy->last_dc_val[ci] += v; } /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al); + (*block)[0] = (JCOEF) LEFT_SHIFT(entropy->last_dc_val[ci], cinfo->Al); } return TRUE; @@ -332,7 +335,7 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* There is always only one block per MCU */ block = MCU_data[0]; @@ -343,13 +346,13 @@ /* Figure F.20: Decode_AC_coefficients */ for (k = cinfo->Ss; k <= cinfo->Se; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { st += 3; k++; if (k > cinfo->Se) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; } } /* Figure F.21: Decoding nonzero value v */ @@ -359,17 +362,17 @@ /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { if (arith_decode(cinfo, st)) { - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } } v = m; @@ -404,8 +407,8 @@ entropy->restarts_to_go--; } - st = entropy->fixed_bin; /* use fixed probability estimation */ - p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + st = entropy->fixed_bin; /* use fixed probability estimation */ + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ /* Outer loop handles each block in the MCU */ @@ -440,14 +443,14 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* There is always only one block per MCU */ block = MCU_data[0]; tbl = cinfo->cur_comp_info[0]->ac_tbl_no; - p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ /* Establish EOBx (previous stage end-of-block) index */ for (kex = cinfo->Se; kex > 0; kex--) @@ -456,30 +459,30 @@ for (k = cinfo->Ss; k <= cinfo->Se; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); if (k > kex) - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ for (;;) { thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef) { /* previously nonzero coef */ - if (arith_decode(cinfo, st + 2)) { - if (*thiscoef < 0) - *thiscoef += m1; - else - *thiscoef += p1; - } - break; - } - if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */ - if (arith_decode(cinfo, entropy->fixed_bin)) - *thiscoef = m1; - else - *thiscoef = p1; - break; + if (*thiscoef) { /* previously nonzero coef */ + if (arith_decode(cinfo, st + 2)) { + if (*thiscoef < 0) + *thiscoef += m1; + else + *thiscoef += p1; + } + break; + } + if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */ + if (arith_decode(cinfo, entropy->fixed_bin)) + *thiscoef = m1; + else + *thiscoef = p1; + break; } st += 3; k++; if (k > cinfo->Se) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; } } } @@ -496,7 +499,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl, sign, k; @@ -509,12 +512,12 @@ entropy->restarts_to_go--; } - if (entropy->ct == -1) return TRUE; /* if error do nothing */ + if (entropy->ct == -1) return TRUE; /* if error do nothing */ /* Outer loop handles each block in the MCU */ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; + block = MCU_data ? MCU_data[blkn] : NULL; ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; @@ -535,33 +538,34 @@ st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) - entropy->dc_context[ci] = 0; /* zero diff category */ + entropy->dc_context[ci] = 0; /* zero diff category */ else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) - entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else - entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; entropy->last_dc_val[ci] += v; } - (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; + if (block) + (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */ @@ -570,14 +574,14 @@ /* Figure F.20: Decode_AC_coefficients */ for (k = 1; k <= DCTSIZE2 - 1; k++) { st = entropy->ac_stats[tbl] + 3 * (k - 1); - if (arith_decode(cinfo, st)) break; /* EOB flag */ + if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { - st += 3; k++; - if (k > DCTSIZE2 - 1) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* spectral overflow */ - return TRUE; - } + st += 3; k++; + if (k > DCTSIZE2 - 1) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; + } } /* Figure F.21: Decoding nonzero value v */ /* Figure F.22: Decoding the sign of v */ @@ -585,27 +589,28 @@ st += 2; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { - if (arith_decode(cinfo, st)) { - m <<= 1; - st = entropy->ac_stats[tbl] + - (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); - while (arith_decode(cinfo, st)) { - if ((m <<= 1) == 0x8000) { - WARNMS(cinfo, JWRN_ARITH_BAD_CODE); - entropy->ct = -1; /* magnitude overflow */ - return TRUE; - } - st += 1; - } - } + if (arith_decode(cinfo, st)) { + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } } v = m; /* Figure F.24: Decoding the magnitude bit pattern of v */ st += 14; while (m >>= 1) - if (arith_decode(cinfo, st)) v |= m; + if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; - (*block)[jpeg_natural_order[k]] = (JCOEF) v; + if (block) + (*block)[jpeg_natural_order[k]] = (JCOEF) v; } } @@ -622,30 +627,30 @@ { arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; int ci, tbl; - jpeg_component_info * compptr; + jpeg_component_info *compptr; if (cinfo->progressive_mode) { /* Validate progressive scan parameters */ if (cinfo->Ss == 0) { if (cinfo->Se != 0) - goto bad; + goto bad; } else { /* need not check Ss/Se < 0 since they came from unsigned bytes */ if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1) - goto bad; + goto bad; /* AC scans may have only one component */ if (cinfo->comps_in_scan != 1) - goto bad; + goto bad; } if (cinfo->Ah != 0) { /* Successive approximation refinement scan: must have Al = Ah-1. */ if (cinfo->Ah-1 != cinfo->Al) - goto bad; + goto bad; } - if (cinfo->Al > 13) { /* need not check for < 0 */ + if (cinfo->Al > 13) { /* need not check for < 0 */ bad: ERREXIT4(cinfo, JERR_BAD_PROGRESSION, - cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); } /* Update progression status, and verify that scan order is legal. * Note that inter-scan inconsistencies are treated as warnings @@ -655,32 +660,32 @@ int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; int *coef_bit_ptr = & cinfo->coef_bits[cindex][0]; if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { - int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; - if (cinfo->Ah != expected) - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); - coef_bit_ptr[coefi] = cinfo->Al; + int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; + if (cinfo->Ah != expected) + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + coef_bit_ptr[coefi] = cinfo->Al; } } /* Select MCU decoding routine */ if (cinfo->Ah == 0) { if (cinfo->Ss == 0) - entropy->pub.decode_mcu = decode_mcu_DC_first; + entropy->pub.decode_mcu = decode_mcu_DC_first; else - entropy->pub.decode_mcu = decode_mcu_AC_first; + entropy->pub.decode_mcu = decode_mcu_AC_first; } else { if (cinfo->Ss == 0) - entropy->pub.decode_mcu = decode_mcu_DC_refine; + entropy->pub.decode_mcu = decode_mcu_DC_refine; else - entropy->pub.decode_mcu = decode_mcu_AC_refine; + entropy->pub.decode_mcu = decode_mcu_AC_refine; } } else { /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. * This ought to be an error condition, but we make it a warning. */ if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 || - (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1)) + (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1)) WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); /* Select MCU decoding routine */ entropy->pub.decode_mcu = decode_mcu; @@ -689,25 +694,25 @@ /* Allocate & initialize requested statistics areas */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; - if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { tbl = compptr->dc_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; entropy->dc_context[ci] = 0; } - if (! cinfo->progressive_mode || cinfo->Ss) { + if (!cinfo->progressive_mode || cinfo->Ss) { tbl = compptr->ac_tbl_no; if (tbl < 0 || tbl >= NUM_ARITH_TBLS) - ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); } } @@ -715,7 +720,7 @@ /* Initialize arithmetic decoding variables */ entropy->c = 0; entropy->a = 0; - entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ /* Initialize restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -734,7 +739,7 @@ entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(arith_entropy_decoder)); + sizeof(arith_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass; @@ -752,10 +757,10 @@ int *coef_bit_ptr, ci; cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*SIZEOF(int)); + cinfo->num_components*DCTSIZE2*sizeof(int)); coef_bit_ptr = & cinfo->coef_bits[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) - *coef_bit_ptr++ = -1; + *coef_bit_ptr++ = -1; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatadst.c glmark2-2021.02/src/libjpeg-turbo/jdatadst.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatadst.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdatadst.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,283 @@ +/* + * jdatadst.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2012 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains compression data destination routines for the case of + * emitting JPEG data to memory or to a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different destination manager. + * IMPORTANT: we assume that fwrite() will correctly transcribe an array of + * JOCTETs into 8-bit-wide elements on external storage. If char is wider + * than 8 bits on your machine, you may need to do some tweaking. + */ + +/* this is not a core library module, so it doesn't define JPEG_INTERNALS */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); +#endif + + +/* Expanded data destination object for stdio output */ + +typedef struct { + struct jpeg_destination_mgr pub; /* public fields */ + + FILE *outfile; /* target stream */ + JOCTET *buffer; /* start of buffer */ +} my_destination_mgr; + +typedef my_destination_mgr *my_dest_ptr; + +#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* Expanded data destination object for memory output */ + +typedef struct { + struct jpeg_destination_mgr pub; /* public fields */ + + unsigned char **outbuffer; /* target buffer */ + unsigned long *outsize; + unsigned char *newbuffer; /* newly allocated buffer */ + JOCTET *buffer; /* start of buffer */ + size_t bufsize; +} my_mem_destination_mgr; + +typedef my_mem_destination_mgr *my_mem_dest_ptr; +#endif + + +/* + * Initialize destination --- called by jpeg_start_compress + * before any data is actually written. + */ + +METHODDEF(void) +init_destination (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + + /* Allocate the output buffer --- it will be released when done with image */ + dest->buffer = (JOCTET *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + OUTPUT_BUF_SIZE * sizeof(JOCTET)); + + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +init_mem_destination (j_compress_ptr cinfo) +{ + /* no work necessary here */ +} +#endif + + +/* + * Empty the output buffer --- called whenever buffer fills up. + * + * In typical applications, this should write the entire output buffer + * (ignoring the current state of next_output_byte & free_in_buffer), + * reset the pointer & count to the start of the buffer, and return TRUE + * indicating that the buffer has been dumped. + * + * In applications that need to be able to suspend compression due to output + * overrun, a FALSE return indicates that the buffer cannot be emptied now. + * In this situation, the compressor will return to its caller (possibly with + * an indication that it has not accepted all the supplied scanlines). The + * application should resume compression after it has made more room in the + * output buffer. Note that there are substantial restrictions on the use of + * suspension --- see the documentation. + * + * When suspending, the compressor will back up to a convenient restart point + * (typically the start of the current MCU). next_output_byte & free_in_buffer + * indicate where the restart point will be if the current call returns FALSE. + * Data beyond this point will be regenerated after resumption, so do not + * write it out when emptying the buffer externally. + */ + +METHODDEF(boolean) +empty_output_buffer (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + + if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) != + (size_t) OUTPUT_BUF_SIZE) + ERREXIT(cinfo, JERR_FILE_WRITE); + + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; + + return TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(boolean) +empty_mem_output_buffer (j_compress_ptr cinfo) +{ + size_t nextsize; + JOCTET *nextbuffer; + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + /* Try to allocate new buffer with double size */ + nextsize = dest->bufsize * 2; + nextbuffer = (JOCTET *) malloc(nextsize); + + if (nextbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + + MEMCOPY(nextbuffer, dest->buffer, dest->bufsize); + + if (dest->newbuffer != NULL) + free(dest->newbuffer); + + dest->newbuffer = nextbuffer; + + dest->pub.next_output_byte = nextbuffer + dest->bufsize; + dest->pub.free_in_buffer = dest->bufsize; + + dest->buffer = nextbuffer; + dest->bufsize = nextsize; + + return TRUE; +} +#endif + + +/* + * Terminate destination --- called by jpeg_finish_compress + * after all data has been written. Usually needs to flush buffer. + * + * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding + * application must deal with any cleanup that should happen even + * for error exit. + */ + +METHODDEF(void) +term_destination (j_compress_ptr cinfo) +{ + my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer; + + /* Write any data remaining in the buffer */ + if (datacount > 0) { + if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount) + ERREXIT(cinfo, JERR_FILE_WRITE); + } + fflush(dest->outfile); + /* Make sure we wrote the output file OK */ + if (ferror(dest->outfile)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +term_mem_destination (j_compress_ptr cinfo) +{ + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + *dest->outbuffer = dest->buffer; + *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer); +} +#endif + + +/* + * Prepare for output to a stdio stream. + * The caller must have already opened the stream, and is responsible + * for closing it after finishing compression. + */ + +GLOBAL(void) +jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) +{ + my_dest_ptr dest; + + /* The destination object is made permanent so that multiple JPEG images + * can be written to the same file without re-executing jpeg_stdio_dest. + * This makes it dangerous to use this manager and a different destination + * manager serially with the same JPEG object, because their private object + * sizes may be different. Caveat programmer. + */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + cinfo->dest = (struct jpeg_destination_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_destination_mgr)); + } + + dest = (my_dest_ptr) cinfo->dest; + dest->pub.init_destination = init_destination; + dest->pub.empty_output_buffer = empty_output_buffer; + dest->pub.term_destination = term_destination; + dest->outfile = outfile; +} + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* + * Prepare for output to a memory buffer. + * The caller may supply an own initial buffer with appropriate size. + * Otherwise, or when the actual data output exceeds the given size, + * the library adapts the buffer size as necessary. + * The standard library functions malloc/free are used for allocating + * larger memory, so the buffer is available to the application after + * finishing compression, and then the application is responsible for + * freeing the requested memory. + * Note: An initial buffer supplied by the caller is expected to be + * managed by the application. The library does not free such buffer + * when allocating a larger buffer. + */ + +GLOBAL(void) +jpeg_mem_dest (j_compress_ptr cinfo, + unsigned char **outbuffer, unsigned long *outsize) +{ + my_mem_dest_ptr dest; + + if (outbuffer == NULL || outsize == NULL) /* sanity check */ + ERREXIT(cinfo, JERR_BUFFER_SIZE); + + /* The destination object is made permanent so that multiple JPEG images + * can be written to the same buffer without re-executing jpeg_mem_dest. + */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + cinfo->dest = (struct jpeg_destination_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_mem_destination_mgr)); + } + + dest = (my_mem_dest_ptr) cinfo->dest; + dest->pub.init_destination = init_mem_destination; + dest->pub.empty_output_buffer = empty_mem_output_buffer; + dest->pub.term_destination = term_mem_destination; + dest->outbuffer = outbuffer; + dest->outsize = outsize; + dest->newbuffer = NULL; + + if (*outbuffer == NULL || *outsize == 0) { + /* Allocate initial buffer */ + dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); + if (dest->newbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + *outsize = OUTPUT_BUF_SIZE; + } + + dest->pub.next_output_byte = dest->buffer = *outbuffer; + dest->pub.free_in_buffer = dest->bufsize = *outsize; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatadst-tj.c glmark2-2021.02/src/libjpeg-turbo/jdatadst-tj.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatadst-tj.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdatadst-tj.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* - * jdatadst.c + * jdatadst-tj.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Modified 2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2009-2012 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2014 D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains compression data destination routines for the case of * emitting JPEG data to memory or to a file (or any stdio stream). @@ -20,13 +23,13 @@ #include "jpeglib.h" #include "jerror.h" -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); #endif -#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ +#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ /* Expanded data destination object for memory output */ @@ -34,15 +37,15 @@ typedef struct { struct jpeg_destination_mgr pub; /* public fields */ - unsigned char ** outbuffer; /* target buffer */ - unsigned long * outsize; - unsigned char * newbuffer; /* newly allocated buffer */ - JOCTET * buffer; /* start of buffer */ + unsigned char **outbuffer; /* target buffer */ + unsigned long *outsize; + unsigned char *newbuffer; /* newly allocated buffer */ + JOCTET *buffer; /* start of buffer */ size_t bufsize; boolean alloc; } my_mem_destination_mgr; -typedef my_mem_destination_mgr * my_mem_dest_ptr; +typedef my_mem_destination_mgr *my_mem_dest_ptr; /* @@ -84,14 +87,14 @@ empty_mem_output_buffer (j_compress_ptr cinfo) { size_t nextsize; - JOCTET * nextbuffer; + JOCTET *nextbuffer; my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; if (!dest->alloc) ERREXIT(cinfo, JERR_BUFFER_SIZE); /* Try to allocate new buffer with double size */ nextsize = dest->bufsize * 2; - nextbuffer = malloc(nextsize); + nextbuffer = (JOCTET *) malloc(nextsize); if (nextbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); @@ -145,29 +148,33 @@ GLOBAL(void) jpeg_mem_dest_tj (j_compress_ptr cinfo, - unsigned char ** outbuffer, unsigned long * outsize, - boolean alloc) + unsigned char **outbuffer, unsigned long *outsize, + boolean alloc) { + boolean reused = FALSE; my_mem_dest_ptr dest; - if (outbuffer == NULL || outsize == NULL) /* sanity check */ + if (outbuffer == NULL || outsize == NULL) /* sanity check */ ERREXIT(cinfo, JERR_BUFFER_SIZE); /* The destination object is made permanent so that multiple JPEG images * can be written to the same buffer without re-executing jpeg_mem_dest. */ - if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_mem_destination_mgr)); + sizeof(my_mem_destination_mgr)); dest = (my_mem_dest_ptr) cinfo->dest; dest->newbuffer = NULL; + dest->buffer = NULL; } dest = (my_mem_dest_ptr) cinfo->dest; dest->pub.init_destination = init_mem_destination; dest->pub.empty_output_buffer = empty_mem_output_buffer; dest->pub.term_destination = term_mem_destination; + if (dest->buffer == *outbuffer && *outbuffer != NULL && alloc) + reused = TRUE; dest->outbuffer = outbuffer; dest->outsize = outsize; dest->alloc = alloc; @@ -175,7 +182,7 @@ if (*outbuffer == NULL || *outsize == 0) { if (alloc) { /* Allocate initial buffer */ - dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE); + dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); if (dest->newbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); *outsize = OUTPUT_BUF_SIZE; @@ -184,5 +191,7 @@ } dest->pub.next_output_byte = dest->buffer = *outbuffer; - dest->pub.free_in_buffer = dest->bufsize = *outsize; + if (!reused) + dest->bufsize = *outsize; + dest->pub.free_in_buffer = dest->bufsize; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatasrc.c glmark2-2021.02/src/libjpeg-turbo/jdatasrc.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatasrc.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdatasrc.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,284 @@ +/* + * jdatasrc.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains decompression data source routines for the case of + * reading JPEG data from memory or from a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different source manager. + * IMPORTANT: we assume that fread() will correctly transcribe an array of + * JOCTETs from 8-bit-wide elements on external storage. If char is wider + * than 8 bits on your machine, you may need to do some tweaking. + */ + +/* this is not a core library module, so it doesn't define JPEG_INTERNALS */ +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + + +/* Expanded data source object for stdio input */ + +typedef struct { + struct jpeg_source_mgr pub; /* public fields */ + + FILE *infile; /* source stream */ + JOCTET *buffer; /* start of buffer */ + boolean start_of_file; /* have we gotten any data yet? */ +} my_source_mgr; + +typedef my_source_mgr *my_src_ptr; + +#define INPUT_BUF_SIZE 4096 /* choose an efficiently fread'able size */ + + +/* + * Initialize source --- called by jpeg_read_header + * before any data is actually read. + */ + +METHODDEF(void) +init_source (j_decompress_ptr cinfo) +{ + my_src_ptr src = (my_src_ptr) cinfo->src; + + /* We reset the empty-input-file flag for each image, + * but we don't clear the input buffer. + * This is correct behavior for reading a series of images from one source. + */ + src->start_of_file = TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(void) +init_mem_source (j_decompress_ptr cinfo) +{ + /* no work necessary here */ +} +#endif + + +/* + * Fill the input buffer --- called whenever buffer is emptied. + * + * In typical applications, this should read fresh data into the buffer + * (ignoring the current state of next_input_byte & bytes_in_buffer), + * reset the pointer & count to the start of the buffer, and return TRUE + * indicating that the buffer has been reloaded. It is not necessary to + * fill the buffer entirely, only to obtain at least one more byte. + * + * There is no such thing as an EOF return. If the end of the file has been + * reached, the routine has a choice of ERREXIT() or inserting fake data into + * the buffer. In most cases, generating a warning message and inserting a + * fake EOI marker is the best course of action --- this will allow the + * decompressor to output however much of the image is there. However, + * the resulting error message is misleading if the real problem is an empty + * input file, so we handle that case specially. + * + * In applications that need to be able to suspend compression due to input + * not being available yet, a FALSE return indicates that no more data can be + * obtained right now, but more may be forthcoming later. In this situation, + * the decompressor will return to its caller (with an indication of the + * number of scanlines it has read, if any). The application should resume + * decompression after it has loaded more data into the input buffer. Note + * that there are substantial restrictions on the use of suspension --- see + * the documentation. + * + * When suspending, the decompressor will back up to a convenient restart point + * (typically the start of the current MCU). next_input_byte & bytes_in_buffer + * indicate where the restart point will be if the current call returns FALSE. + * Data beyond this point must be rescanned after resumption, so move it to + * the front of the buffer rather than discarding it. + */ + +METHODDEF(boolean) +fill_input_buffer (j_decompress_ptr cinfo) +{ + my_src_ptr src = (my_src_ptr) cinfo->src; + size_t nbytes; + + nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE); + + if (nbytes <= 0) { + if (src->start_of_file) /* Treat empty input file as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); + WARNMS(cinfo, JWRN_JPEG_EOF); + /* Insert a fake EOI marker */ + src->buffer[0] = (JOCTET) 0xFF; + src->buffer[1] = (JOCTET) JPEG_EOI; + nbytes = 2; + } + + src->pub.next_input_byte = src->buffer; + src->pub.bytes_in_buffer = nbytes; + src->start_of_file = FALSE; + + return TRUE; +} + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +METHODDEF(boolean) +fill_mem_input_buffer (j_decompress_ptr cinfo) +{ + static const JOCTET mybuffer[4] = { + (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + }; + + /* The whole JPEG data is expected to reside in the supplied memory + * buffer, so any request for more data beyond the given buffer size + * is treated as an error. + */ + WARNMS(cinfo, JWRN_JPEG_EOF); + + /* Insert a fake EOI marker */ + + cinfo->src->next_input_byte = mybuffer; + cinfo->src->bytes_in_buffer = 2; + + return TRUE; +} +#endif + + +/* + * Skip data --- used to skip over a potentially large amount of + * uninteresting data (such as an APPn marker). + * + * Writers of suspendable-input applications must note that skip_input_data + * is not granted the right to give a suspension return. If the skip extends + * beyond the data currently in the buffer, the buffer can be marked empty so + * that the next read will cause a fill_input_buffer call that can suspend. + * Arranging for additional bytes to be discarded before reloading the input + * buffer is the application writer's problem. + */ + +METHODDEF(void) +skip_input_data (j_decompress_ptr cinfo, long num_bytes) +{ + struct jpeg_source_mgr *src = cinfo->src; + + /* Just a dumb implementation for now. Could use fseek() except + * it doesn't work on pipes. Not clear that being smart is worth + * any trouble anyway --- large skips are infrequent. + */ + if (num_bytes > 0) { + while (num_bytes > (long) src->bytes_in_buffer) { + num_bytes -= (long) src->bytes_in_buffer; + (void) (*src->fill_input_buffer) (cinfo); + /* note we assume that fill_input_buffer will never return FALSE, + * so suspension need not be handled. + */ + } + src->next_input_byte += (size_t) num_bytes; + src->bytes_in_buffer -= (size_t) num_bytes; + } +} + + +/* + * An additional method that can be provided by data source modules is the + * resync_to_restart method for error recovery in the presence of RST markers. + * For the moment, this source module just uses the default resync method + * provided by the JPEG library. That method assumes that no backtracking + * is possible. + */ + + +/* + * Terminate source --- called by jpeg_finish_decompress + * after all data has been read. Often a no-op. + * + * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding + * application must deal with any cleanup that should happen even + * for error exit. + */ + +METHODDEF(void) +term_source (j_decompress_ptr cinfo) +{ + /* no work necessary here */ +} + + +/* + * Prepare for input from a stdio stream. + * The caller must have already opened the stream, and is responsible + * for closing it after finishing decompression. + */ + +GLOBAL(void) +jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) +{ + my_src_ptr src; + + /* The source object and input buffer are made permanent so that a series + * of JPEG images can be read from the same file by calling jpeg_stdio_src + * only before the first one. (If we discarded the buffer at the end of + * one image, we'd likely lose the start of the next one.) + * This makes it unsafe to use this manager and a different source + * manager serially with the same JPEG object. Caveat programmer. + */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ + cinfo->src = (struct jpeg_source_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(my_source_mgr)); + src = (my_src_ptr) cinfo->src; + src->buffer = (JOCTET *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + INPUT_BUF_SIZE * sizeof(JOCTET)); + } + + src = (my_src_ptr) cinfo->src; + src->pub.init_source = init_source; + src->pub.fill_input_buffer = fill_input_buffer; + src->pub.skip_input_data = skip_input_data; + src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */ + src->pub.term_source = term_source; + src->infile = infile; + src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */ + src->pub.next_input_byte = NULL; /* until buffer loaded */ +} + + +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) +/* + * Prepare for input from a supplied memory buffer. + * The buffer must contain the whole JPEG data. + */ + +GLOBAL(void) +jpeg_mem_src (j_decompress_ptr cinfo, + const unsigned char *inbuffer, unsigned long insize) +{ + struct jpeg_source_mgr *src; + + if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); + + /* The source object is made permanent so that a series of JPEG images + * can be read from the same buffer by calling jpeg_mem_src only before + * the first one. + */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ + cinfo->src = (struct jpeg_source_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + sizeof(struct jpeg_source_mgr)); + } + + src = cinfo->src; + src->init_source = init_mem_source; + src->fill_input_buffer = fill_mem_input_buffer; + src->skip_input_data = skip_input_data; + src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ + src->term_source = term_source; + src->bytes_in_buffer = (size_t) insize; + src->next_input_byte = (const JOCTET *) inbuffer; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatasrc-tj.c glmark2-2021.02/src/libjpeg-turbo/jdatasrc-tj.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdatasrc-tj.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdatasrc-tj.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* - * jdatasrc.c + * jdatasrc-tj.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Modified 2009-2010 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2009-2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains decompression data source routines for the case of * reading JPEG data from memory or from a file (or any stdio stream). @@ -69,16 +72,17 @@ METHODDEF(boolean) fill_mem_input_buffer (j_decompress_ptr cinfo) { - static JOCTET mybuffer[4]; + static const JOCTET mybuffer[4] = { + (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + }; /* The whole JPEG data is expected to reside in the supplied memory * buffer, so any request for more data beyond the given buffer size * is treated as an error. */ WARNMS(cinfo, JWRN_JPEG_EOF); + /* Insert a fake EOI marker */ - mybuffer[0] = (JOCTET) 0xFF; - mybuffer[1] = (JOCTET) JPEG_EOI; cinfo->src->next_input_byte = mybuffer; cinfo->src->bytes_in_buffer = 2; @@ -102,7 +106,7 @@ METHODDEF(void) skip_input_data (j_decompress_ptr cinfo, long num_bytes) { - struct jpeg_source_mgr * src = cinfo->src; + struct jpeg_source_mgr *src = cinfo->src; /* Just a dumb implementation for now. Could use fseek() except * it doesn't work on pipes. Not clear that being smart is worth @@ -154,21 +158,21 @@ GLOBAL(void) jpeg_mem_src_tj (j_decompress_ptr cinfo, - unsigned char * inbuffer, unsigned long insize) + const unsigned char *inbuffer, unsigned long insize) { - struct jpeg_source_mgr * src; + struct jpeg_source_mgr *src; - if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ + if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ ERREXIT(cinfo, JERR_INPUT_EMPTY); /* The source object is made permanent so that a series of JPEG images * can be read from the same buffer by calling jpeg_mem_src only before * the first one. */ - if (cinfo->src == NULL) { /* first time for this JPEG object? */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(struct jpeg_source_mgr)); + sizeof(struct jpeg_source_mgr)); } src = cinfo->src; @@ -178,5 +182,5 @@ src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ src->term_source = term_source; src->bytes_in_buffer = (size_t) insize; - src->next_input_byte = (JOCTET *) inbuffer; + src->next_input_byte = (const JOCTET *) inbuffer; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcoefct.c glmark2-2021.02/src/libjpeg-turbo/jdcoefct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcoefct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcoefct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jdcoefct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the coefficient buffer controller for decompression. * This controller is the top level of the JPEG decompressor proper. @@ -15,95 +19,25 @@ * Also, the input side (only) is used when reading a file for transcoding. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" +#include "jdcoefct.h" #include "jpegcomp.h" -/* Block smoothing is only applicable for progressive JPEG, so: */ -#ifndef D_PROGRESSIVE_SUPPORTED -#undef BLOCK_SMOOTHING_SUPPORTED -#endif - -/* Private buffer controller object */ - -typedef struct { - struct jpeg_d_coef_controller pub; /* public fields */ - - /* These variables keep track of the current location of the input side. */ - /* cinfo->input_iMCU_row is also used for this. */ - JDIMENSION MCU_ctr; /* counts MCUs processed in current row */ - int MCU_vert_offset; /* counts MCU rows within iMCU row */ - int MCU_rows_per_iMCU_row; /* number of such rows needed */ - - /* The output side's location is represented by cinfo->output_iMCU_row. */ - - /* In single-pass modes, it's sufficient to buffer just one MCU. - * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks, - * and let the entropy decoder write into that workspace each time. - * (On 80x86, the workspace is FAR even though it's not really very big; - * this is to keep the module interfaces unchanged when a large coefficient - * buffer is necessary.) - * In multi-pass modes, this array points to the current MCU's blocks - * within the virtual arrays; it is used only by the input side. - */ - JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU]; - - /* Temporary workspace for one MCU */ - JCOEF * workspace; - -#ifdef D_MULTISCAN_FILES_SUPPORTED - /* In multi-pass modes, we need a virtual block array for each component. */ - jvirt_barray_ptr whole_image[MAX_COMPONENTS]; -#endif - -#ifdef BLOCK_SMOOTHING_SUPPORTED - /* When doing block smoothing, we latch coefficient Al values here */ - int * coef_bits_latch; -#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ -#endif -} my_coef_controller; - -typedef my_coef_controller * my_coef_ptr; /* Forward declarations */ METHODDEF(int) decompress_onepass - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #ifdef D_MULTISCAN_FILES_SUPPORTED METHODDEF(int) decompress_data - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #endif #ifdef BLOCK_SMOOTHING_SUPPORTED -LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo)); +LOCAL(boolean) smoothing_ok (j_decompress_ptr cinfo); METHODDEF(int) decompress_smooth_data - JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #endif -LOCAL(void) -start_iMCU_row (j_decompress_ptr cinfo) -/* Reset within-iMCU-row counters for a new row (input side) */ -{ - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - - /* In an interleaved scan, an MCU row is the same as an iMCU row. - * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. - * But at the bottom of the image, process only what's left. - */ - if (cinfo->comps_in_scan > 1) { - coef->MCU_rows_per_iMCU_row = 1; - } else { - if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) - coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; - else - coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; - } - - coef->MCU_ctr = 0; - coef->MCU_vert_offset = 0; -} - - /* * Initialize for an input processing pass. */ @@ -152,7 +86,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; int blkn, ci, xindex, yindex, yoffset, useful_width; @@ -165,49 +99,57 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col; - MCU_col_num++) { + MCU_col_num++) { /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ - jzero_far((void FAR *) coef->MCU_buffer[0], - (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK))); + jzero_far((void *) coef->MCU_buffer[0], + (size_t) (cinfo->blocks_in_MCU * sizeof(JBLOCK))); if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->MCU_ctr = MCU_col_num; - return JPEG_SUSPENDED; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->MCU_ctr = MCU_col_num; + return JPEG_SUSPENDED; } - /* Determine where data should go in output_buf and do the IDCT thing. - * We skip dummy blocks at the right and bottom edges (but blkn gets - * incremented past them!). Note the inner loop relies on having - * allocated the MCU_buffer[] blocks sequentially. + + /* Only perform the IDCT on blocks that are contained within the desired + * cropping region. */ - blkn = 0; /* index of current DCT block within MCU */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Don't bother to IDCT an uninteresting component. */ - if (! compptr->component_needed) { - blkn += compptr->MCU_blocks; - continue; - } - inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; - useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - output_ptr = output_buf[compptr->component_index] + - yoffset * compptr->_DCT_scaled_size; - start_col = MCU_col_num * compptr->MCU_sample_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (cinfo->input_iMCU_row < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { - output_col = start_col; - for (xindex = 0; xindex < useful_width; xindex++) { - (*inverse_DCT) (cinfo, compptr, - (JCOEFPTR) coef->MCU_buffer[blkn+xindex], - output_ptr, output_col); - output_col += compptr->_DCT_scaled_size; - } - } - blkn += compptr->MCU_width; - output_ptr += compptr->_DCT_scaled_size; - } + if (MCU_col_num >= cinfo->master->first_iMCU_col && + MCU_col_num <= cinfo->master->last_iMCU_col) { + /* Determine where data should go in output_buf and do the IDCT thing. + * We skip dummy blocks at the right and bottom edges (but blkn gets + * incremented past them!). Note the inner loop relies on having + * allocated the MCU_buffer[] blocks sequentially. + */ + blkn = 0; /* index of current DCT block within MCU */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Don't bother to IDCT an uninteresting component. */ + if (! compptr->component_needed) { + blkn += compptr->MCU_blocks; + continue; + } + inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; + useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width + : compptr->last_col_width; + output_ptr = output_buf[compptr->component_index] + + yoffset * compptr->_DCT_scaled_size; + start_col = (MCU_col_num - cinfo->master->first_iMCU_col) * + compptr->MCU_sample_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (cinfo->input_iMCU_row < last_iMCU_row || + yoffset+yindex < compptr->last_row_height) { + output_col = start_col; + for (xindex = 0; xindex < useful_width; xindex++) { + (*inverse_DCT) (cinfo, compptr, + (JCOEFPTR) coef->MCU_buffer[blkn+xindex], + output_ptr, output_col); + output_col += compptr->_DCT_scaled_size; + } + } + blkn += compptr->MCU_width; + output_ptr += compptr->_DCT_scaled_size; + } + } } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -232,7 +174,7 @@ METHODDEF(int) dummy_consume_data (j_decompress_ptr cinfo) { - return JPEG_SUSPENDED; /* Always indicate nothing was done */ + return JPEG_SUSPENDED; /* Always indicate nothing was done */ } @@ -249,7 +191,7 @@ consume_data (j_decompress_ptr cinfo) { my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - JDIMENSION MCU_col_num; /* index of current MCU within row */ + JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN]; @@ -273,25 +215,25 @@ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row; - MCU_col_num++) { + MCU_col_num++) { /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < compptr->MCU_width; xindex++) { - coef->MCU_buffer[blkn++] = buffer_ptr++; - } - } + compptr = cinfo->cur_comp_info[ci]; + start_col = MCU_col_num * compptr->MCU_width; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + coef->MCU_buffer[blkn++] = buffer_ptr++; + } + } } /* Try to fetch the MCU. */ if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { - /* Suspension forced; update state counters and exit */ - coef->MCU_vert_offset = yoffset; - coef->MCU_ctr = MCU_col_num; - return JPEG_SUSPENDED; + /* Suspension forced; update state counters and exit */ + coef->MCU_vert_offset = yoffset; + coef->MCU_ctr = MCU_col_num; + return JPEG_SUSPENDED; } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -332,8 +274,8 @@ /* Force some input to be done if we are getting ahead of the input. */ while (cinfo->input_scan_number < cinfo->output_scan_number || - (cinfo->input_scan_number == cinfo->output_scan_number && - cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) { + (cinfo->input_scan_number == cinfo->output_scan_number && + cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) { if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; } @@ -361,13 +303,14 @@ output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { - buffer_ptr = buffer[block_row]; + buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; output_col = 0; - for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) { - (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, - output_ptr, output_col); - buffer_ptr++; - output_col += compptr->_DCT_scaled_size; + for (block_num = cinfo->master->first_MCU_col[ci]; + block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, + output_ptr, output_col); + buffer_ptr++; + output_col += compptr->_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; } @@ -413,9 +356,9 @@ boolean smoothing_useful = FALSE; int ci, coefi; jpeg_component_info *compptr; - JQUANT_TBL * qtable; - int * coef_bits; - int * coef_bits_latch; + JQUANT_TBL *qtable; + int *coef_bits; + int *coef_bits_latch; if (! cinfo->progressive_mode || cinfo->coef_bits == NULL) return FALSE; @@ -424,8 +367,8 @@ if (coef->coef_bits_latch == NULL) coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * - (SAVED_COEFS * SIZEOF(int))); + cinfo->num_components * + (SAVED_COEFS * sizeof(int))); coef_bits_latch = coef->coef_bits_latch; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -435,11 +378,11 @@ return FALSE; /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */ if (qtable->quantval[0] == 0 || - qtable->quantval[Q01_POS] == 0 || - qtable->quantval[Q10_POS] == 0 || - qtable->quantval[Q20_POS] == 0 || - qtable->quantval[Q11_POS] == 0 || - qtable->quantval[Q02_POS] == 0) + qtable->quantval[Q01_POS] == 0 || + qtable->quantval[Q10_POS] == 0 || + qtable->quantval[Q20_POS] == 0 || + qtable->quantval[Q11_POS] == 0 || + qtable->quantval[Q02_POS] == 0) return FALSE; /* DC values must be at least partly known for all components. */ coef_bits = cinfo->coef_bits[ci]; @@ -449,7 +392,7 @@ for (coefi = 1; coefi <= 5; coefi++) { coef_bits_latch[coefi] = coef_bits[coefi]; if (coef_bits[coefi] != 0) - smoothing_useful = TRUE; + smoothing_useful = TRUE; } coef_bits_latch += SAVED_COEFS; } @@ -476,10 +419,10 @@ jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; boolean first_row, last_row; - JCOEF * workspace; + JCOEF *workspace; int *coef_bits; JQUANT_TBL *quanttbl; - INT32 Q00,Q01,Q02,Q10,Q11,Q20, num; + JLONG Q00,Q01,Q02,Q10,Q11,Q20, num; int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9; int Al, pred; @@ -488,7 +431,7 @@ /* Force some input to be done if we are getting ahead of the input. */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + ! cinfo->inputctl->eoi_reached) { if (cinfo->input_scan_number == cinfo->output_scan_number) { /* If input is working on current scan, we ordinarily want it to * have completed the current row. But if input scan is DC, @@ -497,7 +440,7 @@ */ JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0; if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta) - break; + break; } if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; @@ -525,15 +468,15 @@ if (cinfo->output_iMCU_row > 0) { access_rows += compptr->v_samp_factor; /* prior iMCU row too */ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], - (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, - (JDIMENSION) access_rows, FALSE); - buffer += compptr->v_samp_factor; /* point to current iMCU row */ + ((j_common_ptr) cinfo, coef->whole_image[ci], + (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, + (JDIMENSION) access_rows, FALSE); + buffer += compptr->v_samp_factor; /* point to current iMCU row */ first_row = FALSE; } else { buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], - (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE); + ((j_common_ptr) cinfo, coef->whole_image[ci], + (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE); first_row = TRUE; } /* Fetch component-dependent info */ @@ -549,15 +492,15 @@ output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { - buffer_ptr = buffer[block_row]; + buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; if (first_row && block_row == 0) - prev_block_row = buffer_ptr; + prev_block_row = buffer_ptr; else - prev_block_row = buffer[block_row-1]; + prev_block_row = buffer[block_row-1]; if (last_row && block_row == block_rows-1) - next_block_row = buffer_ptr; + next_block_row = buffer_ptr; else - next_block_row = buffer[block_row+1]; + next_block_row = buffer[block_row+1]; /* We fetch the surrounding DC values using a sliding-register approach. * Initialize all nine here so as to do the right thing on narrow pics. */ @@ -566,103 +509,104 @@ DC7 = DC8 = DC9 = (int) next_block_row[0][0]; output_col = 0; last_block_column = compptr->width_in_blocks - 1; - for (block_num = 0; block_num <= last_block_column; block_num++) { - /* Fetch current DCT block into workspace so we can modify it. */ - jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); - /* Update DC values */ - if (block_num < last_block_column) { - DC3 = (int) prev_block_row[1][0]; - DC6 = (int) buffer_ptr[1][0]; - DC9 = (int) next_block_row[1][0]; - } - /* Compute coefficient estimates per K.8. - * An estimate is applied only if coefficient is still zero, - * and is not known to be fully accurate. - */ - /* AC01 */ - if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { - num = 36 * Q00 * (DC4 - DC6); - if (num >= 0) { - pred = (int) (((Q01<<7) + num) / (Q01<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q10<<7) + num) / (Q10<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q20<<7) + num) / (Q20<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q11<<7) + num) / (Q11<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { - pred = (int) (((Q02<<7) + num) / (Q02<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1<_DCT_scaled_size; + for (block_num = cinfo->master->first_MCU_col[ci]; + block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { + /* Fetch current DCT block into workspace so we can modify it. */ + jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); + /* Update DC values */ + if (block_num < last_block_column) { + DC3 = (int) prev_block_row[1][0]; + DC6 = (int) buffer_ptr[1][0]; + DC9 = (int) next_block_row[1][0]; + } + /* Compute coefficient estimates per K.8. + * An estimate is applied only if coefficient is still zero, + * and is not known to be fully accurate. + */ + /* AC01 */ + if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { + num = 36 * Q00 * (DC4 - DC6); + if (num >= 0) { + pred = (int) (((Q01<<7) + num) / (Q01<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q10<<7) + num) / (Q10<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q20<<7) + num) / (Q20<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q11<<7) + num) / (Q11<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<= 0) { + pred = (int) (((Q02<<7) + num) / (Q02<<8)); + if (Al > 0 && pred >= (1< 0 && pred >= (1<_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; } @@ -687,7 +631,7 @@ coef = (my_coef_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_coef_controller)); + sizeof(my_coef_controller)); cinfo->coef = (struct jpeg_d_coef_controller *) coef; coef->pub.start_input_pass = start_input_pass; coef->pub.start_output_pass = start_output_pass; @@ -705,20 +649,20 @@ jpeg_component_info *compptr; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { access_rows = compptr->v_samp_factor; #ifdef BLOCK_SMOOTHING_SUPPORTED /* If block smoothing could be used, need a bigger window */ if (cinfo->progressive_mode) - access_rows *= 3; + access_rows *= 3; #endif coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE, - (JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) access_rows); + ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE, + (JDIMENSION) jround_up((long) compptr->width_in_blocks, + (long) compptr->h_samp_factor), + (JDIMENSION) jround_up((long) compptr->height_in_blocks, + (long) compptr->v_samp_factor), + (JDIMENSION) access_rows); } coef->pub.consume_data = consume_data; coef->pub.decompress_data = decompress_data; @@ -733,7 +677,7 @@ buffer = (JBLOCKROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)); + D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; } @@ -745,5 +689,5 @@ /* Allocate the workspace buffer */ coef->workspace = (JCOEF *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(JCOEF) * DCTSIZE2); + sizeof(JCOEF) * DCTSIZE2); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcoefct.h glmark2-2021.02/src/libjpeg-turbo/jdcoefct.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcoefct.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcoefct.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,82 @@ +/* + * jdcoefct.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright 2009 Pierre Ossman for Cendio AB + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" + + +/* Block smoothing is only applicable for progressive JPEG, so: */ +#ifndef D_PROGRESSIVE_SUPPORTED +#undef BLOCK_SMOOTHING_SUPPORTED +#endif + + +/* Private buffer controller object */ + +typedef struct { + struct jpeg_d_coef_controller pub; /* public fields */ + + /* These variables keep track of the current location of the input side. */ + /* cinfo->input_iMCU_row is also used for this. */ + JDIMENSION MCU_ctr; /* counts MCUs processed in current row */ + int MCU_vert_offset; /* counts MCU rows within iMCU row */ + int MCU_rows_per_iMCU_row; /* number of such rows needed */ + + /* The output side's location is represented by cinfo->output_iMCU_row. */ + + /* In single-pass modes, it's sufficient to buffer just one MCU. + * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks, + * and let the entropy decoder write into that workspace each time. + * In multi-pass modes, this array points to the current MCU's blocks + * within the virtual arrays; it is used only by the input side. + */ + JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU]; + + /* Temporary workspace for one MCU */ + JCOEF *workspace; + +#ifdef D_MULTISCAN_FILES_SUPPORTED + /* In multi-pass modes, we need a virtual block array for each component. */ + jvirt_barray_ptr whole_image[MAX_COMPONENTS]; +#endif + +#ifdef BLOCK_SMOOTHING_SUPPORTED + /* When doing block smoothing, we latch coefficient Al values here */ + int *coef_bits_latch; +#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ +#endif +} my_coef_controller; + +typedef my_coef_controller *my_coef_ptr; + + +LOCAL(void) +start_iMCU_row (j_decompress_ptr cinfo) +/* Reset within-iMCU-row counters for a new row (input side) */ +{ + my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + + /* In an interleaved scan, an MCU row is the same as an iMCU row. + * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. + * But at the bottom of the image, process only what's left. + */ + if (cinfo->comps_in_scan > 1) { + coef->MCU_rows_per_iMCU_row = 1; + } else { + if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) + coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; + else + coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; + } + + coef->MCU_ctr = 0; + coef->MCU_vert_offset = 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcol565.c glmark2-2021.02/src/libjpeg-turbo/jdcol565.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcol565.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcol565.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,384 @@ +/* + * jdcol565.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains output colorspace conversion routines. + */ + +/* This file is included by jdcolor.c */ + + +INLINE +LOCAL(void) +ycc_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + + if (PACK_NEED_ALIGNMENT(outptr)) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + y = GETJSAMPLE(*inptr0); + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + r = range_limit[y + Crrtab[cr]]; + g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + b = range_limit[y + Cbbtab[cb]]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + y = GETJSAMPLE(*inptr0); + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; + g = range_limit[DITHER_565_G(y + + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)), d0)]; + b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +rgb_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_SHORT_565(r, g, b); + + r = GETJSAMPLE(*inptr0++); + g = GETJSAMPLE(*inptr1++); + b = GETJSAMPLE(*inptr2++); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + r = GETJSAMPLE(*inptr0); + g = GETJSAMPLE(*inptr1); + b = GETJSAMPLE(*inptr2); + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + register JSAMPLE * range_limit = cinfo->sample_range_limit; + JDIMENSION num_cols = cinfo->output_width; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + SHIFT_TEMPS + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int r, g, b; + + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)]; + g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)]; + b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +gray_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int g; + + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + g = *inptr++; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + g = *inptr++; + rgb = PACK_SHORT_565(g, g, g); + g = *inptr++; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g)); + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + g = *inptr; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + } + } +} + + +INLINE +LOCAL(void) +gray_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + register JSAMPLE * range_limit = cinfo->sample_range_limit; + JDIMENSION num_cols = cinfo->output_width; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + + while (--num_rows >= 0) { + JLONG rgb; + unsigned int g; + + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + if (PACK_NEED_ALIGNMENT(outptr)) { + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + outptr += 2; + num_cols--; + } + for (col = 0; col < (num_cols >> 1); col++) { + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + d0 = DITHER_ROTATE(d0); + + g = *inptr++; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g)); + d0 = DITHER_ROTATE(d0); + + WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); + outptr += 4; + } + if (num_cols & 1) { + g = *inptr; + g = range_limit[DITHER_565_R(g, d0)]; + rgb = PACK_SHORT_565(g, g, g); + *(INT16*)outptr = (INT16)rgb; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolext.c glmark2-2021.02/src/libjpeg-turbo/jdcolext.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcolext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,143 @@ +/* + * jdcolext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2011, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains output colorspace conversion routines. + */ + + +/* This file is included by jdcolor.c */ + + +/* + * Convert some rows of samples to the output colorspace. + * + * Note that we change from noninterleaved, one-plane-per-component format + * to interleaved-pixel format. The output buffer is therefore three times + * as wide as the input buffer. + * A starting row offset is provided only for the input buffer. The caller + * can easily adjust the passed output_buf value to accommodate any row + * offset required on that side. + */ + +INLINE +LOCAL(void) +ycc_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int y, cb, cr; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + register int * Crrtab = cconvert->Cr_r_tab; + register int * Cbbtab = cconvert->Cb_b_tab; + register JLONG * Crgtab = cconvert->Cr_g_tab; + register JLONG * Cbgtab = cconvert->Cb_g_tab; + SHIFT_TEMPS + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + y = GETJSAMPLE(inptr0[col]); + cb = GETJSAMPLE(inptr1[col]); + cr = GETJSAMPLE(inptr2[col]); + /* Range-limiting is essential due to noise introduced by DCT losses. */ + outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; + outptr[RGB_GREEN] = range_limit[y + + ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} + + +/* + * Convert grayscale to RGB: just duplicate the graylevel three times. + * This is provided to support applications that don't want to cope + * with grayscale as a separate case. + */ + +INLINE +LOCAL(void) +gray_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr, outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr = input_buf[0][input_row++]; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + /* We can dispense with GETJSAMPLE() here */ + outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} + + +/* + * Convert RGB to extended RGB: just swap the order of source pixels + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr0, inptr1, inptr2; + register JSAMPROW outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + /* We can dispense with GETJSAMPLE() here */ + outptr[RGB_RED] = inptr0[col]; + outptr[RGB_GREEN] = inptr1[col]; + outptr[RGB_BLUE] = inptr2[col]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolext.c.inc glmark2-2021.02/src/libjpeg-turbo/jdcolext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcolext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,104 +0,0 @@ -/* - * jdcolext.c - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains output colorspace conversion routines. - */ - - -/* This file is included by jdcolor.c */ - - -/* - * Convert some rows of samples to the output colorspace. - * - * Note that we change from noninterleaved, one-plane-per-component format - * to interleaved-pixel format. The output buffer is therefore three times - * as wide as the input buffer. - * A starting row offset is provided only for the input buffer. The caller - * can easily adjust the passed output_buf value to accommodate any row - * offset required on that side. - */ - -INLINE -LOCAL(void) -ycc_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; - register int y, cb, cr; - register JSAMPROW outptr; - register JSAMPROW inptr0, inptr1, inptr2; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->output_width; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register INT32 * Crgtab = cconvert->Cr_g_tab; - register INT32 * Cbgtab = cconvert->Cb_g_tab; - SHIFT_TEMPS - - while (--num_rows >= 0) { - inptr0 = input_buf[0][input_row]; - inptr1 = input_buf[1][input_row]; - inptr2 = input_buf[2][input_row]; - input_row++; - outptr = *output_buf++; - for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); - /* Range-limiting is essential due to noise introduced by DCT losses. */ - outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; - outptr[RGB_GREEN] = range_limit[y + - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], - SCALEBITS))]; - outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; - /* Set unused byte to 0xFF so it can be interpreted as an opaque */ - /* alpha channel value */ -#ifdef RGB_ALPHA - outptr[RGB_ALPHA] = 0xFF; -#endif - outptr += RGB_PIXELSIZE; - } - } -} - - -/* - * Convert grayscale to RGB: just duplicate the graylevel three times. - * This is provided to support applications that don't want to cope - * with grayscale as a separate case. - */ - -INLINE -LOCAL(void) -gray_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - register JSAMPROW inptr, outptr; - register JDIMENSION col; - JDIMENSION num_cols = cinfo->output_width; - - while (--num_rows >= 0) { - inptr = input_buf[0][input_row++]; - outptr = *output_buf++; - for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ - outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; - /* Set unused byte to 0xFF so it can be interpreted as an opaque */ - /* alpha channel value */ -#ifdef RGB_ALPHA - outptr[RGB_ALPHA] = 0xFF; -#endif - outptr += RGB_PIXELSIZE; - } - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolor.c glmark2-2021.02/src/libjpeg-turbo/jdcolor.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdcolor.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdcolor.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jdcolor.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2011 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains output colorspace conversion routines. */ @@ -14,7 +18,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" /* Private subobject */ @@ -23,24 +27,32 @@ struct jpeg_color_deconverter pub; /* public fields */ /* Private state for YCC->RGB conversion */ - int * Cr_r_tab; /* => table for Cr to R conversion */ - int * Cb_b_tab; /* => table for Cb to B conversion */ - INT32 * Cr_g_tab; /* => table for Cr to G conversion */ - INT32 * Cb_g_tab; /* => table for Cb to G conversion */ + int *Cr_r_tab; /* => table for Cr to R conversion */ + int *Cb_b_tab; /* => table for Cb to B conversion */ + JLONG *Cr_g_tab; /* => table for Cr to G conversion */ + JLONG *Cb_g_tab; /* => table for Cb to G conversion */ + + /* Private state for RGB->Y conversion */ + JLONG *rgb_y_tab; /* => table for RGB to Y conversion */ } my_color_deconverter; -typedef my_color_deconverter * my_cconvert_ptr; +typedef my_color_deconverter *my_cconvert_ptr; /**************** YCbCr -> RGB conversion: most common case **************/ +/**************** RGB -> Y conversion: less common case **************/ /* * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore - * R = Y + 1.40200 * Cr - * G = Y - 0.34414 * Cb - 0.71414 * Cr - * B = Y + 1.77200 * Cb + * + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * * where Cb and Cr represent the incoming values less CENTERJSAMPLE. * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * @@ -61,14 +73,26 @@ * together before rounding. */ -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L<Y conversion and divide it up into + * three parts, instead of doing three alloc_small requests. This lets us + * use a single table base address, which can be held in a register in the + * inner loops on many machines (more than can hold all three addresses, + * anyway). + */ + +#define R_Y_OFF 0 /* offset to R => Y section */ +#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ +#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ +#define TABLE_SIZE (3*(MAXJSAMPLE+1)) /* Include inline routines for colorspace extensions */ -#include "jdcolext.c.inc" +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -80,13 +104,15 @@ #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgb_convert_internal #define gray_rgb_convert_internal gray_extrgb_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extrgb_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -95,7 +121,8 @@ #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgbx_convert_internal #define gray_rgb_convert_internal gray_extrgbx_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -103,6 +130,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -110,13 +138,15 @@ #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgr_convert_internal #define gray_rgb_convert_internal gray_extbgr_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extbgr_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -125,7 +155,8 @@ #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgrx_convert_internal #define gray_rgb_convert_internal gray_extbgrx_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -133,6 +164,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -141,7 +173,8 @@ #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxbgr_convert_internal #define gray_rgb_convert_internal gray_extxbgr_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -149,6 +182,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -157,7 +191,8 @@ #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxrgb_convert_internal #define gray_rgb_convert_internal gray_extxrgb_convert_internal -#include "jdcolext.c.inc" +#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal +#include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE @@ -165,6 +200,7 @@ #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal /* @@ -176,31 +212,31 @@ { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; int i; - INT32 x; + JLONG x; SHIFT_TEMPS cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); + (MAXJSAMPLE+1) * sizeof(int)); cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); - cconvert->Cr_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(int)); + cconvert->Cr_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); - cconvert->Cb_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(JLONG)); + cconvert->Cb_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); + (MAXJSAMPLE+1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ /* Cr=>R value is nearest int to 1.40200 * x */ cconvert->Cr_r_tab[i] = (int) - RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); /* Cb=>B value is nearest int to 1.77200 * x */ cconvert->Cb_b_tab[i] = (int) - RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ @@ -216,8 +252,8 @@ METHODDEF(void) ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -260,32 +296,122 @@ /* + * Initialize for RGB->grayscale colorspace conversion. + */ + +LOCAL(void) +build_rgb_y_table (j_decompress_ptr cinfo) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + JLONG *rgb_y_tab; + JLONG i; + + /* Allocate and fill in the conversion tables. */ + cconvert->rgb_y_tab = rgb_y_tab = (JLONG *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (TABLE_SIZE * sizeof(JLONG))); + + for (i = 0; i <= MAXJSAMPLE; i++) { + rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i; + rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i; + rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; + } +} + + +/* + * Convert RGB to grayscale. + */ + +METHODDEF(void) +rgb_gray_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + register int r, g, b; + register JLONG *ctab = cconvert->rgb_y_tab; + register JSAMPROW outptr; + register JSAMPROW inptr0, inptr1, inptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + r = GETJSAMPLE(inptr0[col]); + g = GETJSAMPLE(inptr1[col]); + b = GETJSAMPLE(inptr2[col]); + /* Y */ + outptr[col] = (JSAMPLE) + ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) + >> SCALEBITS); + } + } +} + + +/* * Color conversion for no colorspace change: just copy the data, * converting from separate-planes to interleaved representation. */ METHODDEF(void) null_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { - register JSAMPROW inptr, outptr; - register JDIMENSION count; + register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr; + register JDIMENSION col; register int num_components = cinfo->num_components; JDIMENSION num_cols = cinfo->output_width; int ci; - while (--num_rows >= 0) { - for (ci = 0; ci < num_components; ci++) { - inptr = input_buf[ci][input_row]; - outptr = output_buf[0] + ci; - for (count = num_cols; count > 0; count--) { - *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */ - outptr += num_components; + if (num_components == 3) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; } } - input_row++; - output_buf++; + } else if (num_components == 4) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + inptr3 = input_buf[3][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; + *outptr++ = inptr3[col]; + } + } + } else { + while (--num_rows >= 0) { + for (ci = 0; ci < num_components; ci++) { + inptr = input_buf[ci][input_row]; + outptr = *output_buf; + for (col = 0; col < num_cols; col++) { + outptr[ci] = inptr[col]; + outptr += num_components; + } + } + output_buf++; + input_row++; + } } } @@ -298,11 +424,11 @@ METHODDEF(void) grayscale_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0, - num_rows, cinfo->output_width); + num_rows, cinfo->output_width); } @@ -312,8 +438,8 @@ METHODDEF(void) gray_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -353,6 +479,52 @@ /* + * Convert plain RGB to extended RGB + */ + +METHODDEF(void) +rgb_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGR: + rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + } +} + + +/* * Adobe-style YCCK->CMYK conversion. * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same * conversion as above, while passing K (black) unchanged. @@ -361,8 +533,8 @@ METHODDEF(void) ycck_cmyk_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int y, cb, cr; @@ -371,11 +543,11 @@ register JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register INT32 * Crgtab = cconvert->Cr_g_tab; - register INT32 * Cbgtab = cconvert->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + register int *Crrtab = cconvert->Cr_r_tab; + register int *Cbbtab = cconvert->Cb_b_tab; + register JLONG *Crgtab = cconvert->Cr_g_tab; + register JLONG *Cbgtab = cconvert->Cb_g_tab; SHIFT_TEMPS while (--num_rows >= 0) { @@ -390,13 +562,13 @@ cb = GETJSAMPLE(inptr1[col]); cr = GETJSAMPLE(inptr2[col]); /* Range-limiting is essential due to noise introduced by DCT losses. */ - outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ - outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], - SCALEBITS)))]; - outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ + outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ + outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ + ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS)))]; + outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ /* K passes through unchanged */ - outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ + outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ outptr += 4; } } @@ -404,6 +576,164 @@ /* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(int *)(addr)) = pixels) + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficent for dithering RGB888 to RGB565. + */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const JLONG dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_le +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le +#define rgb_rgb565_convert_internal rgb_rgb565_convert_le +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le +#define gray_rgb565_convert_internal gray_rgb565_convert_le +#define gray_rgb565D_convert_internal gray_rgb565D_convert_le +#include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_be +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_be +#define rgb_rgb565_convert_internal rgb_rgb565_convert_be +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_be +#define gray_rgb565_convert_internal gray_rgb565_convert_be +#define gray_rgb565D_convert_internal gray_rgb565D_convert_be +#include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + + +METHODDEF(void) +ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +ycc_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +/* * Empty method for start_pass. */ @@ -426,7 +756,7 @@ cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_color_deconverter)); + sizeof(my_color_deconverter)); cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert; cconvert->pub.start_pass = start_pass_dcolor; @@ -449,7 +779,7 @@ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); break; - default: /* JCS_UNKNOWN can be anything */ + default: /* JCS_UNKNOWN can be anything */ if (cinfo->num_components < 1) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); break; @@ -464,11 +794,14 @@ case JCS_GRAYSCALE: cinfo->out_color_components = 1; if (cinfo->jpeg_color_space == JCS_GRAYSCALE || - cinfo->jpeg_color_space == JCS_YCbCr) { + cinfo->jpeg_color_space == JCS_YCbCr) { cconvert->pub.color_convert = grayscale_convert; /* For color->grayscale conversion, only the Y (0) component is needed */ for (ci = 1; ci < cinfo->num_components; ci++) - cinfo->comp_info[ci].component_needed = FALSE; + cinfo->comp_info[ci].component_needed = FALSE; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_gray_convert; + build_rgb_y_table(cinfo); } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -494,13 +827,48 @@ } } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { cconvert->pub.color_convert = gray_rgb_convert; - } else if (cinfo->jpeg_color_space == cinfo->out_color_space && - rgb_pixelsize[cinfo->out_color_space] == 3) { - cconvert->pub.color_convert = null_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (rgb_red[cinfo->out_color_space] == 0 && + rgb_green[cinfo->out_color_space] == 1 && + rgb_blue[cinfo->out_color_space] == 2 && + rgb_pixelsize[cinfo->out_color_space] == 3) + cconvert->pub.color_convert = null_convert; + else + cconvert->pub.color_convert = rgb_rgb_convert; } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; + case JCS_RGB565: + cinfo->out_color_components = 3; + if (cinfo->dither_mode == JDITHER_NONE) { + if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (jsimd_can_ycc_rgb565()) + cconvert->pub.color_convert = jsimd_ycc_rgb565_convert; + else { + cconvert->pub.color_convert = ycc_rgb565_convert; + build_ycc_rgb_table(cinfo); + } + } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + cconvert->pub.color_convert = gray_rgb565_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_rgb565_convert; + } else + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); + } else { + /* only ordered dithering is supported */ + if (cinfo->jpeg_color_space == JCS_YCbCr) { + cconvert->pub.color_convert = ycc_rgb565D_convert; + build_ycc_rgb_table(cinfo); + } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + cconvert->pub.color_convert = gray_rgb565D_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + cconvert->pub.color_convert = rgb_rgb565D_convert; + } else + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); + } + break; + case JCS_CMYK: cinfo->out_color_components = 4; if (cinfo->jpeg_color_space == JCS_YCCK) { @@ -517,7 +885,7 @@ if (cinfo->out_color_space == cinfo->jpeg_color_space) { cinfo->out_color_components = cinfo->num_components; cconvert->pub.color_convert = null_convert; - } else /* unsupported non-null conversion */ + } else /* unsupported non-null conversion */ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdct.h glmark2-2021.02/src/libjpeg-turbo/jdct.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdct.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdct.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,14 +1,17 @@ /* * jdct.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This include file contains common declarations for the forward and * inverse DCT modules. These declarations are private to the DCT managers * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms. - * The individual DCT algorithms are kept in separate files to ease + * The individual DCT algorithms are kept in separate files to ease * machine-dependent tuning (e.g., assembly coding). */ @@ -16,7 +19,7 @@ /* * A forward DCT routine is given a pointer to a work area of type DCTELEM[]; * the DCT is to be performed in-place in that buffer. Type DCTELEM is int - * for 8-bit samples, INT32 for 12-bit samples. (NOTE: Floating-point DCT + * for 8-bit samples, JLONG for 12-bit samples. (NOTE: Floating-point DCT * implementations use an array of type FAST_FLOAT, instead.) * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE). * The DCT outputs are returned scaled up by a factor of 8; they therefore @@ -29,7 +32,7 @@ #if BITS_IN_JSAMPLE == 8 #ifndef WITH_SIMD -typedef int DCTELEM; /* 16 or 32 bits is fine */ +typedef int DCTELEM; /* 16 or 32 bits is fine */ typedef unsigned int UDCTELEM; typedef unsigned long long UDCTELEM2; #else @@ -38,8 +41,7 @@ typedef unsigned int UDCTELEM2; #endif #else -typedef INT32 DCTELEM; /* must have 32 bits */ -typedef UINT32 UDCTELEM; +typedef JLONG DCTELEM; /* must have 32 bits */ typedef unsigned long long UDCTELEM2; #endif @@ -64,10 +66,10 @@ typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ #if BITS_IN_JSAMPLE == 8 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ -#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ +#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ #else -typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ -#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ +typedef JLONG IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ +#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ #endif typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ @@ -86,57 +88,79 @@ #define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_fdct_islow jFDislow -#define jpeg_fdct_ifast jFDifast -#define jpeg_fdct_float jFDfloat -#define jpeg_idct_islow jRDislow -#define jpeg_idct_ifast jRDifast -#define jpeg_idct_float jRDfloat -#define jpeg_idct_4x4 jRD4x4 -#define jpeg_idct_2x2 jRD2x2 -#define jpeg_idct_1x1 jRD1x1 -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - /* Extern declarations for the forward and inverse DCT routines. */ -EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data)); +EXTERN(void) jpeg_fdct_islow (DCTELEM *data); +EXTERN(void) jpeg_fdct_ifast (DCTELEM *data); +EXTERN(void) jpeg_fdct_float (FAST_FLOAT *data); EXTERN(void) jpeg_idct_islow - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_ifast - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_float - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_7x7 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_6x6 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_5x5 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_4x4 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_3x3 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_2x2 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); EXTERN(void) jpeg_idct_1x1 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_9x9 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_10x10 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_11x11 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_12x12 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_13x13 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_14x14 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_15x15 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_16x16 + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); /* * Macros for handling fixed-point arithmetic; these are used by many * but not all of the DCT/IDCT modules. * - * All values are expected to be of type INT32. + * All values are expected to be of type JLONG. * Fractional constants are scaled left by CONST_BITS bits. * CONST_BITS is defined within each module using these macros, * and may differ from one module to the next. */ -#define ONE ((INT32) 1) +#define ONE ((JLONG) 1) #define CONST_SCALE (ONE << CONST_BITS) /* Convert a positive real constant to an integer scaled by CONST_SCALE. @@ -144,16 +168,16 @@ * thus causing a lot of useless floating-point operations at run time. */ -#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) +#define FIX(x) ((JLONG) ((x) * CONST_SCALE + 0.5)) -/* Descale and correctly round an INT32 value that's scaled by N bits. +/* Descale and correctly round a JLONG value that's scaled by N bits. * We assume RIGHT_SHIFT rounds towards minus infinity, so adding * the fudge factor is correct for either sign of X. */ #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result. * This macro is used only when the two inputs will actually be no more than * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a * full 32x32 multiply. This provides a useful speedup on many machines. @@ -162,23 +186,23 @@ * correct combination of casts. */ -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ #define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT16) (const))) #endif -#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT32) (const))) +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((JLONG) (const))) #endif -#ifndef MULTIPLY16C16 /* default definition */ +#ifndef MULTIPLY16C16 /* default definition */ #define MULTIPLY16C16(var,const) ((var) * (const)) #endif /* Same except both inputs are variables. */ -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ #define MULTIPLY16V16(var1,var2) (((INT16) (var1)) * ((INT16) (var2))) #endif -#ifndef MULTIPLY16V16 /* default definition */ +#ifndef MULTIPLY16V16 /* default definition */ #define MULTIPLY16V16(var1,var2) ((var1) * (var2)) #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jddctmgr.c glmark2-2021.02/src/libjpeg-turbo/jddctmgr.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jddctmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jddctmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jddctmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2002-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2010, 2015, D. R. Commander. + * Copyright (C) 2013, MIPS Technologies, Inc., California + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the inverse-DCT management logic. * This code selects a particular IDCT implementation to be used, @@ -20,7 +24,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" #include "jpegcomp.h" @@ -45,7 +49,7 @@ /* Private subobject for this module */ typedef struct { - struct jpeg_inverse_dct pub; /* public fields */ + struct jpeg_inverse_dct pub; /* public fields */ /* This array contains the IDCT method code that each multiplier table * is currently set up for, or -1 if it's not yet set up. @@ -55,7 +59,7 @@ int cur_method[MAX_COMPONENTS]; } my_idct_controller; -typedef my_idct_controller * my_idct_ptr; +typedef my_idct_controller *my_idct_ptr; /* Allocated multiplier tables: big enough for any supported variant */ @@ -97,7 +101,7 @@ jpeg_component_info *compptr; int method = 0; inverse_DCT_method_ptr method_ptr = NULL; - JQUANT_TBL * qtbl; + JQUANT_TBL *qtbl; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -106,57 +110,117 @@ #ifdef IDCT_SCALING_SUPPORTED case 1: method_ptr = jpeg_idct_1x1; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 2: if (jsimd_can_idct_2x2()) method_ptr = jsimd_idct_2x2; else method_ptr = jpeg_idct_2x2; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ + break; + case 3: + method_ptr = jpeg_idct_3x3; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; case 4: if (jsimd_can_idct_4x4()) method_ptr = jsimd_idct_4x4; else method_ptr = jpeg_idct_4x4; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctred uses islow-style table */ + break; + case 5: + method_ptr = jpeg_idct_5x5; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 6: +#if defined(__mips__) + if (jsimd_can_idct_6x6()) + method_ptr = jsimd_idct_6x6; + else +#endif + method_ptr = jpeg_idct_6x6; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 7: + method_ptr = jpeg_idct_7x7; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; #endif case DCTSIZE: switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: - if (jsimd_can_idct_islow()) - method_ptr = jsimd_idct_islow; - else - method_ptr = jpeg_idct_islow; - method = JDCT_ISLOW; - break; + if (jsimd_can_idct_islow()) + method_ptr = jsimd_idct_islow; + else + method_ptr = jpeg_idct_islow; + method = JDCT_ISLOW; + break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: - if (jsimd_can_idct_ifast()) - method_ptr = jsimd_idct_ifast; - else - method_ptr = jpeg_idct_ifast; - method = JDCT_IFAST; - break; + if (jsimd_can_idct_ifast()) + method_ptr = jsimd_idct_ifast; + else + method_ptr = jpeg_idct_ifast; + method = JDCT_IFAST; + break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: - if (jsimd_can_idct_float()) - method_ptr = jsimd_idct_float; - else - method_ptr = jpeg_idct_float; - method = JDCT_FLOAT; - break; + if (jsimd_can_idct_float()) + method_ptr = jsimd_idct_float; + else + method_ptr = jpeg_idct_float; + method = JDCT_FLOAT; + break; #endif default: - ERREXIT(cinfo, JERR_NOT_COMPILED); - break; + ERREXIT(cinfo, JERR_NOT_COMPILED); + break; } break; +#ifdef IDCT_SCALING_SUPPORTED + case 9: + method_ptr = jpeg_idct_9x9; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 10: + method_ptr = jpeg_idct_10x10; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 11: + method_ptr = jpeg_idct_11x11; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 12: +#if defined(__mips__) + if (jsimd_can_idct_12x12()) + method_ptr = jsimd_idct_12x12; + else +#endif + method_ptr = jpeg_idct_12x12; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 13: + method_ptr = jpeg_idct_13x13; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 14: + method_ptr = jpeg_idct_14x14; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 15: + method_ptr = jpeg_idct_15x15; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case 16: + method_ptr = jpeg_idct_16x16; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; +#endif default: ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size); break; @@ -172,81 +236,81 @@ if (! compptr->component_needed || idct->cur_method[ci] == method) continue; qtbl = compptr->quant_table; - if (qtbl == NULL) /* happens if no data yet for component */ + if (qtbl == NULL) /* happens if no data yet for component */ continue; idct->cur_method[ci] = method; switch (method) { #ifdef PROVIDE_ISLOW_TABLES case JDCT_ISLOW: { - /* For LL&M IDCT method, multipliers are equal to raw quantization - * coefficients, but are stored as ints to ensure access efficiency. - */ - ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; - for (i = 0; i < DCTSIZE2; i++) { - ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i]; - } + /* For LL&M IDCT method, multipliers are equal to raw quantization + * coefficients, but are stored as ints to ensure access efficiency. + */ + ISLOW_MULT_TYPE *ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; + for (i = 0; i < DCTSIZE2; i++) { + ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i]; + } } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { - /* For AA&N IDCT method, multipliers are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * For integer operation, the multiplier table is to be scaled by - * IFAST_SCALE_BITS. - */ - IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; + /* For AA&N IDCT method, multipliers are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * For integer operation, the multiplier table is to be scaled by + * IFAST_SCALE_BITS. + */ + IFAST_MULT_TYPE *ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; #define CONST_BITS 14 - static const INT16 aanscales[DCTSIZE2] = { - /* precomputed values scaled up by 14 bits */ - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, - 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, - 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 - }; - SHIFT_TEMPS - - for (i = 0; i < DCTSIZE2; i++) { - ifmtbl[i] = (IFAST_MULT_TYPE) - DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], - (INT32) aanscales[i]), - CONST_BITS-IFAST_SCALE_BITS); - } + static const INT16 aanscales[DCTSIZE2] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + }; + SHIFT_TEMPS + + for (i = 0; i < DCTSIZE2; i++) { + ifmtbl[i] = (IFAST_MULT_TYPE) + DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], + (JLONG) aanscales[i]), + CONST_BITS-IFAST_SCALE_BITS); + } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { - /* For float AA&N IDCT method, multipliers are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - */ - FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; - int row, col; - static const double aanscalefactor[DCTSIZE] = { - 1.0, 1.387039845, 1.306562965, 1.175875602, - 1.0, 0.785694958, 0.541196100, 0.275899379 - }; - - i = 0; - for (row = 0; row < DCTSIZE; row++) { - for (col = 0; col < DCTSIZE; col++) { - fmtbl[i] = (FLOAT_MULT_TYPE) - ((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col]); - i++; - } - } + /* For float AA&N IDCT method, multipliers are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + */ + FLOAT_MULT_TYPE *fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; + int row, col; + static const double aanscalefactor[DCTSIZE] = { + 1.0, 1.387039845, 1.306562965, 1.175875602, + 1.0, 0.785694958, 0.541196100, 0.275899379 + }; + + i = 0; + for (row = 0; row < DCTSIZE; row++) { + for (col = 0; col < DCTSIZE; col++) { + fmtbl[i] = (FLOAT_MULT_TYPE) + ((double) qtbl->quantval[i] * + aanscalefactor[row] * aanscalefactor[col]); + i++; + } + } } break; #endif @@ -271,7 +335,7 @@ idct = (my_idct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_idct_controller)); + sizeof(my_idct_controller)); cinfo->idct = (struct jpeg_inverse_dct *) idct; idct->pub.start_pass = start_pass; @@ -280,8 +344,8 @@ /* Allocate and pre-zero a multiplier table for each component */ compptr->dct_table = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(multiplier_table)); - MEMZERO(compptr->dct_table, SIZEOF(multiplier_table)); + sizeof(multiplier_table)); + MEMZERO(compptr->dct_table, sizeof(multiplier_table)); /* Mark multiplier table not yet set up for any method */ idct->cur_method[ci] = -1; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdhuff.c glmark2-2021.02/src/libjpeg-turbo/jdhuff.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdhuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdhuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdhuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy decoding routines. * @@ -18,8 +20,9 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdphuff.c */ +#include "jdhuff.h" /* Declarations shared with jdphuff.c */ #include "jpegcomp.h" +#include "jstdhuff.c" /* @@ -43,10 +46,10 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -57,27 +60,27 @@ /* These fields are loaded into local variables at start of each MCU. * In case of suspension, we exit WITHOUT updating them. */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ + bitread_perm_state bitstate; /* Bit buffer at start of MCU */ + savable_state saved; /* Other state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; /* Precalculated info set up by start_pass for use in decode_mcu: */ /* Pointers to derived tables to be used for each block within an MCU */ - d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; - d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + d_derived_tbl *dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + d_derived_tbl *ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; /* Whether we care about the DC and AC coefficient values for each block */ boolean dc_needed[D_MAX_BLOCKS_IN_MCU]; boolean ac_needed[D_MAX_BLOCKS_IN_MCU]; } huff_entropy_decoder; -typedef huff_entropy_decoder * huff_entropy_ptr; +typedef huff_entropy_decoder *huff_entropy_ptr; /* @@ -89,7 +92,8 @@ { huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; int ci, blkn, dctbl, actbl; - jpeg_component_info * compptr; + d_derived_tbl **pdtbl; + jpeg_component_info *compptr; /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. * This ought to be an error condition, but we make it a warning because @@ -105,10 +109,10 @@ actbl = compptr->ac_tbl_no; /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ - jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); - jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); + pdtbl = entropy->dc_derived_tbls + dctbl; + jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl); + pdtbl = entropy->ac_derived_tbls + actbl; + jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl); /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; } @@ -149,7 +153,7 @@ GLOBAL(void) jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl ** pdtbl) + d_derived_tbl **pdtbl) { JHUFF_TBL *htbl; d_derived_tbl *dtbl; @@ -175,26 +179,26 @@ if (*pdtbl == NULL) *pdtbl = (d_derived_tbl *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(d_derived_tbl)); + sizeof(d_derived_tbl)); dtbl = *pdtbl; - dtbl->pub = htbl; /* fill in back link */ - + dtbl->pub = htbl; /* fill in back link */ + /* Figure C.1: make table of Huffman code length for each symbol */ p = 0; for (l = 1; l <= 16; l++) { i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ + if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) huffsize[p++] = (char) l; } huffsize[p] = 0; numsymbols = p; - + /* Figure C.2: generate the codes themselves */ /* We also validate that the counts represent a legal Huffman code tree. */ - + code = 0; si = huffsize[0]; p = 0; @@ -206,7 +210,7 @@ /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((INT32) code) >= (((INT32) 1) << si)) + if (((JLONG) code) >= (((JLONG) 1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; @@ -220,11 +224,11 @@ /* valoffset[l] = huffval[] index of 1st symbol of code length l, * minus the minimum code of length l */ - dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p]; + dtbl->valoffset[l] = (JLONG) p - (JLONG) huffcode[p]; p += htbl->bits[l]; dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */ } else { - dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ + dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ } } dtbl->valoffset[17] = 0; @@ -247,8 +251,8 @@ /* Generate left-justified code followed by all possible bit sequences */ lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { - dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; - lookbits++; + dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; + lookbits++; } } } @@ -263,7 +267,7 @@ for (i = 0; i < numsymbols; i++) { int sym = htbl->huffval[i]; if (sym < 0 || sym > 15) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); } } } @@ -285,20 +289,20 @@ */ #ifdef SLOW_SHIFT_32 -#define MIN_GET_BITS 15 /* minimum allowable value */ +#define MIN_GET_BITS 15 /* minimum allowable value */ #else #define MIN_GET_BITS (BIT_BUF_SIZE-7) #endif GLOBAL(boolean) -jpeg_fill_bit_buffer (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - int nbits) +jpeg_fill_bit_buffer (bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + int nbits) /* Load up the bit buffer to a depth of at least nbits */ { /* Copy heavily used state fields into locals (hopefully registers) */ - register const JOCTET * next_input_byte = state->next_input_byte; + register const JOCTET *next_input_byte = state->next_input_byte; register size_t bytes_in_buffer = state->bytes_in_buffer; j_decompress_ptr cinfo = state->cinfo; @@ -306,54 +310,54 @@ /* (It is assumed that no request will be for more than that many bits.) */ /* We fail to do so only if we hit a marker or are forced to suspend. */ - if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ + if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ while (bits_left < MIN_GET_BITS) { register int c; /* Attempt to read a byte */ if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; c = GETJOCTET(*next_input_byte++); /* If it's 0xFF, check and discard stuffed zero byte */ if (c == 0xFF) { - /* Loop here to discard any padding FF's on terminating marker, - * so that we can save a valid unread_marker value. NOTE: we will - * accept multiple FF's followed by a 0 as meaning a single FF data - * byte. This data pattern is not valid according to the standard. - */ - do { - if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; - } - bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); - } while (c == 0xFF); - - if (c == 0) { - /* Found FF/00, which represents an FF data byte */ - c = 0xFF; - } else { - /* Oops, it's actually a marker indicating end of compressed data. - * Save the marker code for later use. - * Fine point: it might appear that we should save the marker into - * bitread working state, not straight into permanent state. But - * once we have hit a marker, we cannot need to suspend within the - * current MCU, because we will read no more bytes from the data - * source. So it is OK to update permanent state right away. - */ - cinfo->unread_marker = c; - /* See if we need to insert some fake zero bits. */ - goto no_more_bytes; - } + /* Loop here to discard any padding FF's on terminating marker, + * so that we can save a valid unread_marker value. NOTE: we will + * accept multiple FF's followed by a 0 as meaning a single FF data + * byte. This data pattern is not valid according to the standard. + */ + do { + if (bytes_in_buffer == 0) { + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; + } + bytes_in_buffer--; + c = GETJOCTET(*next_input_byte++); + } while (c == 0xFF); + + if (c == 0) { + /* Found FF/00, which represents an FF data byte */ + c = 0xFF; + } else { + /* Oops, it's actually a marker indicating end of compressed data. + * Save the marker code for later use. + * Fine point: it might appear that we should save the marker into + * bitread working state, not straight into permanent state. But + * once we have hit a marker, we cannot need to suspend within the + * current MCU, because we will read no more bytes from the data + * source. So it is OK to update permanent state right away. + */ + cinfo->unread_marker = c; + /* See if we need to insert some fake zero bits. */ + goto no_more_bytes; + } } /* OK, load c into get_buffer */ @@ -373,8 +377,8 @@ * appears per data segment. */ if (! cinfo->entropy->insufficient_data) { - WARNMS(cinfo, JWRN_HIT_MARKER); - cinfo->entropy->insufficient_data = TRUE; + WARNMS(cinfo, JWRN_HIT_MARKER); + cinfo->entropy->insufficient_data = TRUE; } /* Fill the buffer with zero bits */ get_buffer <<= MIN_GET_BITS - bits_left; @@ -417,11 +421,11 @@ } \ } -#if __WORDSIZE == 64 || defined(_WIN64) +#if SIZEOF_SIZE_T==8 || defined(_WIN64) /* Pre-fetch 48 bytes, because the holding register is 64-bit */ #define FILL_BIT_BUFFER_FAST \ - if (bits_left < 16) { \ + if (bits_left <= 16) { \ GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \ } @@ -429,7 +433,7 @@ /* Pre-fetch 16 bytes, because the holding register is 32-bit */ #define FILL_BIT_BUFFER_FAST \ - if (bits_left < 16) { \ + if (bits_left <= 16) { \ GET_BYTE GET_BYTE \ } @@ -442,12 +446,12 @@ */ GLOBAL(int) -jpeg_huff_decode (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - d_derived_tbl * htbl, int min_bits) +jpeg_huff_decode (bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + d_derived_tbl *htbl, int min_bits) { register int l = min_bits; - register INT32 code; + register JLONG code; /* HUFF_DECODE has determined that the code is at least min_bits */ /* bits long, so fetch that many bits in one swoop. */ @@ -473,7 +477,7 @@ if (l > 16) { WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE); - return 0; /* fake a zero as the safest result */ + return 0; /* fake a zero as the safest result */ } return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ]; @@ -488,7 +492,8 @@ #define AVOID_TABLES #ifdef AVOID_TABLES -#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1))) +#define NEG_1 ((unsigned int)-1) +#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((NEG_1)<<(s)) + 1))) #else @@ -560,9 +565,9 @@ ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - JBLOCKROW block = MCU_data[blkn]; - d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; + JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; + d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn]; + d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn]; register int s, k, r; /* Decode a single block's worth of coefficients */ @@ -580,11 +585,13 @@ int ci = cinfo->MCU_membership[blkn]; s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; - /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ - (*block)[0] = (JCOEF) s; + if (block) { + /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ + (*block)[0] = (JCOEF) s; + } } - if (entropy->ac_needed[blkn]) { + if (entropy->ac_needed[blkn] && block) { /* Section F.2.2.2: decode the AC coefficients */ /* Since zeroes are skipped, output area must be cleared beforehand */ @@ -593,7 +600,7 @@ r = s >> 4; s &= 15; - + if (s) { k += r; CHECK_BIT_BUFFER(br_state, s, return FALSE); @@ -657,12 +664,12 @@ ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - JBLOCKROW block = MCU_data[blkn]; - d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; + JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; + d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn]; + d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn]; register int s, k, r, l; - HUFF_DECODE_FAST(s, l, dctbl); + HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu); if (s) { FILL_BIT_BUFFER_FAST r = GET_BITS(s); @@ -673,16 +680,17 @@ int ci = cinfo->MCU_membership[blkn]; s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; - (*block)[0] = (JCOEF) s; + if (block) + (*block)[0] = (JCOEF) s; } - if (entropy->ac_needed[blkn]) { + if (entropy->ac_needed[blkn] && block) { for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE_FAST(s, l, actbl); + HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); r = s >> 4; s &= 15; - + if (s) { k += r; FILL_BIT_BUFFER_FAST @@ -698,7 +706,7 @@ } else { for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE_FAST(s, l, actbl); + HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); r = s >> 4; s &= 15; @@ -715,6 +723,7 @@ } if (cinfo->unread_marker != 0) { +slow_decode_mcu: cinfo->unread_marker = 0; return FALSE; } @@ -742,7 +751,7 @@ * this module, since we'll just re-assign them on the next call.) */ -#define BUFSIZE (DCTSIZE2 * 2) +#define BUFSIZE (DCTSIZE2 * 8) METHODDEF(boolean) decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) @@ -754,7 +763,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; usefast = 0; } @@ -794,9 +803,15 @@ huff_entropy_ptr entropy; int i; + /* Motion JPEG frames typically do not include the Huffman tables if they + are the default tables. Thus, if the tables are not set by the time + the Huffman decoder is initialized (usually within the body of + jpeg_start_decompress()), we set them to default values. */ + std_huff_tables((j_common_ptr) cinfo); + entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_decoder)); + sizeof(huff_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass_huff_decoder; entropy->pub.decode_mcu = decode_mcu; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdhuff.h glmark2-2021.02/src/libjpeg-turbo/jdhuff.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdhuff.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdhuff.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,34 +1,30 @@ /* * jdhuff.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2010-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010-2011, 2015-2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains declarations for Huffman entropy decoding routines * that are shared between the sequential decoder (jdhuff.c) and the * progressive decoder (jdphuff.c). No other modules need to see these. */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_d_derived_tbl jMkDDerived -#define jpeg_fill_bit_buffer jFilBitBuf -#define jpeg_huff_decode jHufDecode -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#include "jconfigint.h" /* Derived data constructed for each Huffman table */ -#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ +#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ typedef struct { /* Basic tables: (element [0] of each array is unused) */ - INT32 maxcode[18]; /* largest code of length k (-1 if none) */ + JLONG maxcode[18]; /* largest code of length k (-1 if none) */ /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */ - INT32 valoffset[18]; /* huffval[] offset for codes of length k */ + JLONG valoffset[18]; /* huffval[] offset for codes of length k */ /* valoffset[k] = huffval[] index of 1st symbol of code length k, less * the smallest code of length k; so given a code of length k, the * corresponding symbol is huffval[code + valoffset[k]] @@ -52,8 +48,8 @@ /* Expand a Huffman table definition into the derived format */ EXTERN(void) jpeg_make_d_derived_tbl - JPP((j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl ** pdtbl)); + (j_decompress_ptr cinfo, boolean isDC, int tblno, + d_derived_tbl ** pdtbl); /* @@ -74,15 +70,19 @@ * necessary. */ -#if __WORDSIZE == 64 || defined(_WIN64) +#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) +#error Cannot determine word size +#endif + +#if SIZEOF_SIZE_T==8 || defined(_WIN64) -typedef size_t bit_buf_type; /* type of bit-extraction buffer */ -#define BIT_BUF_SIZE 64 /* size of buffer in bits */ +typedef size_t bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 64 /* size of buffer in bits */ #else -typedef INT32 bit_buf_type; /* type of bit-extraction buffer */ -#define BIT_BUF_SIZE 32 /* size of buffer in bits */ +typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 32 /* size of buffer in bits */ #endif @@ -93,43 +93,43 @@ * because not all machines measure sizeof in 8-bit bytes. */ -typedef struct { /* Bitreading state saved across MCUs */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ +typedef struct { /* Bitreading state saved across MCUs */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ } bitread_perm_state; -typedef struct { /* Bitreading working state within an MCU */ +typedef struct { /* Bitreading working state within an MCU */ /* Current data source location */ /* We need a copy, rather than munging the original, in case of suspension */ - const JOCTET * next_input_byte; /* => next byte to read from source */ - size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ + const JOCTET *next_input_byte; /* => next byte to read from source */ + size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ /* Bit input buffer --- note these values are kept in register variables, * not in this struct, inside the inner loops. */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ /* Pointer needed by jpeg_fill_bit_buffer. */ - j_decompress_ptr cinfo; /* back link to decompress master record */ + j_decompress_ptr cinfo; /* back link to decompress master record */ } bitread_working_state; /* Macros to declare and load/save bitread local variables. */ #define BITREAD_STATE_VARS \ - register bit_buf_type get_buffer; \ - register int bits_left; \ - bitread_working_state br_state + register bit_buf_type get_buffer; \ + register int bits_left; \ + bitread_working_state br_state #define BITREAD_LOAD_STATE(cinfop,permstate) \ - br_state.cinfo = cinfop; \ - br_state.next_input_byte = cinfop->src->next_input_byte; \ - br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ - get_buffer = permstate.get_buffer; \ - bits_left = permstate.bits_left; + br_state.cinfo = cinfop; \ + br_state.next_input_byte = cinfop->src->next_input_byte; \ + br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ + get_buffer = permstate.get_buffer; \ + bits_left = permstate.bits_left; #define BITREAD_SAVE_STATE(cinfop,permstate) \ - cinfop->src->next_input_byte = br_state.next_input_byte; \ - cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ - permstate.get_buffer = get_buffer; \ - permstate.bits_left = bits_left + cinfop->src->next_input_byte = br_state.next_input_byte; \ + cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ + permstate.get_buffer = get_buffer; \ + permstate.bits_left = bits_left /* * These macros provide the in-line portion of bit fetching. @@ -137,37 +137,37 @@ * before using GET_BITS, PEEK_BITS, or DROP_BITS. * The variables get_buffer and bits_left are assumed to be locals, * but the state struct might not be (jpeg_huff_decode needs this). - * CHECK_BIT_BUFFER(state,n,action); - * Ensure there are N bits in get_buffer; if suspend, take action. + * CHECK_BIT_BUFFER(state,n,action); + * Ensure there are N bits in get_buffer; if suspend, take action. * val = GET_BITS(n); - * Fetch next N bits. + * Fetch next N bits. * val = PEEK_BITS(n); - * Fetch next N bits without removing them from the buffer. - * DROP_BITS(n); - * Discard next N bits. + * Fetch next N bits without removing them from the buffer. + * DROP_BITS(n); + * Discard next N bits. * The value N should be a simple variable, not an expression, because it * is evaluated multiple times. */ #define CHECK_BIT_BUFFER(state,nbits,action) \ - { if (bits_left < (nbits)) { \ - if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ - { action; } \ - get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } + { if (bits_left < (nbits)) { \ + if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ + { action; } \ + get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } #define GET_BITS(nbits) \ - (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) + (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) #define PEEK_BITS(nbits) \ - (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) + (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) #define DROP_BITS(nbits) \ - (bits_left -= (nbits)) + (bits_left -= (nbits)) /* Load up the bit buffer to a depth of at least nbits */ EXTERN(boolean) jpeg_fill_bit_buffer - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, int nbits)); + (bitread_working_state *state, register bit_buf_type get_buffer, + register int bits_left, int nbits); /* @@ -203,12 +203,12 @@ } else { \ slowlabel: \ if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ - { failaction; } \ + { failaction; } \ get_buffer = state.get_buffer; bits_left = state.bits_left; \ } \ } -#define HUFF_DECODE_FAST(s,nb,htbl) \ +#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \ FILL_BIT_BUFFER_FAST; \ s = PEEK_BITS(HUFF_LOOKAHEAD); \ s = htbl->lookup[s]; \ @@ -225,10 +225,12 @@ s |= GET_BITS(1); \ nb++; \ } \ - s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \ + if (nb > 16) \ + goto slowlabel; \ + s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \ } /* Out-of-line case for Huffman code fetching */ EXTERN(int) jpeg_huff_decode - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, d_derived_tbl * htbl, int min_bits)); + (bitread_working_state *state, register bit_buf_type get_buffer, + register int bits_left, d_derived_tbl *htbl, int min_bits); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdinput.c glmark2-2021.02/src/libjpeg-turbo/jdinput.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdinput.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdinput.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,13 @@ /* * jdinput.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Modified 2002-2009 by Guido Vollbeding. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains input control logic for the JPEG decompressor. * These routines are concerned with controlling the decompressor's input @@ -24,93 +26,20 @@ typedef struct { struct jpeg_input_controller pub; /* public fields */ - boolean inheaders; /* TRUE until first SOS is reached */ + boolean inheaders; /* TRUE until first SOS is reached */ } my_input_controller; -typedef my_input_controller * my_inputctl_ptr; +typedef my_input_controller *my_inputctl_ptr; /* Forward declarations */ -METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo)); +METHODDEF(int) consume_markers (j_decompress_ptr cinfo); /* * Routines to calculate various quantities related to the size of the image. */ - -#if JPEG_LIB_VERSION >= 80 -/* - * Compute output image dimensions and related values. - * NOTE: this is exported for possible use by application. - * Hence it mustn't do anything that can't be done twice. - */ - -GLOBAL(void) -jpeg_core_output_dimensions (j_decompress_ptr cinfo) -/* Do computations that are needed before master selection phase. - * This function is used for transcoding and full decompression. - */ -{ -#ifdef IDCT_SCALING_SUPPORTED - int ci; - jpeg_component_info *compptr; - - /* Compute actual output image dimensions and DCT scaling choices. */ - if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) { - /* Provide 1/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 1; - cinfo->min_DCT_v_scaled_size = 1; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) { - /* Provide 2/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 2; - cinfo->min_DCT_v_scaled_size = 2; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) { - /* Provide 4/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 4; - cinfo->min_DCT_v_scaled_size = 4; - } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) { - /* Provide 8/block_size scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size); - cinfo->min_DCT_h_scaled_size = 8; - cinfo->min_DCT_v_scaled_size = 8; - } - /* Recompute dimensions of components */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size; - compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size; - } - -#else /* !IDCT_SCALING_SUPPORTED */ - - /* Hardwire it to "no scaling" */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; - /* jdinput.c has already initialized DCT_scaled_size, - * and has computed unscaled downsampled_width and downsampled_height. - */ - -#endif /* IDCT_SCALING_SUPPORTED */ -} -#endif - - LOCAL(void) initial_setup (j_decompress_ptr cinfo) /* Called once, when first SOS marker is reached */ @@ -130,7 +59,7 @@ /* Check that number of components won't exceed internal array sizes */ if (cinfo->num_components > MAX_COMPONENTS) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); + MAX_COMPONENTS); /* Compute maximum sampling factors; check factor validity */ cinfo->max_h_samp_factor = 1; @@ -138,12 +67,12 @@ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, - compptr->h_samp_factor); + compptr->h_samp_factor); cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, - compptr->v_samp_factor); + compptr->v_samp_factor); } #if JPEG_LIB_VERSION >=80 @@ -173,10 +102,15 @@ /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (cinfo->max_v_samp_factor * DCTSIZE)); + /* Set the first and last MCU columns to decompress from multi-scan images. + * By default, decompress all of the MCU columns. + */ + cinfo->master->first_MCU_col[ci] = 0; + cinfo->master->last_MCU_col[ci] = compptr->width_in_blocks - 1; /* downsampled_width and downsampled_height will also be overridden by * jdmaster.c if we are doing full decompression. The transcoder library * doesn't use these values, but the calling application might. @@ -184,10 +118,10 @@ /* Size in samples */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + (long) cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + (long) cinfo->max_v_samp_factor); /* Mark component needed, until color conversion says otherwise */ compptr->component_needed = TRUE; /* Mark no quantization table yet saved for component */ @@ -197,7 +131,7 @@ /* Compute number of fully interleaved MCU rows. */ cinfo->total_iMCU_rows = (JDIMENSION) jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + (long) (cinfo->max_v_samp_factor*DCTSIZE)); /* Decide whether file contains multiple scans */ if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode) @@ -214,16 +148,16 @@ { int ci, mcublks, tmp; jpeg_component_info *compptr; - + if (cinfo->comps_in_scan == 1) { - + /* Noninterleaved (single-component) scan */ compptr = cinfo->cur_comp_info[0]; - + /* Overall image size in MCUs */ cinfo->MCUs_per_row = compptr->width_in_blocks; cinfo->MCU_rows_in_scan = compptr->height_in_blocks; - + /* For noninterleaved scan, always one block per MCU */ compptr->MCU_width = 1; compptr->MCU_height = 1; @@ -236,28 +170,28 @@ tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; - + /* Prepare array describing MCU composition */ cinfo->blocks_in_MCU = 1; cinfo->MCU_membership[0] = 0; - + } else { - + /* Interleaved (multi-component) scan */ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, - MAX_COMPS_IN_SCAN); - + MAX_COMPS_IN_SCAN); + /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) jdiv_round_up((long) cinfo->image_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + (long) (cinfo->max_h_samp_factor*DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - + (long) (cinfo->max_v_samp_factor*DCTSIZE)); + cinfo->blocks_in_MCU = 0; - + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; /* Sampling factors give # of blocks of component in each MCU */ @@ -275,12 +209,12 @@ /* Prepare array describing MCU composition */ mcublks = compptr->MCU_blocks; if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU) - ERREXIT(cinfo, JERR_BAD_MCU_SIZE); + ERREXIT(cinfo, JERR_BAD_MCU_SIZE); while (mcublks-- > 0) { - cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; } } - + } } @@ -311,7 +245,7 @@ { int ci, qtblno; jpeg_component_info *compptr; - JQUANT_TBL * qtbl; + JQUANT_TBL *qtbl; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; @@ -321,13 +255,13 @@ /* Make sure specified quantization table is present */ qtblno = compptr->quant_tbl_no; if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) + cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); /* OK, save away the quantization table */ qtbl = (JQUANT_TBL *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(JQUANT_TBL)); - MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL)); + sizeof(JQUANT_TBL)); + MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL)); compptr->quant_table = qtbl; } } @@ -386,31 +320,31 @@ val = (*cinfo->marker->read_markers) (cinfo); switch (val) { - case JPEG_REACHED_SOS: /* Found SOS */ - if (inputctl->inheaders) { /* 1st SOS */ + case JPEG_REACHED_SOS: /* Found SOS */ + if (inputctl->inheaders) { /* 1st SOS */ initial_setup(cinfo); inputctl->inheaders = FALSE; /* Note: start_input_pass must be called by jdmaster.c * before any more input can be consumed. jdapimin.c is * responsible for enforcing this sequencing. */ - } else { /* 2nd or later SOS marker */ + } else { /* 2nd or later SOS marker */ if (! inputctl->pub.has_multiple_scans) - ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ + ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ start_input_pass(cinfo); } break; - case JPEG_REACHED_EOI: /* Found EOI */ + case JPEG_REACHED_EOI: /* Found EOI */ inputctl->pub.eoi_reached = TRUE; - if (inputctl->inheaders) { /* Tables-only datastream, apparently */ + if (inputctl->inheaders) { /* Tables-only datastream, apparently */ if (cinfo->marker->saw_SOF) - ERREXIT(cinfo, JERR_SOF_NO_SOS); + ERREXIT(cinfo, JERR_SOF_NO_SOS); } else { /* Prevent infinite loop in coef ctlr's decompress_data routine * if user set output_scan_number larger than number of scans. */ if (cinfo->output_scan_number > cinfo->input_scan_number) - cinfo->output_scan_number = cinfo->input_scan_number; + cinfo->output_scan_number = cinfo->input_scan_number; } break; case JPEG_SUSPENDED: @@ -455,7 +389,7 @@ /* Create subobject in permanent pool */ inputctl = (my_inputctl_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_input_controller)); + sizeof(my_input_controller)); cinfo->inputctl = (struct jpeg_input_controller *) inputctl; /* Initialize method pointers */ inputctl->pub.consume_input = consume_markers; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmainct.c glmark2-2021.02/src/libjpeg-turbo/jdmainct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmainct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmainct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdmainct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the main buffer controller for decompression. * The main buffer lies between the JPEG decompressor proper and the @@ -14,10 +16,8 @@ * supplies the equivalent of the main buffer in that case. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" -#include "jpegcomp.h" +#include "jdmainct.h" /* @@ -111,47 +111,17 @@ */ -/* Private buffer controller object */ - -typedef struct { - struct jpeg_d_main_controller pub; /* public fields */ - - /* Pointer to allocated workspace (M or M+2 row groups). */ - JSAMPARRAY buffer[MAX_COMPONENTS]; - - boolean buffer_full; /* Have we gotten an iMCU row from decoder? */ - JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */ - - /* Remaining fields are only used in the context case. */ - - /* These are the master pointers to the funny-order pointer lists. */ - JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */ - - int whichptr; /* indicates which pointer set is now in use */ - int context_state; /* process_data state machine status */ - JDIMENSION rowgroups_avail; /* row groups available to postprocessor */ - JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */ -} my_main_controller; - -typedef my_main_controller * my_main_ptr; - -/* context_state values: */ -#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */ -#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */ -#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */ - - /* Forward declarations */ METHODDEF(void) process_data_simple_main - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); METHODDEF(void) process_data_context_main - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED METHODDEF(void) process_data_crank_post - JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); #endif @@ -172,7 +142,7 @@ */ main_ptr->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * 2 * SIZEOF(JSAMPARRAY)); + cinfo->num_components * 2 * sizeof(JSAMPARRAY)); main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -184,8 +154,8 @@ */ xbuf = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW)); - xbuf += rgroup; /* want one row group at negative offsets */ + 2 * (rgroup * (M + 4)) * sizeof(JSAMPROW)); + xbuf += rgroup; /* want one row group at negative offsets */ main_ptr->xbuffer[0][ci] = xbuf; xbuf += rgroup * (M + 4); main_ptr->xbuffer[1][ci] = xbuf; @@ -237,34 +207,6 @@ LOCAL(void) -set_wraparound_pointers (j_decompress_ptr cinfo) -/* Set up the "wraparound" pointers at top and bottom of the pointer lists. - * This changes the pointer list state from top-of-image to the normal state. - */ -{ - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; - int ci, i, rgroup; - int M = cinfo->_min_DCT_scaled_size; - jpeg_component_info *compptr; - JSAMPARRAY xbuf0, xbuf1; - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; /* height of a row group of component */ - xbuf0 = main_ptr->xbuffer[0][ci]; - xbuf1 = main_ptr->xbuffer[1][ci]; - for (i = 0; i < rgroup; i++) { - xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; - xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; - xbuf0[rgroup*(M+2) + i] = xbuf0[i]; - xbuf1[rgroup*(M+2) + i] = xbuf1[i]; - } - } -} - - -LOCAL(void) set_bottom_pointers (j_decompress_ptr cinfo) /* Change the pointer lists to duplicate the last sample row at the bottom * of the image. whichptr indicates which xbuffer holds the final iMCU row. @@ -315,14 +257,14 @@ if (cinfo->upsample->need_context_rows) { main_ptr->pub.process_data = process_data_context_main; make_funny_pointers(cinfo); /* Create the xbuffer[] lists */ - main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ + main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; main_ptr->iMCU_row_ctr = 0; } else { /* Simple case with no context needed */ main_ptr->pub.process_data = process_data_simple_main; } - main_ptr->buffer_full = FALSE; /* Mark buffer empty */ + main_ptr->buffer_full = FALSE; /* Mark buffer empty */ main_ptr->rowgroup_ctr = 0; break; #ifdef QUANT_2PASS_SUPPORTED @@ -345,8 +287,8 @@ METHODDEF(void) process_data_simple_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; JDIMENSION rowgroups_avail; @@ -354,8 +296,8 @@ /* Read input data if we haven't filled the main buffer yet */ if (! main_ptr->buffer_full) { if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer)) - return; /* suspension forced, can do nothing more */ - main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + return; /* suspension forced, can do nothing more */ + main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ } /* There are always min_DCT_scaled_size row groups in an iMCU row. */ @@ -367,8 +309,8 @@ /* Feed the postprocessor */ (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer, - &main_ptr->rowgroup_ctr, rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); /* Has postprocessor consumed all the data yet? If so, mark buffer empty */ if (main_ptr->rowgroup_ctr >= rowgroups_avail) { @@ -385,18 +327,18 @@ METHODDEF(void) process_data_context_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_main_ptr main_ptr = (my_main_ptr) cinfo->main; /* Read input data if we haven't filled the main buffer yet */ if (! main_ptr->buffer_full) { if (! (*cinfo->coef->decompress_data) (cinfo, - main_ptr->xbuffer[main_ptr->whichptr])) - return; /* suspension forced, can do nothing more */ - main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ - main_ptr->iMCU_row_ctr++; /* count rows received */ + main_ptr->xbuffer[main_ptr->whichptr])) + return; /* suspension forced, can do nothing more */ + main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + main_ptr->iMCU_row_ctr++; /* count rows received */ } /* Postprocessor typically will not swallow all the input data it is handed @@ -408,13 +350,13 @@ case CTX_POSTPONED_ROW: /* Call postprocessor using previously set pointers for postponed row */ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) - return; /* Need to suspend */ + return; /* Need to suspend */ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; if (*out_row_ctr >= out_rows_avail) - return; /* Postprocessor exactly filled output buf */ + return; /* Postprocessor exactly filled output buf */ /*FALLTHROUGH*/ case CTX_PREPARE_FOR_IMCU: /* Prepare to process first M-1 row groups of this iMCU row */ @@ -430,15 +372,15 @@ case CTX_PROCESS_IMCU: /* Call postprocessor using previously set pointers */ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, + output_buf, out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) - return; /* Need to suspend */ + return; /* Need to suspend */ /* After the first iMCU, change wraparound pointers to normal state */ if (main_ptr->iMCU_row_ctr == 1) set_wraparound_pointers(cinfo); /* Prepare to load new iMCU row using other xbuffer list */ - main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */ + main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */ main_ptr->buffer_full = FALSE; /* Still need to process last row group of this iMCU row, */ /* which is saved at index M+1 of the other xbuffer */ @@ -459,12 +401,12 @@ METHODDEF(void) process_data_crank_post (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL, - (JDIMENSION *) NULL, (JDIMENSION) 0, - output_buf, out_row_ctr, out_rows_avail); + (JDIMENSION *) NULL, (JDIMENSION) 0, + output_buf, out_row_ctr, out_rows_avail); } #endif /* QUANT_2PASS_SUPPORTED */ @@ -483,11 +425,11 @@ main_ptr = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); + sizeof(my_main_controller)); cinfo->main = (struct jpeg_d_main_controller *) main_ptr; main_ptr->pub.start_pass = start_pass_main; - if (need_full_buffer) /* shouldn't happen */ + if (need_full_buffer) /* shouldn't happen */ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); /* Allocate the workspace. @@ -507,8 +449,8 @@ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / cinfo->_min_DCT_scaled_size; /* height of a row group of component */ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * compptr->_DCT_scaled_size, - (JDIMENSION) (rgroup * ngroups)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + compptr->width_in_blocks * compptr->_DCT_scaled_size, + (JDIMENSION) (rgroup * ngroups)); } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmainct.h glmark2-2021.02/src/libjpeg-turbo/jdmainct.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmainct.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmainct.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,71 @@ +/* + * jdmainct.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" +#include "jpegcomp.h" + + +/* Private buffer controller object */ + +typedef struct { + struct jpeg_d_main_controller pub; /* public fields */ + + /* Pointer to allocated workspace (M or M+2 row groups). */ + JSAMPARRAY buffer[MAX_COMPONENTS]; + + boolean buffer_full; /* Have we gotten an iMCU row from decoder? */ + JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */ + + /* Remaining fields are only used in the context case. */ + + /* These are the master pointers to the funny-order pointer lists. */ + JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */ + + int whichptr; /* indicates which pointer set is now in use */ + int context_state; /* process_data state machine status */ + JDIMENSION rowgroups_avail; /* row groups available to postprocessor */ + JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */ +} my_main_controller; + +typedef my_main_controller *my_main_ptr; + + +/* context_state values: */ +#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */ +#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */ +#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */ + + +LOCAL(void) +set_wraparound_pointers (j_decompress_ptr cinfo) +/* Set up the "wraparound" pointers at top and bottom of the pointer lists. + * This changes the pointer list state from top-of-image to the normal state. + */ +{ + my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + int ci, i, rgroup; + int M = cinfo->_min_DCT_scaled_size; + jpeg_component_info *compptr; + JSAMPARRAY xbuf0, xbuf1; + + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / + cinfo->_min_DCT_scaled_size; /* height of a row group of component */ + xbuf0 = main_ptr->xbuffer[0][ci]; + xbuf1 = main_ptr->xbuffer[1][ci]; + for (i = 0; i < rgroup; i++) { + xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; + xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; + xbuf0[rgroup*(M+2) + i] = xbuf0[i]; + xbuf1[rgroup*(M+2) + i] = xbuf1[i]; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmarker.c glmark2-2021.02/src/libjpeg-turbo/jdmarker.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmarker.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmarker.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jdmarker.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * Copyright (C) 2012, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2012, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains routines to decode JPEG datastream markers. * Most of the complexity arises from our desire to support input @@ -18,29 +20,29 @@ #include "jpeglib.h" -typedef enum { /* JPEG marker codes */ +typedef enum { /* JPEG marker codes */ M_SOF0 = 0xc0, M_SOF1 = 0xc1, M_SOF2 = 0xc2, M_SOF3 = 0xc3, - + M_SOF5 = 0xc5, M_SOF6 = 0xc6, M_SOF7 = 0xc7, - + M_JPG = 0xc8, M_SOF9 = 0xc9, M_SOF10 = 0xca, M_SOF11 = 0xcb, - + M_SOF13 = 0xcd, M_SOF14 = 0xce, M_SOF15 = 0xcf, - + M_DHT = 0xc4, - + M_DAC = 0xcc, - + M_RST0 = 0xd0, M_RST1 = 0xd1, M_RST2 = 0xd2, @@ -49,7 +51,7 @@ M_RST5 = 0xd5, M_RST6 = 0xd6, M_RST7 = 0xd7, - + M_SOI = 0xd8, M_EOI = 0xd9, M_SOS = 0xda, @@ -58,7 +60,7 @@ M_DRI = 0xdd, M_DHP = 0xde, M_EXP = 0xdf, - + M_APP0 = 0xe0, M_APP1 = 0xe1, M_APP2 = 0xe2, @@ -75,13 +77,13 @@ M_APP13 = 0xed, M_APP14 = 0xee, M_APP15 = 0xef, - + M_JPG0 = 0xf0, M_JPG13 = 0xfd, M_COM = 0xfe, - + M_TEM = 0x01, - + M_ERROR = 0x100 } JPEG_MARKER; @@ -100,12 +102,12 @@ unsigned int length_limit_APPn[16]; /* Status of COM/APPn marker saving */ - jpeg_saved_marker_ptr cur_marker; /* NULL if not processing a marker */ - unsigned int bytes_read; /* data bytes read so far in marker */ + jpeg_saved_marker_ptr cur_marker; /* NULL if not processing a marker */ + unsigned int bytes_read; /* data bytes read so far in marker */ /* Note: cur_marker is not linked into marker_list until it's all read. */ } my_marker_reader; -typedef my_marker_reader * my_marker_ptr; +typedef my_marker_reader *my_marker_ptr; /* @@ -118,49 +120,49 @@ /* Declare and initialize local copies of input pointer/count */ #define INPUT_VARS(cinfo) \ - struct jpeg_source_mgr * datasrc = (cinfo)->src; \ - const JOCTET * next_input_byte = datasrc->next_input_byte; \ - size_t bytes_in_buffer = datasrc->bytes_in_buffer + struct jpeg_source_mgr *datasrc = (cinfo)->src; \ + const JOCTET *next_input_byte = datasrc->next_input_byte; \ + size_t bytes_in_buffer = datasrc->bytes_in_buffer /* Unload the local copies --- do this only at a restart boundary */ #define INPUT_SYNC(cinfo) \ - ( datasrc->next_input_byte = next_input_byte, \ - datasrc->bytes_in_buffer = bytes_in_buffer ) + ( datasrc->next_input_byte = next_input_byte, \ + datasrc->bytes_in_buffer = bytes_in_buffer ) /* Reload the local copies --- used only in MAKE_BYTE_AVAIL */ #define INPUT_RELOAD(cinfo) \ - ( next_input_byte = datasrc->next_input_byte, \ - bytes_in_buffer = datasrc->bytes_in_buffer ) + ( next_input_byte = datasrc->next_input_byte, \ + bytes_in_buffer = datasrc->bytes_in_buffer ) /* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available. * Note we do *not* do INPUT_SYNC before calling fill_input_buffer, * but we must reload the local copies after a successful fill. */ #define MAKE_BYTE_AVAIL(cinfo,action) \ - if (bytes_in_buffer == 0) { \ - if (! (*datasrc->fill_input_buffer) (cinfo)) \ - { action; } \ - INPUT_RELOAD(cinfo); \ - } + if (bytes_in_buffer == 0) { \ + if (! (*datasrc->fill_input_buffer) (cinfo)) \ + { action; } \ + INPUT_RELOAD(cinfo); \ + } /* Read a byte into variable V. * If must suspend, take the specified action (typically "return FALSE"). */ #define INPUT_BYTE(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = GETJOCTET(*next_input_byte++); ) + MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V = GETJOCTET(*next_input_byte++); ) /* As above, but read two bytes interpreted as an unsigned 16-bit integer. - * V should be declared unsigned int or perhaps INT32. + * V should be declared unsigned int or perhaps JLONG. */ #define INPUT_2BYTES(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \ - MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V += GETJOCTET(*next_input_byte++); ) + MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \ + MAKE_BYTE_AVAIL(cinfo,action); \ + bytes_in_buffer--; \ + V += GETJOCTET(*next_input_byte++); ) /* @@ -199,7 +201,7 @@ /* Process an SOI marker */ { int i; - + TRACEMS(cinfo, 1, JTRC_SOI); if (cinfo->marker->saw_SOI) @@ -238,9 +240,9 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) /* Process a SOFn marker */ { - INT32 length; + JLONG length; int c, ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; INPUT_VARS(cinfo); cinfo->progressive_mode = is_prog; @@ -256,8 +258,8 @@ length -= 8; TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker, - (int) cinfo->image_width, (int) cinfo->image_height, - cinfo->num_components); + (int) cinfo->image_width, (int) cinfo->image_height, + cinfo->num_components); if (cinfo->marker->saw_SOF) ERREXIT(cinfo, JERR_SOF_DUPLICATE); @@ -272,11 +274,11 @@ if (length != (cinfo->num_components * 3)) ERREXIT(cinfo, JERR_BAD_LENGTH); - if (cinfo->comp_info == NULL) /* do only once, even if suspend */ + if (cinfo->comp_info == NULL) /* do only once, even if suspend */ cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * SIZEOF(jpeg_component_info)); - + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->num_components * sizeof(jpeg_component_info)); + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { compptr->component_index = ci; @@ -287,8 +289,8 @@ INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE); TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT, - compptr->component_id, compptr->h_samp_factor, - compptr->v_samp_factor, compptr->quant_tbl_no); + compptr->component_id, compptr->h_samp_factor, + compptr->v_samp_factor, compptr->quant_tbl_no); } cinfo->marker->saw_SOF = TRUE; @@ -302,9 +304,9 @@ get_sos (j_decompress_ptr cinfo) /* Process a SOS marker */ { - INT32 length; - int i, ci, n, c, cc; - jpeg_component_info * compptr; + JLONG length; + int i, ci, n, c, cc, pi; + jpeg_component_info *compptr; INPUT_VARS(cinfo); if (! cinfo->marker->saw_SOF) @@ -323,17 +325,18 @@ /* Collect the component-spec parameters */ - for (i = 0; i < cinfo->num_components; i++) + for (i = 0; i < MAX_COMPS_IN_SCAN; i++) cinfo->cur_comp_info[i] = NULL; for (i = 0; i < n; i++) { INPUT_BYTE(cinfo, cc, return FALSE); INPUT_BYTE(cinfo, c, return FALSE); - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + + for (ci = 0, compptr = cinfo->comp_info; + ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN; + ci++, compptr++) { if (cc == compptr->component_id && !cinfo->cur_comp_info[ci]) - goto id_found; + goto id_found; } ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); @@ -343,9 +346,16 @@ cinfo->cur_comp_info[i] = compptr; compptr->dc_tbl_no = (c >> 4) & 15; compptr->ac_tbl_no = (c ) & 15; - + TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc, - compptr->dc_tbl_no, compptr->ac_tbl_no); + compptr->dc_tbl_no, compptr->ac_tbl_no); + + /* This CSi (cc) should differ from the previous CSi */ + for (pi = 0; pi < i; pi++) { + if (cinfo->cur_comp_info[pi] == compptr) { + ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); + } + } } /* Collect the additional scan parameters Ss, Se, Ah/Al. */ @@ -358,7 +368,7 @@ cinfo->Al = (c ) & 15; TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se, - cinfo->Ah, cinfo->Al); + cinfo->Ah, cinfo->Al); /* Prepare to scan data & restart markers */ cinfo->marker->next_restart_num = 0; @@ -377,13 +387,13 @@ get_dac (j_decompress_ptr cinfo) /* Process a DAC marker */ { - INT32 length; + JLONG length; int index, val; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + while (length > 0) { INPUT_BYTE(cinfo, index, return FALSE); INPUT_BYTE(cinfo, val, return FALSE); @@ -397,11 +407,11 @@ if (index >= NUM_ARITH_TBLS) { /* define AC table */ cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val; - } else { /* define DC table */ + } else { /* define DC table */ cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F); cinfo->arith_dc_U[index] = (UINT8) (val >> 4); if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index]) - ERREXIT1(cinfo, JERR_DAC_VALUE, val); + ERREXIT1(cinfo, JERR_DAC_VALUE, val); } } @@ -423,7 +433,7 @@ get_dht (j_decompress_ptr cinfo) /* Process a DHT marker */ { - INT32 length; + JLONG length; UINT8 bits[17]; UINT8 huffval[256]; int i, index, count; @@ -432,12 +442,12 @@ INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + while (length > 16) { INPUT_BYTE(cinfo, index, return FALSE); TRACEMS1(cinfo, 1, JTRC_DHT, index); - + bits[0] = 0; count = 0; for (i = 1; i <= 16; i++) { @@ -448,38 +458,41 @@ length -= 1 + 16; TRACEMS8(cinfo, 2, JTRC_HUFFBITS, - bits[1], bits[2], bits[3], bits[4], - bits[5], bits[6], bits[7], bits[8]); + bits[1], bits[2], bits[3], bits[4], + bits[5], bits[6], bits[7], bits[8]); TRACEMS8(cinfo, 2, JTRC_HUFFBITS, - bits[9], bits[10], bits[11], bits[12], - bits[13], bits[14], bits[15], bits[16]); + bits[9], bits[10], bits[11], bits[12], + bits[13], bits[14], bits[15], bits[16]); /* Here we just do minimal validation of the counts to avoid walking * off the end of our table space. jdhuff.c will check more carefully. */ - if (count > 256 || ((INT32) count) > length) + if (count > 256 || ((JLONG) count) > length) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); for (i = 0; i < count; i++) INPUT_BYTE(cinfo, huffval[i], return FALSE); + MEMZERO(&huffval[count], (256 - count) * sizeof(UINT8)); + length -= count; - if (index & 0x10) { /* AC table definition */ + if (index & 0x10) { /* AC table definition */ index -= 0x10; + if (index < 0 || index >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_DHT_INDEX, index); htblptr = &cinfo->ac_huff_tbl_ptrs[index]; - } else { /* DC table definition */ + } else { /* DC table definition */ + if (index < 0 || index >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_DHT_INDEX, index); htblptr = &cinfo->dc_huff_tbl_ptrs[index]; } - if (index < 0 || index >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_DHT_INDEX, index); - if (*htblptr == NULL) *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - - MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits)); - MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval)); + + MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits)); + MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval)); } if (length != 0) @@ -494,7 +507,7 @@ get_dqt (j_decompress_ptr cinfo) /* Process a DQT marker */ { - INT32 length; + JLONG length; int n, i, prec; unsigned int tmp; JQUANT_TBL *quant_ptr; @@ -512,27 +525,27 @@ if (n >= NUM_QUANT_TBLS) ERREXIT1(cinfo, JERR_DQT_INDEX, n); - + if (cinfo->quant_tbl_ptrs[n] == NULL) cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo); quant_ptr = cinfo->quant_tbl_ptrs[n]; for (i = 0; i < DCTSIZE2; i++) { if (prec) - INPUT_2BYTES(cinfo, tmp, return FALSE); + INPUT_2BYTES(cinfo, tmp, return FALSE); else - INPUT_BYTE(cinfo, tmp, return FALSE); + INPUT_BYTE(cinfo, tmp, return FALSE); /* We convert the zigzag-order table to natural array order. */ quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp; } if (cinfo->err->trace_level >= 2) { for (i = 0; i < DCTSIZE2; i += 8) { - TRACEMS8(cinfo, 2, JTRC_QUANTVALS, - quant_ptr->quantval[i], quant_ptr->quantval[i+1], - quant_ptr->quantval[i+2], quant_ptr->quantval[i+3], - quant_ptr->quantval[i+4], quant_ptr->quantval[i+5], - quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]); + TRACEMS8(cinfo, 2, JTRC_QUANTVALS, + quant_ptr->quantval[i], quant_ptr->quantval[i+1], + quant_ptr->quantval[i+2], quant_ptr->quantval[i+3], + quant_ptr->quantval[i+4], quant_ptr->quantval[i+5], + quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]); } } @@ -552,12 +565,12 @@ get_dri (j_decompress_ptr cinfo) /* Process a DRI marker */ { - INT32 length; + JLONG length; unsigned int tmp; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); - + if (length != 4) ERREXIT(cinfo, JERR_BAD_LENGTH); @@ -579,20 +592,20 @@ * JFIF and Adobe markers, respectively. */ -#define APP0_DATA_LEN 14 /* Length of interesting data in APP0 */ -#define APP14_DATA_LEN 12 /* Length of interesting data in APP14 */ -#define APPN_DATA_LEN 14 /* Must be the largest of the above!! */ +#define APP0_DATA_LEN 14 /* Length of interesting data in APP0 */ +#define APP14_DATA_LEN 12 /* Length of interesting data in APP14 */ +#define APPN_DATA_LEN 14 /* Must be the largest of the above!! */ LOCAL(void) -examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data, - unsigned int datalen, INT32 remaining) +examine_app0 (j_decompress_ptr cinfo, JOCTET *data, + unsigned int datalen, JLONG remaining) /* Examine first few bytes from an APP0. * Take appropriate action if it is a JFIF marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. */ { - INT32 totallen = (INT32) datalen + remaining; + JLONG totallen = (JLONG) datalen + remaining; if (datalen >= APP0_DATA_LEN && GETJOCTET(data[0]) == 0x4A && @@ -615,18 +628,18 @@ */ if (cinfo->JFIF_major_version != 1) WARNMS2(cinfo, JWRN_JFIF_MAJOR, - cinfo->JFIF_major_version, cinfo->JFIF_minor_version); + cinfo->JFIF_major_version, cinfo->JFIF_minor_version); /* Generate trace messages */ TRACEMS5(cinfo, 1, JTRC_JFIF, - cinfo->JFIF_major_version, cinfo->JFIF_minor_version, - cinfo->X_density, cinfo->Y_density, cinfo->density_unit); + cinfo->JFIF_major_version, cinfo->JFIF_minor_version, + cinfo->X_density, cinfo->Y_density, cinfo->density_unit); /* Validate thumbnail dimensions and issue appropriate messages */ if (GETJOCTET(data[12]) | GETJOCTET(data[13])) TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, - GETJOCTET(data[12]), GETJOCTET(data[13])); + GETJOCTET(data[12]), GETJOCTET(data[13])); totallen -= APP0_DATA_LEN; if (totallen != - ((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3)) + ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG) 3)) TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen); } else if (datalen >= 6 && GETJOCTET(data[0]) == 0x4A && @@ -650,7 +663,7 @@ break; default: TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, - GETJOCTET(data[5]), (int) totallen); + GETJOCTET(data[5]), (int) totallen); break; } } else { @@ -661,8 +674,8 @@ LOCAL(void) -examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data, - unsigned int datalen, INT32 remaining) +examine_app14 (j_decompress_ptr cinfo, JOCTET *data, + unsigned int datalen, JLONG remaining) /* Examine first few bytes from an APP14. * Take appropriate action if it is an Adobe marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. @@ -695,7 +708,7 @@ get_interesting_appn (j_decompress_ptr cinfo) /* Process an APP0 or APP14 marker without saving it */ { - INT32 length; + JLONG length; JOCTET b[APPN_DATA_LEN]; unsigned int i, numtoread; INPUT_VARS(cinfo); @@ -717,10 +730,10 @@ /* process it */ switch (cinfo->unread_marker) { case M_APP0: - examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length); + examine_app0(cinfo, (JOCTET *) b, numtoread, length); break; case M_APP14: - examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length); + examine_app14(cinfo, (JOCTET *) b, numtoread, length); break; default: /* can't get here unless jpeg_save_markers chooses wrong processor */ @@ -746,33 +759,33 @@ my_marker_ptr marker = (my_marker_ptr) cinfo->marker; jpeg_saved_marker_ptr cur_marker = marker->cur_marker; unsigned int bytes_read, data_length; - JOCTET FAR * data; - INT32 length = 0; + JOCTET *data; + JLONG length = 0; INPUT_VARS(cinfo); if (cur_marker == NULL) { /* begin reading a marker */ INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - if (length >= 0) { /* watch out for bogus length word */ + if (length >= 0) { /* watch out for bogus length word */ /* figure out how much we want to save */ unsigned int limit; if (cinfo->unread_marker == (int) M_COM) - limit = marker->length_limit_COM; + limit = marker->length_limit_COM; else - limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0]; + limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0]; if ((unsigned int) length < limit) - limit = (unsigned int) length; + limit = (unsigned int) length; /* allocate and initialize the marker item */ cur_marker = (jpeg_saved_marker_ptr) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(struct jpeg_marker_struct) + limit); + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(struct jpeg_marker_struct) + limit); cur_marker->next = NULL; cur_marker->marker = (UINT8) cinfo->unread_marker; cur_marker->original_length = (unsigned int) length; cur_marker->data_length = limit; /* data area is just beyond the jpeg_marker_struct */ - data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1); + data = cur_marker->data = (JOCTET *) (cur_marker + 1); marker->cur_marker = cur_marker; marker->bytes_read = 0; bytes_read = 0; @@ -790,7 +803,7 @@ } while (bytes_read < data_length) { - INPUT_SYNC(cinfo); /* move the restart point to here */ + INPUT_SYNC(cinfo); /* move the restart point to here */ marker->bytes_read = bytes_read; /* If there's not at least one byte in buffer, suspend */ MAKE_BYTE_AVAIL(cinfo, return FALSE); @@ -803,14 +816,14 @@ } /* Done reading what we want to read */ - if (cur_marker != NULL) { /* will be NULL if bogus length word */ + if (cur_marker != NULL) { /* will be NULL if bogus length word */ /* Add new marker to end of list */ if (cinfo->marker_list == NULL) { cinfo->marker_list = cur_marker; } else { jpeg_saved_marker_ptr prev = cinfo->marker_list; while (prev->next != NULL) - prev = prev->next; + prev = prev->next; prev->next = cur_marker; } /* Reset pointer & calc remaining data length */ @@ -830,12 +843,12 @@ break; default: TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, - (int) (data_length + length)); + (int) (data_length + length)); break; } /* skip any remaining data -- could be lots */ - INPUT_SYNC(cinfo); /* do before skip_input_data */ + INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) (*cinfo->src->skip_input_data) (cinfo, (long) length); @@ -849,15 +862,15 @@ skip_variable (j_decompress_ptr cinfo) /* Skip over an unknown or uninteresting variable-length marker */ { - INT32 length; + JLONG length; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - + TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length); - INPUT_SYNC(cinfo); /* do before skip_input_data */ + INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) (*cinfo->src->skip_input_data) (cinfo, (long) length); @@ -901,7 +914,7 @@ INPUT_BYTE(cinfo, c, return FALSE); } while (c == 0xFF); if (c != 0) - break; /* found a valid marker, exit loop */ + break; /* found a valid marker, exit loop */ /* Reach here if we found a stuffed-zero data sequence (FF/00). * Discard it and loop back to try again. */ @@ -961,11 +974,11 @@ /* NB: first_marker() enforces the requirement that SOI appear first. */ if (cinfo->unread_marker == 0) { if (! cinfo->marker->saw_SOI) { - if (! first_marker(cinfo)) - return JPEG_SUSPENDED; + if (! first_marker(cinfo)) + return JPEG_SUSPENDED; } else { - if (! next_marker(cinfo)) - return JPEG_SUSPENDED; + if (! next_marker(cinfo)) + return JPEG_SUSPENDED; } } /* At this point cinfo->unread_marker contains the marker code and the @@ -975,74 +988,74 @@ switch (cinfo->unread_marker) { case M_SOI: if (! get_soi(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF0: /* Baseline */ - case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ if (! get_sof(cinfo, FALSE, FALSE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF2: /* Progressive, Huffman */ + case M_SOF2: /* Progressive, Huffman */ if (! get_sof(cinfo, TRUE, FALSE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF9: /* Extended sequential, arithmetic */ if (! get_sof(cinfo, FALSE, TRUE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_SOF10: /* Progressive, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ if (! get_sof(cinfo, TRUE, TRUE)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; /* Currently unsupported SOFn types */ - case M_SOF3: /* Lossless, Huffman */ - case M_SOF5: /* Differential sequential, Huffman */ - case M_SOF6: /* Differential progressive, Huffman */ - case M_SOF7: /* Differential lossless, Huffman */ - case M_JPG: /* Reserved for JPEG extensions */ - case M_SOF11: /* Lossless, arithmetic */ - case M_SOF13: /* Differential sequential, arithmetic */ - case M_SOF14: /* Differential progressive, arithmetic */ - case M_SOF15: /* Differential lossless, arithmetic */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_JPG: /* Reserved for JPEG extensions */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker); break; case M_SOS: if (! get_sos(cinfo)) - return JPEG_SUSPENDED; - cinfo->unread_marker = 0; /* processed the marker */ + return JPEG_SUSPENDED; + cinfo->unread_marker = 0; /* processed the marker */ return JPEG_REACHED_SOS; - + case M_EOI: TRACEMS(cinfo, 1, JTRC_EOI); - cinfo->unread_marker = 0; /* processed the marker */ + cinfo->unread_marker = 0; /* processed the marker */ return JPEG_REACHED_EOI; - + case M_DAC: if (! get_dac(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DHT: if (! get_dht(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DQT: if (! get_dqt(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_DRI: if (! get_dri(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - + case M_APP0: case M_APP1: case M_APP2: @@ -1060,16 +1073,16 @@ case M_APP14: case M_APP15: if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[ - cinfo->unread_marker - (int) M_APP0]) (cinfo)) - return JPEG_SUSPENDED; + cinfo->unread_marker - (int) M_APP0]) (cinfo)) + return JPEG_SUSPENDED; break; - + case M_COM: if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - case M_RST0: /* these are all parameterless */ + case M_RST0: /* these are all parameterless */ case M_RST1: case M_RST2: case M_RST3: @@ -1081,12 +1094,12 @@ TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker); break; - case M_DNL: /* Ignore DNL ... perhaps the wrong thing */ + case M_DNL: /* Ignore DNL ... perhaps the wrong thing */ if (! skip_variable(cinfo)) - return JPEG_SUSPENDED; + return JPEG_SUSPENDED; break; - default: /* must be DHP, EXP, JPGn, or RESn */ + default: /* must be DHP, EXP, JPGn, or RESn */ /* For now, we treat the reserved markers as fatal errors since they are * likely to be used to signal incompatible JPEG Part 3 extensions. * Once the JPEG 3 version-number marker is well defined, this code @@ -1132,7 +1145,7 @@ /* Uh-oh, the restart markers have been messed up. */ /* Let the data source manager determine how to resync. */ if (! (*cinfo->src->resync_to_restart) (cinfo, - cinfo->marker->next_restart_num)) + cinfo->marker->next_restart_num)) return FALSE; } @@ -1197,25 +1210,25 @@ { int marker = cinfo->unread_marker; int action = 1; - + /* Always put up a warning. */ WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired); - + /* Outer loop handles repeated decision after scanning forward. */ for (;;) { if (marker < (int) M_SOF0) - action = 2; /* invalid marker */ + action = 2; /* invalid marker */ else if (marker < (int) M_RST0 || marker > (int) M_RST7) - action = 3; /* valid non-restart marker */ + action = 3; /* valid non-restart marker */ else { if (marker == ((int) M_RST0 + ((desired+1) & 7)) || - marker == ((int) M_RST0 + ((desired+2) & 7))) - action = 3; /* one of the next two expected restarts */ + marker == ((int) M_RST0 + ((desired+2) & 7))) + action = 3; /* one of the next two expected restarts */ else if (marker == ((int) M_RST0 + ((desired-1) & 7)) || - marker == ((int) M_RST0 + ((desired-2) & 7))) - action = 2; /* a prior restart, so advance */ + marker == ((int) M_RST0 + ((desired-2) & 7))) + action = 2; /* a prior restart, so advance */ else - action = 1; /* desired restart or too far away */ + action = 1; /* desired restart or too far away */ } TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action); switch (action) { @@ -1226,7 +1239,7 @@ case 2: /* Scan to the next marker, and repeat the decision loop. */ if (! next_marker(cinfo)) - return FALSE; + return FALSE; marker = cinfo->unread_marker; break; case 3: @@ -1247,10 +1260,10 @@ { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; - cinfo->comp_info = NULL; /* until allocated by get_sof */ - cinfo->input_scan_number = 0; /* no SOS seen yet */ - cinfo->unread_marker = 0; /* no pending marker */ - marker->pub.saw_SOI = FALSE; /* set internal state too */ + cinfo->comp_info = NULL; /* until allocated by get_sof */ + cinfo->input_scan_number = 0; /* no SOS seen yet */ + cinfo->unread_marker = 0; /* no pending marker */ + marker->pub.saw_SOI = FALSE; /* set internal state too */ marker->pub.saw_SOF = FALSE; marker->pub.discarded_bytes = 0; marker->cur_marker = NULL; @@ -1271,7 +1284,7 @@ /* Create subobject in permanent pool */ marker = (my_marker_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_marker_reader)); + sizeof(my_marker_reader)); cinfo->marker = (struct jpeg_marker_reader *) marker; /* Initialize public method pointers */ marker->pub.reset_marker_reader = reset_marker_reader; @@ -1302,7 +1315,7 @@ GLOBAL(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit) + unsigned int length_limit) { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; long maxlength; @@ -1311,7 +1324,7 @@ /* Length limit mustn't be larger than what we can allocate * (should only be a concern in a 16-bit environment). */ - maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct); + maxlength = cinfo->mem->max_alloc_chunk - sizeof(struct jpeg_marker_struct); if (((long) length_limit) > maxlength) length_limit = (unsigned int) maxlength; @@ -1351,7 +1364,7 @@ GLOBAL(void) jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code, - jpeg_marker_parser_method routine) + jpeg_marker_parser_method routine) { my_marker_ptr marker = (my_marker_ptr) cinfo->marker; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmaster.c glmark2-2021.02/src/libjpeg-turbo/jdmaster.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmaster.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmaster.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,15 @@ /* * jdmaster.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2002-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2016, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains master control logic for the JPEG decompressor. * These routines are concerned with selecting the modules to be executed @@ -16,25 +21,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jpegcomp.h" - - -/* Private state */ - -typedef struct { - struct jpeg_decomp_master pub; /* public fields */ - - int pass_number; /* # of passes completed */ - - boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */ - - /* Saved references to initialized quantizer modules, - * in case we need to switch modes. - */ - struct jpeg_color_quantizer * quantizer_1pass; - struct jpeg_color_quantizer * quantizer_2pass; -} my_decomp_master; - -typedef my_decomp_master * my_master_ptr; +#include "jdmaster.h" /* @@ -49,9 +36,10 @@ /* Merging is the equivalent of plain box-filter upsampling */ if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling) return FALSE; - /* jdmerge.c only supports YCC=>RGB color conversion */ + /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */ if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 || (cinfo->out_color_space != JCS_RGB && + cinfo->out_color_space != JCS_RGB565 && cinfo->out_color_space != JCS_EXT_RGB && cinfo->out_color_space != JCS_EXT_RGBX && cinfo->out_color_space != JCS_EXT_BGR && @@ -61,8 +49,12 @@ cinfo->out_color_space != JCS_EXT_RGBA && cinfo->out_color_space != JCS_EXT_BGRA && cinfo->out_color_space != JCS_EXT_ABGR && - cinfo->out_color_space != JCS_EXT_ARGB) || - cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space]) + cinfo->out_color_space != JCS_EXT_ARGB)) + return FALSE; + if ((cinfo->out_color_space == JCS_RGB565 && + cinfo->out_color_components != 3) || + (cinfo->out_color_space != JCS_RGB565 && + cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])) return FALSE; /* and it only handles 2h1v or 2h2v sampling ratios */ if (cinfo->comp_info[0].h_samp_factor != 2 || @@ -78,7 +70,7 @@ cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; /* ??? also need to test for upsample-time rescaling, when & if supported */ - return TRUE; /* by golly, it'll work... */ + return TRUE; /* by golly, it'll work... */ #else return FALSE; #endif @@ -89,6 +81,177 @@ * Compute output image dimensions and related values. * NOTE: this is exported for possible use by application. * Hence it mustn't do anything that can't be done twice. + */ + +#if JPEG_LIB_VERSION >= 80 +GLOBAL(void) +#else +LOCAL(void) +#endif +jpeg_core_output_dimensions (j_decompress_ptr cinfo) +/* Do computations that are needed before master selection phase. + * This function is used for transcoding and full decompression. + */ +{ +#ifdef IDCT_SCALING_SUPPORTED + int ci; + jpeg_component_info *compptr; + + /* Compute actual output image dimensions and DCT scaling choices. */ + if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) { + /* Provide 1/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 1; + cinfo->_min_DCT_v_scaled_size = 1; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) { + /* Provide 2/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 2; + cinfo->_min_DCT_v_scaled_size = 2; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) { + /* Provide 3/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 3; + cinfo->_min_DCT_v_scaled_size = 3; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) { + /* Provide 4/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 4; + cinfo->_min_DCT_v_scaled_size = 4; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) { + /* Provide 5/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 5; + cinfo->_min_DCT_v_scaled_size = 5; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) { + /* Provide 6/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 6; + cinfo->_min_DCT_v_scaled_size = 6; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) { + /* Provide 7/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 7; + cinfo->_min_DCT_v_scaled_size = 7; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) { + /* Provide 8/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 8; + cinfo->_min_DCT_v_scaled_size = 8; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) { + /* Provide 9/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 9; + cinfo->_min_DCT_v_scaled_size = 9; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) { + /* Provide 10/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 10; + cinfo->_min_DCT_v_scaled_size = 10; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) { + /* Provide 11/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 11; + cinfo->_min_DCT_v_scaled_size = 11; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) { + /* Provide 12/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 12; + cinfo->_min_DCT_v_scaled_size = 12; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) { + /* Provide 13/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 13; + cinfo->_min_DCT_v_scaled_size = 13; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) { + /* Provide 14/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 14; + cinfo->_min_DCT_v_scaled_size = 14; + } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) { + /* Provide 15/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 15; + cinfo->_min_DCT_v_scaled_size = 15; + } else { + /* Provide 16/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE); + cinfo->_min_DCT_h_scaled_size = 16; + cinfo->_min_DCT_v_scaled_size = 16; + } + + /* Recompute dimensions of components */ + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size; + compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size; + } + +#else /* !IDCT_SCALING_SUPPORTED */ + + /* Hardwire it to "no scaling" */ + cinfo->output_width = cinfo->image_width; + cinfo->output_height = cinfo->image_height; + /* jdinput.c has already initialized DCT_scaled_size, + * and has computed unscaled downsampled_width and downsampled_height. + */ + +#endif /* IDCT_SCALING_SUPPORTED */ +} + + +/* + * Compute output image dimensions and related values. + * NOTE: this is exported for possible use by application. + * Hence it mustn't do anything that can't be done twice. * Also note that it may be called before the master module is initialized! */ @@ -105,65 +268,24 @@ if (cinfo->global_state != DSTATE_READY) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + /* Compute core output image dimensions and DCT scaling choices. */ + jpeg_core_output_dimensions(cinfo); + #ifdef IDCT_SCALING_SUPPORTED - /* Compute actual output image dimensions and DCT scaling choices. */ - if (cinfo->scale_num * 8 <= cinfo->scale_denom) { - /* Provide 1/8 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 8L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 8L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 1; -#else - cinfo->min_DCT_scaled_size = 1; -#endif - } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) { - /* Provide 1/4 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 4L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 4L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 2; -#else - cinfo->min_DCT_scaled_size = 2; -#endif - } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) { - /* Provide 1/2 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 2L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 2L); -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 4; -#else - cinfo->min_DCT_scaled_size = 4; -#endif - } else { - /* Provide 1/1 scaling */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; -#if JPEG_LIB_VERSION >= 70 - cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE; -#else - cinfo->min_DCT_scaled_size = DCTSIZE; -#endif - } /* In selecting the actual DCT scaling for each component, we try to * scale up the chroma components via IDCT scaling rather than upsampling. * This saves time if the upsampler gets to use 1:1 scaling. - * Note this code assumes that the supported DCT scalings are powers of 2. + * Note this code adapts subsampling ratios which are powers of 2. */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { int ssize = cinfo->_min_DCT_scaled_size; while (ssize < DCTSIZE && - (compptr->h_samp_factor * ssize * 2 <= - cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) && - (compptr->v_samp_factor * ssize * 2 <= - cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size)) { + ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) % + (compptr->h_samp_factor * ssize * 2) == 0) && + ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) % + (compptr->v_samp_factor * ssize * 2) == 0)) { ssize = ssize * 2; } #if JPEG_LIB_VERSION >= 70 @@ -181,12 +303,12 @@ /* Size in samples, after IDCT scaling */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * - (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), + (long) (cinfo->max_h_samp_factor * DCTSIZE)); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * - (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), + (long) (cinfo->max_v_samp_factor * DCTSIZE)); } #else /* !IDCT_SCALING_SUPPORTED */ @@ -220,18 +342,19 @@ cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space]; break; case JCS_YCbCr: + case JCS_RGB565: cinfo->out_color_components = 3; break; case JCS_CMYK: case JCS_YCCK: cinfo->out_color_components = 4; break; - default: /* else must be same colorspace as in file */ + default: /* else must be same colorspace as in file */ cinfo->out_color_components = cinfo->num_components; break; } cinfo->output_components = (cinfo->quantize_colors ? 1 : - cinfo->out_color_components); + cinfo->out_color_components); /* See if upsampler will want to emit more than one row at a time */ if (use_merged_upsample(cinfo)) @@ -248,20 +371,20 @@ * processes are inner loops and need to be as fast as possible. On most * machines, particularly CPUs with pipelines or instruction prefetch, * a (subscript-check-less) C table lookup - * x = sample_range_limit[x]; + * x = sample_range_limit[x]; * is faster than explicit tests - * if (x < 0) x = 0; - * else if (x > MAXJSAMPLE) x = MAXJSAMPLE; + * if (x < 0) x = 0; + * else if (x > MAXJSAMPLE) x = MAXJSAMPLE; * These processes all use a common table prepared by the routine below. * * For most steps we can mathematically guarantee that the initial value * of x is within MAXJSAMPLE+1 of the legal range, so a table running from * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient. But for the initial - * limiting step (just after the IDCT), a wildly out-of-range value is + * limiting step (just after the IDCT), a wildly out-of-range value is * possible if the input data is corrupt. To avoid any chance of indexing * off the end of memory and getting a bad-pointer trap, we perform the * post-IDCT limiting thus: - * x = range_limit[x & MASK]; + * x = range_limit[x & MASK]; * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit * samples. Under normal circumstances this is more than enough range and * a correct output will be generated; with bogus input data the mask will @@ -279,37 +402,34 @@ * We can save some space by overlapping the start of the post-IDCT table * with the simpler range limiting table. The post-IDCT table begins at * sample_range_limit + CENTERJSAMPLE. - * - * Note that the table is allocated in near data space on PCs; it's small - * enough and used often enough to justify this. */ LOCAL(void) prepare_range_limit_table (j_decompress_ptr cinfo) /* Allocate and fill in the sample_range_limit table */ { - JSAMPLE * table; + JSAMPLE *table; int i; table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE)); - table += (MAXJSAMPLE+1); /* allow negative subscripts of simple table */ + (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * sizeof(JSAMPLE)); + table += (MAXJSAMPLE+1); /* allow negative subscripts of simple table */ cinfo->sample_range_limit = table; /* First segment of "simple" table: limit[x] = 0 for x < 0 */ - MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE)); + MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * sizeof(JSAMPLE)); /* Main part of "simple" table: limit[x] = x */ for (i = 0; i <= MAXJSAMPLE; i++) table[i] = (JSAMPLE) i; - table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */ + table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */ /* End of simple table, rest of first half of post-IDCT table */ for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++) table[i] = MAXJSAMPLE; /* Second half of post-IDCT table */ MEMZERO(table + (2 * (MAXJSAMPLE+1)), - (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE)); + (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * sizeof(JSAMPLE)); MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE), - cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE)); + cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE)); } @@ -442,6 +562,12 @@ /* Initialize input side of decompressor to consume first scan. */ (*cinfo->inputctl->start_input_pass) (cinfo); + /* Set the first and last iMCU columns to decompress from single-scan images. + * By default, decompress all of the iMCU columns. + */ + cinfo->master->first_iMCU_col = 0; + cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1; + #ifdef D_MULTISCAN_FILES_SUPPORTED /* If jpeg_start_decompress will read the whole file, initialize * progress monitoring appropriately. The input step is counted @@ -497,24 +623,24 @@ if (cinfo->quantize_colors && cinfo->colormap == NULL) { /* Select new quantization method */ if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) { - cinfo->cquantize = master->quantizer_2pass; - master->pub.is_dummy_pass = TRUE; + cinfo->cquantize = master->quantizer_2pass; + master->pub.is_dummy_pass = TRUE; } else if (cinfo->enable_1pass_quant) { - cinfo->cquantize = master->quantizer_1pass; + cinfo->cquantize = master->quantizer_1pass; } else { - ERREXIT(cinfo, JERR_MODE_CHANGE); + ERREXIT(cinfo, JERR_MODE_CHANGE); } } (*cinfo->idct->start_pass) (cinfo); (*cinfo->coef->start_output_pass) (cinfo); if (! cinfo->raw_data_out) { if (! master->using_merged_upsample) - (*cinfo->cconvert->start_pass) (cinfo); + (*cinfo->cconvert->start_pass) (cinfo); (*cinfo->upsample->start_pass) (cinfo); if (cinfo->quantize_colors) - (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass); + (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass); (*cinfo->post->start_pass) (cinfo, - (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); + (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU)); (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU); } } @@ -523,7 +649,7 @@ if (cinfo->progress != NULL) { cinfo->progress->completed_passes = master->pass_number; cinfo->progress->total_passes = master->pass_number + - (master->pub.is_dummy_pass ? 2 : 1); + (master->pub.is_dummy_pass ? 2 : 1); /* In buffered-image mode, we assume one more output pass if EOI not * yet reached, but no more passes if EOI has been reached. */ @@ -586,16 +712,13 @@ GLOBAL(void) jinit_master_decompress (j_decompress_ptr cinfo) { - my_master_ptr master; + my_master_ptr master = (my_master_ptr) cinfo->master; - master = (my_master_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_decomp_master)); - cinfo->master = (struct jpeg_decomp_master *) master; master->pub.prepare_for_output_pass = prepare_for_output_pass; master->pub.finish_output_pass = finish_output_pass; master->pub.is_dummy_pass = FALSE; + master->pub.jinit_upsampler_no_alloc = FALSE; master_selection(cinfo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmaster.h glmark2-2021.02/src/libjpeg-turbo/jdmaster.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmaster.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmaster.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +/* + * jdmaster.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1995, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains the master control structure for the JPEG decompressor. + */ + +/* Private state */ + +typedef struct { + struct jpeg_decomp_master pub; /* public fields */ + + int pass_number; /* # of passes completed */ + + boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */ + + /* Saved references to initialized quantizer modules, + * in case we need to switch modes. + */ + struct jpeg_color_quantizer *quantizer_1pass; + struct jpeg_color_quantizer *quantizer_2pass; +} my_decomp_master; + +typedef my_decomp_master *my_master_ptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmerge.c glmark2-2021.02/src/libjpeg-turbo/jdmerge.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmerge.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmerge.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jdmerge.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * Copyright (C) 2013, Linaro Limited. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains code for merged upsampling/color conversion. * @@ -16,19 +19,19 @@ * (ie, box filtering), we can save some work in color conversion by * calculating all the output pixels corresponding to a pair of chroma * samples at one time. In the conversion equations - * R = Y + K1 * Cr - * G = Y + K2 * Cb + K3 * Cr - * B = Y + K4 * Cb + * R = Y + K1 * Cr + * G = Y + K2 * Cb + K3 * Cr + * B = Y + K4 * Cb * only the Y term varies among the group of pixels corresponding to a pair * of chroma samples, so the rest of the terms can be calculated just once. * At typical sampling ratios, this eliminates half or three-quarters of the * multiplications needed for color conversion. * * This file currently provides implementations for the following cases: - * YCbCr => RGB color conversion only. - * Sampling ratios of 2h1v or 2h2v. - * No scaling needed at upsample time. - * Corner-aligned (non-CCIR601) sampling alignment. + * YCbCr => RGB color conversion only. + * Sampling ratios of 2h1v or 2h2v. + * No scaling needed at upsample time. + * Corner-aligned (non-CCIR601) sampling alignment. * Other special cases could be added, but in most applications these are * the only common cases. (For uncommon cases we fall back on the more * general code in jdsample.c and jdcolor.c.) @@ -38,7 +41,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" -#include "config.h" +#include "jconfigint.h" #ifdef UPSAMPLE_MERGING_SUPPORTED @@ -46,18 +49,17 @@ /* Private subobject */ typedef struct { - struct jpeg_upsampler pub; /* public fields */ + struct jpeg_upsampler pub; /* public fields */ /* Pointer to routine to do actual upsampling/conversion of one row group */ - JMETHOD(void, upmethod, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); /* Private state for YCC->RGB conversion */ - int * Cr_r_tab; /* => table for Cr to R conversion */ - int * Cb_b_tab; /* => table for Cb to B conversion */ - INT32 * Cr_g_tab; /* => table for Cr to G conversion */ - INT32 * Cb_g_tab; /* => table for Cb to G conversion */ + int *Cr_r_tab; /* => table for Cr to R conversion */ + int *Cb_b_tab; /* => table for Cb to B conversion */ + JLONG *Cr_g_tab; /* => table for Cr to G conversion */ + JLONG *Cb_g_tab; /* => table for Cb to G conversion */ /* For 2:1 vertical sampling, we produce two output rows at a time. * We need a "spare" row buffer to hold the second output row if the @@ -65,22 +67,22 @@ * to discard the dummy last row if the image height is odd. */ JSAMPROW spare_row; - boolean spare_full; /* T if spare buffer is occupied */ + boolean spare_full; /* T if spare buffer is occupied */ - JDIMENSION out_row_width; /* samples per output row */ - JDIMENSION rows_to_go; /* counts rows remaining in image */ + JDIMENSION out_row_width; /* samples per output row */ + JDIMENSION rows_to_go; /* counts rows remaining in image */ } my_upsampler; -typedef my_upsampler * my_upsample_ptr; +typedef my_upsampler *my_upsample_ptr; -#define SCALEBITS 16 /* speediest right-shift on some machines */ -#define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) -#define FIX(x) ((INT32) ((x) * (1L<upsample; int i; - INT32 x; + JLONG x; SHIFT_TEMPS upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); + (MAXJSAMPLE+1) * sizeof(int)); upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(int)); - upsample->Cr_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(int)); + upsample->Cr_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); - upsample->Cb_g_tab = (INT32 *) + (MAXJSAMPLE+1) * sizeof(JLONG)); + upsample->Cb_g_tab = (JLONG *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * SIZEOF(INT32)); + (MAXJSAMPLE+1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ /* Cr=>R value is nearest int to 1.40200 * x */ upsample->Cr_r_tab[i] = (int) - RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); /* Cb=>B value is nearest int to 1.77200 * x */ upsample->Cb_b_tab[i] = (int) - RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); + RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ @@ -239,20 +249,23 @@ METHODDEF(void) merged_2v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) /* 2:1 vertical sampling case: may need a spare row. */ { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; JSAMPROW work_ptrs[2]; - JDIMENSION num_rows; /* number of rows returned to caller */ + JDIMENSION num_rows; /* number of rows returned to caller */ if (upsample->spare_full) { /* If we have a spare row saved from a previous cycle, just return it. */ + JDIMENSION size = upsample->out_row_width; + if (cinfo->out_color_space == JCS_RGB565) + size = cinfo->output_width * 2; jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0, - 1, upsample->out_row_width); + 1, size); num_rows = 1; upsample->spare_full = FALSE; } else { @@ -288,17 +301,17 @@ METHODDEF(void) merged_1v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) /* 1:1 vertical sampling case: much easier, never need a spare row. */ { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; /* Just do the upsampling. */ (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, - output_buf + *out_row_ctr); + output_buf + *out_row_ctr); /* Adjust counts */ (*out_row_ctr)++; (*in_row_group_ctr)++; @@ -321,8 +334,8 @@ METHODDEF(void) h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -367,8 +380,8 @@ METHODDEF(void) h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { case JCS_EXT_RGB: @@ -408,6 +421,151 @@ /* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_PIXELS_LE(addr, pixels) { \ + ((INT16*)(addr))[0] = (INT16)(pixels); \ + ((INT16*)(addr))[1] = (INT16)((pixels) >> 16); \ +} +#define WRITE_TWO_PIXELS_BE(addr, pixels) { \ + ((INT16*)(addr))[1] = (INT16)(pixels); \ + ((INT16*)(addr))[0] = (INT16)((pixels) >> 16); \ +} + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficent for dithering RGB888 to RGB565. + */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const JLONG dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +METHODDEF(void) +h2v1_merged_upsample_565 (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); + } + + +METHODDEF(void) +h2v1_merged_upsample_565D (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +METHODDEF(void) +h2v2_merged_upsample_565 (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +METHODDEF(void) +h2v2_merged_upsample_565D (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + if (is_big_endian()) + h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); +} + + +/* * Module initialization routine for merged upsampling/color conversion. * * NB: this is called under the conditions determined by use_merged_upsample() @@ -422,7 +580,7 @@ upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_upsampler)); + sizeof(my_upsampler)); cinfo->upsample = (struct jpeg_upsampler *) upsample; upsample->pub.start_pass = start_pass_merged_upsample; upsample->pub.need_context_rows = FALSE; @@ -435,16 +593,30 @@ upsample->upmethod = jsimd_h2v2_merged_upsample; else upsample->upmethod = h2v2_merged_upsample; + if (cinfo->out_color_space == JCS_RGB565) { + if (cinfo->dither_mode != JDITHER_NONE) { + upsample->upmethod = h2v2_merged_upsample_565D; + } else { + upsample->upmethod = h2v2_merged_upsample_565; + } + } /* Allocate a spare row buffer */ upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE))); + (size_t) (upsample->out_row_width * sizeof(JSAMPLE))); } else { upsample->pub.upsample = merged_1v_upsample; if (jsimd_can_h2v1_merged_upsample()) upsample->upmethod = jsimd_h2v1_merged_upsample; else upsample->upmethod = h2v1_merged_upsample; + if (cinfo->out_color_space == JCS_RGB565) { + if (cinfo->dither_mode != JDITHER_NONE) { + upsample->upmethod = h2v1_merged_upsample_565D; + } else { + upsample->upmethod = h2v1_merged_upsample_565; + } + } /* No spare row needed */ upsample->spare_row = NULL; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrg565.c glmark2-2021.02/src/libjpeg-turbo/jdmrg565.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrg565.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmrg565.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,356 @@ +/* + * jdmrg565.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains code for merged upsampling/color conversion. + */ + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } + } + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = (INT16)rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr * 2]; + inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = (INT16)rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = (INT16)rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + JLONG d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; + unsigned int r, g, b; + JLONG rgb; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr*2]; + inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = (INT16)rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = (INT16)rgb; + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrgext.c glmark2-2021.02/src/libjpeg-turbo/jdmrgext.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrgext.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmrgext.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,186 @@ +/* + * jdmrgext.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains code for merged upsampling/color conversion. + */ + + +/* This file is included by jdmerge.c */ + + +/* + * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. + */ + +INLINE +LOCAL(void) +h2v1_merged_upsample_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr0++); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + outptr[RGB_RED] = range_limit[y + cred]; + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + } +} + + +/* + * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. + */ + +INLINE +LOCAL(void) +h2v2_merged_upsample_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + JLONG * Crgtab = upsample->Cr_g_tab; + JLONG * Cbgtab = upsample->Cb_g_tab; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr*2]; + inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + outptr0 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr00++); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + outptr0 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr01++); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + outptr1 += RGB_PIXELSIZE; + y = GETJSAMPLE(*inptr01++); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + outptr1 += RGB_PIXELSIZE; + } + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr00); + outptr0[RGB_RED] = range_limit[y + cred]; + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif + y = GETJSAMPLE(*inptr01); + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrgext.c.inc glmark2-2021.02/src/libjpeg-turbo/jdmrgext.c.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdmrgext.c.inc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdmrgext.c.inc 1970-01-01 08:00:00.000000000 +0800 @@ -1,156 +0,0 @@ -/* - * jdmrgext.c - * - * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains code for merged upsampling/color conversion. - */ - - -/* This file is included by jdmerge.c */ - - -/* - * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. - */ - -INLINE -LOCAL(void) -h2v1_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr; - JSAMPROW inptr0, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - SHIFT_TEMPS - - inptr0 = input_buf[0][in_row_group_ctr]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr = output_buf[0]; - /* Loop for each pair of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - outptr += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr0++); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - outptr += RGB_PIXELSIZE; - } - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); - outptr[RGB_RED] = range_limit[y + cred]; - outptr[RGB_GREEN] = range_limit[y + cgreen]; - outptr[RGB_BLUE] = range_limit[y + cblue]; - } -} - - -/* - * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. - */ - -INLINE -LOCAL(void) -h2v2_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr0, outptr1; - JSAMPROW inptr00, inptr01, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - SHIFT_TEMPS - - inptr00 = input_buf[0][in_row_group_ctr*2]; - inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr0 = output_buf[0]; - outptr1 = output_buf[1]; - /* Loop for each group of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr00++); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - outptr1 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - outptr1 += RGB_PIXELSIZE; - } - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); - outptr0[RGB_RED] = range_limit[y + cred]; - outptr0[RGB_GREEN] = range_limit[y + cgreen]; - outptr0[RGB_BLUE] = range_limit[y + cblue]; - y = GETJSAMPLE(*inptr01); - outptr1[RGB_RED] = range_limit[y + cred]; - outptr1[RGB_GREEN] = range_limit[y + cgreen]; - outptr1[RGB_BLUE] = range_limit[y + cblue]; - } -} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdphuff.c glmark2-2021.02/src/libjpeg-turbo/jdphuff.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdphuff.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdphuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdphuff.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains Huffman entropy decoding routines for progressive JPEG. * @@ -17,7 +20,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdhuff.c */ +#include "jdhuff.h" /* Declarations shared with jdhuff.c */ #ifdef D_PROGRESSIVE_SUPPORTED @@ -30,8 +33,8 @@ */ typedef struct { - unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; /* This macro is to work around compilers with missing or broken @@ -44,11 +47,11 @@ #else #if MAX_COMPS_IN_SCAN == 4 #define ASSIGN_STATE(dest,src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) + ((dest).EOBRUN = (src).EOBRUN, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -59,29 +62,29 @@ /* These fields are loaded into local variables at start of each MCU. * In case of suspension, we exit WITHOUT updating them. */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ + bitread_perm_state bitstate; /* Bit buffer at start of MCU */ + savable_state saved; /* Other state at start of MCU */ /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; - d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */ + d_derived_tbl *ac_derived_tbl; /* active table during an AC scan */ } phuff_entropy_decoder; -typedef phuff_entropy_decoder * phuff_entropy_ptr; +typedef phuff_entropy_decoder *phuff_entropy_ptr; /* Forward declarations */ -METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); +METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); /* @@ -94,8 +97,9 @@ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; boolean is_DC_band, bad; int ci, coefi, tbl; + d_derived_tbl **pdtbl; int *coef_bit_ptr; - jpeg_component_info * compptr; + jpeg_component_info *compptr; is_DC_band = (cinfo->Ss == 0); @@ -117,7 +121,7 @@ if (cinfo->Al != cinfo->Ah-1) bad = TRUE; } - if (cinfo->Al > 13) /* need not check for < 0 */ + if (cinfo->Al > 13) /* need not check for < 0 */ bad = TRUE; /* Arguably the maximum Al value should be less than 13 for 8-bit precision, * but the spec doesn't say so, and we try to be liberal about what we @@ -127,7 +131,7 @@ */ if (bad) ERREXIT4(cinfo, JERR_BAD_PROGRESSION, - cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); /* Update progression status, and verify that scan order is legal. * Note that inter-scan inconsistencies are treated as warnings * not fatal errors ... not clear if this is right way to behave. @@ -140,7 +144,7 @@ for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); coef_bit_ptr[coefi] = cinfo->Al; } } @@ -164,15 +168,15 @@ * We may build same derived table more than once, but it's not expensive. */ if (is_DC_band) { - if (cinfo->Ah == 0) { /* DC refinement needs no table */ - tbl = compptr->dc_tbl_no; - jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, - & entropy->derived_tbls[tbl]); + if (cinfo->Ah == 0) { /* DC refinement needs no table */ + tbl = compptr->dc_tbl_no; + pdtbl = entropy->derived_tbls + tbl; + jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl); } } else { tbl = compptr->ac_tbl_no; - jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, - & entropy->derived_tbls[tbl]); + pdtbl = entropy->derived_tbls + tbl; + jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl); /* remember the single active table */ entropy->ac_derived_tbl = entropy->derived_tbls[tbl]; } @@ -198,9 +202,11 @@ * On some machines, a shift and add will be faster than a table lookup. */ +#define AVOID_TABLES #ifdef AVOID_TABLES -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) +#define NEG_1 ((unsigned)-1) +#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((NEG_1)<<(s)) + 1) : (x)) #else @@ -263,7 +269,7 @@ /* * Huffman MCU decoding. * Each of these routines decodes and returns one MCU's worth of - * Huffman-compressed coefficients. + * Huffman-compressed coefficients. * The coefficients are reordered from zigzag order into natural array order, * but are not dequantized. * @@ -284,7 +290,7 @@ METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Al = cinfo->Al; register int s, r; @@ -292,14 +298,14 @@ JBLOCKROW block; BITREAD_STATE_VARS; savable_state state; - d_derived_tbl * tbl; - jpeg_component_info * compptr; + d_derived_tbl *tbl; + jpeg_component_info *compptr; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. @@ -324,16 +330,16 @@ /* Section F.2.2.1: decode the DC coefficient difference */ HUFF_DECODE(s, br_state, tbl, return FALSE, label1); if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); } /* Convert DC difference to actual value, update last_dc_val */ s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) (s << Al); + (*block)[0] = (JCOEF) LEFT_SHIFT(s, Al); } /* Completed MCU, so update state */ @@ -355,7 +361,7 @@ METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; int Al = cinfo->Al; @@ -363,13 +369,13 @@ unsigned int EOBRUN; JBLOCKROW block; BITREAD_STATE_VARS; - d_derived_tbl * tbl; + d_derived_tbl *tbl; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. @@ -380,49 +386,49 @@ /* Load up working state. * We can avoid loading/saving bitread state if in an EOB run. */ - EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ + EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ /* There is always only one block per MCU */ - if (EOBRUN > 0) /* if it's a band of zeroes... */ - EOBRUN--; /* ...process it now (we do nothing) */ + if (EOBRUN > 0) /* if it's a band of zeroes... */ + EOBRUN--; /* ...process it now (we do nothing) */ else { BITREAD_LOAD_STATE(cinfo,entropy->bitstate); block = MCU_data[0]; tbl = entropy->ac_derived_tbl; for (k = cinfo->Ss; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, return FALSE, label2); - r = s >> 4; - s &= 15; - if (s) { - k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al); - } else { - if (r == 15) { /* ZRL */ - k += 15; /* skip 15 zeroes in band */ - } else { /* EOBr, run length is 2^r + appended bits */ - EOBRUN = 1 << r; - if (r) { /* EOBr, r > 0 */ - CHECK_BIT_BUFFER(br_state, r, return FALSE); - r = GET_BITS(r); - EOBRUN += r; - } - EOBRUN--; /* this band is processed at this moment */ - break; /* force end-of-band */ - } - } + HUFF_DECODE(s, br_state, tbl, return FALSE, label2); + r = s >> 4; + s &= 15; + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + /* Scale and output coefficient in natural (dezigzagged) order */ + (*block)[jpeg_natural_order[k]] = (JCOEF) LEFT_SHIFT(s, Al); + } else { + if (r == 15) { /* ZRL */ + k += 15; /* skip 15 zeroes in band */ + } else { /* EOBr, run length is 2^r + appended bits */ + EOBRUN = 1 << r; + if (r) { /* EOBr, r > 0 */ + CHECK_BIT_BUFFER(br_state, r, return FALSE); + r = GET_BITS(r); + EOBRUN += r; + } + EOBRUN--; /* this band is processed at this moment */ + break; /* force end-of-band */ + } + } } BITREAD_SAVE_STATE(cinfo,entropy->bitstate); } /* Completed MCU, so update state */ - entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ + entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ } /* Account for restart interval (no-op if not using restarts) */ @@ -440,9 +446,9 @@ METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ int blkn; JBLOCKROW block; BITREAD_STATE_VARS; @@ -451,7 +457,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* Not worth the cycles to check insufficient_data here, @@ -489,17 +495,17 @@ METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - int m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + int m1 = (NEG_1) << cinfo->Al; /* -1 in the bit position being coded */ register int s, k, r; unsigned int EOBRUN; JBLOCKROW block; JCOEFPTR thiscoef; BITREAD_STATE_VARS; - d_derived_tbl * tbl; + d_derived_tbl *tbl; int num_newnz; int newnz_pos[DCTSIZE2]; @@ -507,7 +513,7 @@ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) if (! process_restart(cinfo)) - return FALSE; + return FALSE; } /* If we've run out of data, don't modify the MCU. @@ -535,58 +541,58 @@ if (EOBRUN == 0) { for (; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, goto undoit, label3); - r = s >> 4; - s &= 15; - if (s) { - if (s != 1) /* size of new coef should always be 1 */ - WARNMS(cinfo, JWRN_HUFF_BAD_CODE); - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) - s = p1; /* newly nonzero coef is positive */ - else - s = m1; /* newly nonzero coef is negative */ - } else { - if (r != 15) { - EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ - if (r) { - CHECK_BIT_BUFFER(br_state, r, goto undoit); - r = GET_BITS(r); - EOBRUN += r; - } - break; /* rest of block is handled by EOB logic */ - } - /* note s = 0 for processing ZRL */ - } - /* Advance over already-nonzero coefs and r still-zero coefs, - * appending correction bits to the nonzeroes. A correction bit is 1 - * if the absolute value of the coefficient must be increased. - */ - do { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } else { - if (--r < 0) - break; /* reached target zero coefficient */ - } - k++; - } while (k <= Se); - if (s) { - int pos = jpeg_natural_order[k]; - /* Output newly nonzero coefficient */ - (*block)[pos] = (JCOEF) s; - /* Remember its position in case we have to suspend */ - newnz_pos[num_newnz++] = pos; - } + HUFF_DECODE(s, br_state, tbl, goto undoit, label3); + r = s >> 4; + s &= 15; + if (s) { + if (s != 1) /* size of new coef should always be 1 */ + WARNMS(cinfo, JWRN_HUFF_BAD_CODE); + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) + s = p1; /* newly nonzero coef is positive */ + else + s = m1; /* newly nonzero coef is negative */ + } else { + if (r != 15) { + EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ + if (r) { + CHECK_BIT_BUFFER(br_state, r, goto undoit); + r = GET_BITS(r); + EOBRUN += r; + } + break; /* rest of block is handled by EOB logic */ + } + /* note s = 0 for processing ZRL */ + } + /* Advance over already-nonzero coefs and r still-zero coefs, + * appending correction bits to the nonzeroes. A correction bit is 1 + * if the absolute value of the coefficient must be increased. + */ + do { + thiscoef = *block + jpeg_natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } else { + if (--r < 0) + break; /* reached target zero coefficient */ + } + k++; + } while (k <= Se); + if (s) { + int pos = jpeg_natural_order[k]; + /* Output newly nonzero coefficient */ + (*block)[pos] = (JCOEF) s; + /* Remember its position in case we have to suspend */ + newnz_pos[num_newnz++] = pos; + } } } @@ -597,18 +603,18 @@ * if the absolute value of the coefficient must be increased. */ for (; k <= Se; k++) { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } + thiscoef = *block + jpeg_natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } } /* Count one block completed in EOB run */ EOBRUN--; @@ -646,7 +652,7 @@ entropy = (phuff_entropy_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_decoder)); + sizeof(phuff_entropy_decoder)); cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; entropy->pub.start_pass = start_pass_phuff_decoder; @@ -658,9 +664,9 @@ /* Create progression status table */ cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*SIZEOF(int)); + cinfo->num_components*DCTSIZE2*sizeof(int)); coef_bit_ptr = & cinfo->coef_bits[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) + for (ci = 0; ci < cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) *coef_bit_ptr++ = -1; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdpostct.c glmark2-2021.02/src/libjpeg-turbo/jdpostct.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdpostct.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdpostct.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdpostct.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the decompression postprocessing controller. * This controller manages the upsampling, color conversion, and color @@ -31,37 +34,34 @@ * For two-pass color quantization, we need a full-image buffer; * for one-pass operation, a strip buffer is sufficient. */ - jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */ - JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */ - JDIMENSION strip_height; /* buffer size in rows */ + jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */ + JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */ + JDIMENSION strip_height; /* buffer size in rows */ /* for two-pass mode only: */ - JDIMENSION starting_row; /* row # of first row in current strip */ - JDIMENSION next_row; /* index of next row to fill/empty in strip */ + JDIMENSION starting_row; /* row # of first row in current strip */ + JDIMENSION next_row; /* index of next row to fill/empty in strip */ } my_post_controller; -typedef my_post_controller * my_post_ptr; +typedef my_post_controller *my_post_ptr; /* Forward declarations */ METHODDEF(void) post_process_1pass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED METHODDEF(void) post_process_prepass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); METHODDEF(void) post_process_2pass - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #endif @@ -84,9 +84,9 @@ * allocate a strip buffer. Use the virtual-array buffer as workspace. */ if (post->buffer == NULL) { - post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - (JDIMENSION) 0, post->strip_height, TRUE); + post->buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, post->whole_image, + (JDIMENSION) 0, post->strip_height, TRUE); } } else { /* For single-pass processing without color quantization, @@ -124,10 +124,10 @@ METHODDEF(void) post_process_1pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION num_rows, max_rows; @@ -139,11 +139,11 @@ max_rows = post->strip_height; num_rows = 0; (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &num_rows, max_rows); + input_buf, in_row_group_ctr, in_row_groups_avail, + post->buffer, &num_rows, max_rows); /* Quantize and emit data. */ (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer, output_buf + *out_row_ctr, (int) num_rows); + post->buffer, output_buf + *out_row_ctr, (int) num_rows); *out_row_ctr += num_rows; } @@ -156,10 +156,10 @@ METHODDEF(void) post_process_prepass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION old_next_row, num_rows; @@ -167,22 +167,22 @@ /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - post->starting_row, post->strip_height, TRUE); + ((j_common_ptr) cinfo, post->whole_image, + post->starting_row, post->strip_height, TRUE); } /* Upsample some data (up to a strip height's worth). */ old_next_row = post->next_row; (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &post->next_row, post->strip_height); + input_buf, in_row_group_ctr, in_row_groups_avail, + post->buffer, &post->next_row, post->strip_height); /* Allow quantizer to scan new data. No data is emitted, */ /* but we advance out_row_ctr so outer loop can tell when we're done. */ if (post->next_row > old_next_row) { num_rows = post->next_row - old_next_row; (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row, - (JSAMPARRAY) NULL, (int) num_rows); + (JSAMPARRAY) NULL, (int) num_rows); *out_row_ctr += num_rows; } @@ -200,10 +200,10 @@ METHODDEF(void) post_process_2pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_post_ptr post = (my_post_ptr) cinfo->post; JDIMENSION num_rows, max_rows; @@ -211,8 +211,8 @@ /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - post->starting_row, post->strip_height, FALSE); + ((j_common_ptr) cinfo, post->whole_image, + post->starting_row, post->strip_height, FALSE); } /* Determine number of rows to emit. */ @@ -227,8 +227,8 @@ /* Quantize and emit data. */ (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer + post->next_row, output_buf + *out_row_ctr, - (int) num_rows); + post->buffer + post->next_row, output_buf + *out_row_ctr, + (int) num_rows); *out_row_ctr += num_rows; /* Advance if we filled the strip. */ @@ -253,11 +253,11 @@ post = (my_post_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_post_controller)); + sizeof(my_post_controller)); cinfo->post = (struct jpeg_d_post_controller *) post; post->pub.start_pass = start_pass_dpost; - post->whole_image = NULL; /* flag for no virtual arrays */ - post->buffer = NULL; /* flag for no strip buffer */ + post->whole_image = NULL; /* flag for no virtual arrays */ + post->buffer = NULL; /* flag for no strip buffer */ /* Create the quantization buffer, if needed */ if (cinfo->quantize_colors) { @@ -271,20 +271,20 @@ /* We round up the number of rows to a multiple of the strip height. */ #ifdef QUANT_2PASS_SUPPORTED post->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - cinfo->output_width * cinfo->out_color_components, - (JDIMENSION) jround_up((long) cinfo->output_height, - (long) post->strip_height), - post->strip_height); + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + cinfo->output_width * cinfo->out_color_components, + (JDIMENSION) jround_up((long) cinfo->output_height, + (long) post->strip_height), + post->strip_height); #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); #endif /* QUANT_2PASS_SUPPORTED */ } else { /* One-pass color quantization: just make a strip buffer. */ post->buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->output_width * cinfo->out_color_components, - post->strip_height); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width * cinfo->out_color_components, + post->strip_height); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdsample.c glmark2-2021.02/src/libjpeg-turbo/jdsample.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdsample.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdsample.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,15 @@ /* * jdsample.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2014, MIPS Technologies, Inc., California + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains upsampling routines. * @@ -20,50 +24,12 @@ * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7. */ -#define JPEG_INTERNALS #include "jinclude.h" -#include "jpeglib.h" +#include "jdsample.h" #include "jsimd.h" #include "jpegcomp.h" -/* Pointer to routine to upsample a single component */ -typedef JMETHOD(void, upsample1_ptr, - (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); - -/* Private subobject */ - -typedef struct { - struct jpeg_upsampler pub; /* public fields */ - - /* Color conversion buffer. When using separate upsampling and color - * conversion steps, this buffer holds one upsampled row group until it - * has been color converted and output. - * Note: we do not allocate any storage for component(s) which are full-size, - * ie do not need rescaling. The corresponding entry of color_buf[] is - * simply set to point to the input data array, thereby avoiding copying. - */ - JSAMPARRAY color_buf[MAX_COMPONENTS]; - - /* Per-component upsampling method pointers */ - upsample1_ptr methods[MAX_COMPONENTS]; - - int next_row_out; /* counts rows emitted from color_buf */ - JDIMENSION rows_to_go; /* counts rows remaining in image */ - - /* Height of an input row group for each component. */ - int rowgroup_height[MAX_COMPONENTS]; - - /* These arrays save pixel expansion factors so that int_expand need not - * recompute them each time. They are unused for other upsampling methods. - */ - UINT8 h_expand[MAX_COMPONENTS]; - UINT8 v_expand[MAX_COMPONENTS]; -} my_upsampler; - -typedef my_upsampler * my_upsample_ptr; - /* * Initialize for an upsampling pass. @@ -91,26 +57,26 @@ METHODDEF(void) sep_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) + JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; JDIMENSION num_rows; /* Fill the conversion buffer, if it's empty */ if (upsample->next_row_out >= cinfo->max_v_samp_factor) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { + ci++, compptr++) { /* Invoke per-component upsample method. Notice we pass a POINTER * to color_buf[ci], so that fullsize_upsample can change it. */ (*upsample->methods[ci]) (cinfo, compptr, - input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]), - upsample->color_buf + ci); + input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]), + upsample->color_buf + ci); } upsample->next_row_out = 0; } @@ -122,7 +88,7 @@ /* Not more than the distance to the end of the image. Need this test * in case the image height is not a multiple of max_v_samp_factor: */ - if (num_rows > upsample->rows_to_go) + if (num_rows > upsample->rows_to_go) num_rows = upsample->rows_to_go; /* And not more than what the client can accept: */ out_rows_avail -= *out_row_ctr; @@ -130,9 +96,9 @@ num_rows = out_rows_avail; (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf, - (JDIMENSION) upsample->next_row_out, - output_buf + *out_row_ctr, - (int) num_rows); + (JDIMENSION) upsample->next_row_out, + output_buf + *out_row_ctr, + (int) num_rows); /* Adjust counts */ *out_row_ctr += num_rows; @@ -158,8 +124,8 @@ */ METHODDEF(void) -fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { *output_data_ptr = input_data; } @@ -171,10 +137,10 @@ */ METHODDEF(void) -noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +noop_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { - *output_data_ptr = NULL; /* safety check */ + *output_data_ptr = NULL; /* safety check */ } @@ -190,8 +156,8 @@ */ METHODDEF(void) -int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; JSAMPARRAY output_data = *output_data_ptr; @@ -212,15 +178,15 @@ outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ for (h = h_expand; h > 0; h--) { - *outptr++ = invalue; + *outptr++ = invalue; } } /* Generate any additional output rows by duplicating the first one */ if (v_expand > 1) { jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - v_expand-1, cinfo->output_width); + v_expand-1, cinfo->output_width); } inrow++; outrow += v_expand; @@ -234,8 +200,8 @@ */ METHODDEF(void) -h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -248,7 +214,7 @@ outptr = output_data[inrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ *outptr++ = invalue; *outptr++ = invalue; } @@ -262,8 +228,8 @@ */ METHODDEF(void) -h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -277,12 +243,12 @@ outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; /* don't need GETJSAMPLE() here */ *outptr++ = invalue; *outptr++ = invalue; } jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - 1, cinfo->output_width); + 1, cinfo->output_width); inrow++; outrow += 2; } @@ -305,8 +271,8 @@ */ METHODDEF(void) -h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -346,15 +312,15 @@ */ METHODDEF(void) -h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr0, inptr1, outptr; #if BITS_IN_JSAMPLE == 8 register int thiscolsum, lastcolsum, nextcolsum; #else - register INT32 thiscolsum, lastcolsum, nextcolsum; + register JLONG thiscolsum, lastcolsum, nextcolsum; #endif register JDIMENSION colctr; int inrow, outrow, v; @@ -364,10 +330,10 @@ for (v = 0; v < 2; v++) { /* inptr0 points to nearest input row, inptr1 points to next nearest */ inptr0 = input_data[inrow]; - if (v == 0) /* next nearest is row above */ - inptr1 = input_data[inrow-1]; - else /* next nearest is row below */ - inptr1 = input_data[inrow+1]; + if (v == 0) /* next nearest is row above */ + inptr1 = input_data[inrow-1]; + else /* next nearest is row below */ + inptr1 = input_data[inrow+1]; outptr = output_data[outrow++]; /* Special case for first column */ @@ -378,12 +344,12 @@ lastcolsum = thiscolsum; thiscolsum = nextcolsum; for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { - /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ - /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; + /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ + /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ + nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); + *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); + lastcolsum = thiscolsum; thiscolsum = nextcolsum; } /* Special case for last column */ @@ -404,19 +370,22 @@ { my_upsample_ptr upsample; int ci; - jpeg_component_info * compptr; + jpeg_component_info *compptr; boolean need_buffer, do_fancy; int h_in_group, v_in_group, h_out_group, v_out_group; - upsample = (my_upsample_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_upsampler)); - cinfo->upsample = (struct jpeg_upsampler *) upsample; - upsample->pub.start_pass = start_pass_upsample; - upsample->pub.upsample = sep_upsample; - upsample->pub.need_context_rows = FALSE; /* until we find out differently */ + if (!cinfo->master->jinit_upsampler_no_alloc) { + upsample = (my_upsample_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(my_upsampler)); + cinfo->upsample = (struct jpeg_upsampler *) upsample; + upsample->pub.start_pass = start_pass_upsample; + upsample->pub.upsample = sep_upsample; + upsample->pub.need_context_rows = FALSE; /* until we find out differently */ + } else + upsample = (my_upsample_ptr) cinfo->upsample; - if (cinfo->CCIR601_sampling) /* this isn't supported */ + if (cinfo->CCIR601_sampling) /* this isn't supported */ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL); /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1, @@ -433,9 +402,9 @@ * are to be converted to max_h_samp_factor * max_v_samp_factor pixels. */ h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; + cinfo->_min_DCT_scaled_size; v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / - cinfo->_min_DCT_scaled_size; + cinfo->_min_DCT_scaled_size; h_out_group = cinfo->max_h_samp_factor; v_out_group = cinfo->max_v_samp_factor; upsample->rowgroup_height[ci] = v_in_group; /* save for use later */ @@ -449,48 +418,53 @@ upsample->methods[ci] = fullsize_upsample; need_buffer = FALSE; } else if (h_in_group * 2 == h_out_group && - v_in_group == v_out_group) { + v_in_group == v_out_group) { /* Special cases for 2h1v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { - if (jsimd_can_h2v1_fancy_upsample()) - upsample->methods[ci] = jsimd_h2v1_fancy_upsample; - else - upsample->methods[ci] = h2v1_fancy_upsample; + if (jsimd_can_h2v1_fancy_upsample()) + upsample->methods[ci] = jsimd_h2v1_fancy_upsample; + else + upsample->methods[ci] = h2v1_fancy_upsample; } else { - if (jsimd_can_h2v1_upsample()) - upsample->methods[ci] = jsimd_h2v1_upsample; - else - upsample->methods[ci] = h2v1_upsample; + if (jsimd_can_h2v1_upsample()) + upsample->methods[ci] = jsimd_h2v1_upsample; + else + upsample->methods[ci] = h2v1_upsample; } } else if (h_in_group * 2 == h_out_group && - v_in_group * 2 == v_out_group) { + v_in_group * 2 == v_out_group) { /* Special cases for 2h2v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { - if (jsimd_can_h2v2_fancy_upsample()) - upsample->methods[ci] = jsimd_h2v2_fancy_upsample; - else - upsample->methods[ci] = h2v2_fancy_upsample; - upsample->pub.need_context_rows = TRUE; + if (jsimd_can_h2v2_fancy_upsample()) + upsample->methods[ci] = jsimd_h2v2_fancy_upsample; + else + upsample->methods[ci] = h2v2_fancy_upsample; + upsample->pub.need_context_rows = TRUE; } else { - if (jsimd_can_h2v2_upsample()) - upsample->methods[ci] = jsimd_h2v2_upsample; - else - upsample->methods[ci] = h2v2_upsample; + if (jsimd_can_h2v2_upsample()) + upsample->methods[ci] = jsimd_h2v2_upsample; + else + upsample->methods[ci] = h2v2_upsample; } } else if ((h_out_group % h_in_group) == 0 && - (v_out_group % v_in_group) == 0) { + (v_out_group % v_in_group) == 0) { /* Generic integral-factors upsampling method */ - upsample->methods[ci] = int_upsample; +#if defined(__mips__) + if (jsimd_can_int_upsample()) + upsample->methods[ci] = jsimd_int_upsample; + else +#endif + upsample->methods[ci] = int_upsample; upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group); upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group); } else ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); - if (need_buffer) { + if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) { upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) jround_up((long) cinfo->output_width, - (long) cinfo->max_h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) jround_up((long) cinfo->output_width, + (long) cinfo->max_h_samp_factor), + (JDIMENSION) cinfo->max_v_samp_factor); } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdsample.h glmark2-2021.02/src/libjpeg-turbo/jdsample.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdsample.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdsample.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,50 @@ +/* + * jdsample.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#define JPEG_INTERNALS +#include "jpeglib.h" + + +/* Pointer to routine to upsample a single component */ +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +/* Private subobject */ + +typedef struct { + struct jpeg_upsampler pub; /* public fields */ + + /* Color conversion buffer. When using separate upsampling and color + * conversion steps, this buffer holds one upsampled row group until it + * has been color converted and output. + * Note: we do not allocate any storage for component(s) which are full-size, + * ie do not need rescaling. The corresponding entry of color_buf[] is + * simply set to point to the input data array, thereby avoiding copying. + */ + JSAMPARRAY color_buf[MAX_COMPONENTS]; + + /* Per-component upsampling method pointers */ + upsample1_ptr methods[MAX_COMPONENTS]; + + int next_row_out; /* counts rows emitted from color_buf */ + JDIMENSION rows_to_go; /* counts rows remaining in image */ + + /* Height of an input row group for each component. */ + int rowgroup_height[MAX_COMPONENTS]; + + /* These arrays save pixel expansion factors so that int_expand need not + * recompute them each time. They are unused for other upsampling methods. + */ + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; +} my_upsampler; + +typedef my_upsampler *my_upsample_ptr; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdtrans.c glmark2-2021.02/src/libjpeg-turbo/jdtrans.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jdtrans.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jdtrans.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jdtrans.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains library routines for transcoding decompression, * that is, reading raw DCT coefficient arrays from an input JPEG file. @@ -16,7 +19,7 @@ /* Forward declarations */ -LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo)); +LOCAL(void) transdecode_master_selection (j_decompress_ptr cinfo); /* @@ -55,20 +58,20 @@ int retcode; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); /* Absorb some more input */ retcode = (*cinfo->inputctl->consume_input) (cinfo); if (retcode == JPEG_SUSPENDED) - return NULL; + return NULL; if (retcode == JPEG_REACHED_EOI) - break; + break; /* Advance progress counter if appropriate */ if (cinfo->progress != NULL && - (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { - if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { - /* startup underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; - } + (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { + if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { + /* startup underestimated number of scans; ratchet up one scan */ + cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + } } } /* Set state so that jpeg_finish_decompress does the right thing */ @@ -84,7 +87,7 @@ } /* Oops, improper usage */ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); - return NULL; /* keep compiler happy */ + return NULL; /* keep compiler happy */ } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jerror.c glmark2-2021.02/src/libjpeg-turbo/jerror.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jerror.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jerror.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jerror.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains simple error-reporting and trace-message routines. * These are suitable for Unix-like systems and others where writing to @@ -28,7 +31,7 @@ #include #endif -#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ #define EXIT_FAILURE 1 #endif @@ -41,11 +44,7 @@ * want to refer to it directly. */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_std_message_table jMsgTable -#endif - -#define JMESSAGE(code,string) string , +#define JMESSAGE(code,string) string , const char * const jpeg_std_message_table[] = { #include "jerror.h" @@ -105,7 +104,7 @@ #ifdef USE_WINDOWS_MESSAGEBOX /* Display it in a message dialog box */ MessageBox(GetActiveWindow(), buffer, "JPEG Library Error", - MB_OK | MB_ICONERROR); + MB_OK | MB_ICONERROR); #else /* Send it to stderr, adding a newline */ fprintf(stderr, "%s\n", buffer); @@ -127,7 +126,7 @@ METHODDEF(void) emit_message (j_common_ptr cinfo, int msg_level) { - struct jpeg_error_mgr * err = cinfo->err; + struct jpeg_error_mgr *err = cinfo->err; if (msg_level < 0) { /* It's a warning message. Since corrupt files may generate many warnings, @@ -154,12 +153,12 @@ */ METHODDEF(void) -format_message (j_common_ptr cinfo, char * buffer) +format_message (j_common_ptr cinfo, char *buffer) { - struct jpeg_error_mgr * err = cinfo->err; + struct jpeg_error_mgr *err = cinfo->err; int msg_code = err->msg_code; - const char * msgtext = NULL; - const char * msgptr; + const char *msgtext = NULL; + const char *msgptr; char ch; boolean isstring; @@ -167,8 +166,8 @@ if (msg_code > 0 && msg_code <= err->last_jpeg_message) { msgtext = err->jpeg_message_table[msg_code]; } else if (err->addon_message_table != NULL && - msg_code >= err->first_addon_message && - msg_code <= err->last_addon_message) { + msg_code >= err->first_addon_message && + msg_code <= err->last_addon_message) { msgtext = err->addon_message_table[msg_code - err->first_addon_message]; } @@ -193,10 +192,10 @@ sprintf(buffer, msgtext, err->msg_parm.s); else sprintf(buffer, msgtext, - err->msg_parm.i[0], err->msg_parm.i[1], - err->msg_parm.i[2], err->msg_parm.i[3], - err->msg_parm.i[4], err->msg_parm.i[5], - err->msg_parm.i[6], err->msg_parm.i[7]); + err->msg_parm.i[0], err->msg_parm.i[1], + err->msg_parm.i[2], err->msg_parm.i[3], + err->msg_parm.i[4], err->msg_parm.i[5], + err->msg_parm.i[6], err->msg_parm.i[7]); } @@ -213,22 +212,22 @@ { cinfo->err->num_warnings = 0; /* trace_level is not reset since it is an application-supplied parameter */ - cinfo->err->msg_code = 0; /* may be useful as a flag for "no error" */ + cinfo->err->msg_code = 0; /* may be useful as a flag for "no error" */ } /* * Fill in the standard error-handling methods in a jpeg_error_mgr object. * Typical call is: - * struct jpeg_compress_struct cinfo; - * struct jpeg_error_mgr err; + * struct jpeg_compress_struct cinfo; + * struct jpeg_error_mgr err; * - * cinfo.err = jpeg_std_error(&err); + * cinfo.err = jpeg_std_error(&err); * after which the application may override some of the methods. */ GLOBAL(struct jpeg_error_mgr *) -jpeg_std_error (struct jpeg_error_mgr * err) +jpeg_std_error (struct jpeg_error_mgr *err) { err->error_exit = error_exit; err->emit_message = emit_message; @@ -236,16 +235,16 @@ err->format_message = format_message; err->reset_error_mgr = reset_error_mgr; - err->trace_level = 0; /* default = no tracing */ - err->num_warnings = 0; /* no warnings emitted yet */ - err->msg_code = 0; /* may be useful as a flag for "no error" */ + err->trace_level = 0; /* default = no tracing */ + err->num_warnings = 0; /* no warnings emitted yet */ + err->msg_code = 0; /* may be useful as a flag for "no error" */ /* Initialize message table pointers */ err->jpeg_message_table = jpeg_std_message_table; err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1; err->addon_message_table = NULL; - err->first_addon_message = 0; /* for safety */ + err->first_addon_message = 0; /* for safety */ err->last_addon_message = 0; return err; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jerror.h glmark2-2021.02/src/libjpeg-turbo/jerror.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jerror.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jerror.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jerror.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file defines the error and message codes for the JPEG library. * Edit this file to add new codes, or to translate the message strings to @@ -33,7 +36,7 @@ typedef enum { -#define JMESSAGE(code,string) code , +#define JMESSAGE(code,string) code , #endif /* JMAKE_ENUM_LIST */ @@ -42,7 +45,7 @@ /* For maintenance convenience, list is alphabetical by message code name */ #if JPEG_LIB_VERSION < 70 JMESSAGE(JERR_ARITH_NOTIMPL, - "Sorry, arithmetic coding is not implemented") + "Sorry, arithmetic coding is not implemented") #endif JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix") JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix") @@ -55,26 +58,26 @@ JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported") #if JPEG_LIB_VERSION >= 70 JMESSAGE(JERR_BAD_DROP_SAMPLING, - "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") + "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") #endif JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition") JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace") JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace") JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length") JMESSAGE(JERR_BAD_LIB_VERSION, - "Wrong JPEG library version: library is %d, caller expects %d") + "Wrong JPEG library version: library is %d, caller expects %d") JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan") JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d") JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d") JMESSAGE(JERR_BAD_PROGRESSION, - "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d") + "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d") JMESSAGE(JERR_BAD_PROG_SCRIPT, - "Invalid progressive parameters at scan script entry %d") + "Invalid progressive parameters at scan script entry %d") JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors") JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d") JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d") JMESSAGE(JERR_BAD_STRUCT_SIZE, - "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u") + "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u") JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access") JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small") JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here") @@ -98,7 +101,7 @@ JMESSAGE(JERR_INPUT_EMPTY, "Empty input file") JMESSAGE(JERR_INPUT_EOF, "Premature end of input file") JMESSAGE(JERR_MISMATCHED_QUANT_TABLE, - "Cannot transcode due to multiple use of quantization table %d") + "Cannot transcode due to multiple use of quantization table %d") JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data") JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change") JMESSAGE(JERR_NOTIMPL, "Not implemented yet") @@ -113,7 +116,7 @@ JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x") JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)") JMESSAGE(JERR_QUANT_COMPONENTS, - "Cannot quantize more than %d color components") + "Cannot quantize more than %d color components") JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors") JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors") JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers") @@ -125,19 +128,19 @@ JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file") JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file") JMESSAGE(JERR_TFILE_WRITE, - "Write failed on temporary file --- out of disk space?") + "Write failed on temporary file --- out of disk space?") JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines") JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x") JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up") JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation") JMESSAGE(JERR_XMS_READ, "Read from XMS failed") JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed") -JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT) +JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT_SHORT) JMESSAGE(JMSG_VERSION, JVERSION) JMESSAGE(JTRC_16BIT_TABLES, - "Caution: quantization tables are too coarse for baseline JPEG") + "Caution: quantization tables are too coarse for baseline JPEG") JMESSAGE(JTRC_ADOBE, - "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d") + "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d") JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u") JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u") JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x") @@ -150,9 +153,9 @@ JMESSAGE(JTRC_HUFFBITS, " %3d %3d %3d %3d %3d %3d %3d %3d") JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d %d") JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE, - "Warning: thumbnail image size does not match data length %u") + "Warning: thumbnail image size does not match data length %u") JMESSAGE(JTRC_JFIF_EXTENSION, - "JFIF extension marker: type 0x%02x, length %u") + "JFIF extension marker: type 0x%02x, length %u") JMESSAGE(JTRC_JFIF_THUMBNAIL, " with %d x %d thumbnail image") JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u") JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x") @@ -163,7 +166,7 @@ JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d") JMESSAGE(JTRC_RST, "RST%d") JMESSAGE(JTRC_SMOOTH_NOTIMPL, - "Smoothing not supported with nonstandard sampling ratios") + "Smoothing not supported with nonstandard sampling ratios") JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d") JMESSAGE(JTRC_SOF_COMPONENT, " Component %d: %dhx%dv q=%d") JMESSAGE(JTRC_SOI, "Start of Image") @@ -173,13 +176,13 @@ JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s") JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s") JMESSAGE(JTRC_THUMB_JPEG, - "JFIF extension marker: JPEG-compressed thumbnail image, length %u") + "JFIF extension marker: JPEG-compressed thumbnail image, length %u") JMESSAGE(JTRC_THUMB_PALETTE, - "JFIF extension marker: palette thumbnail image, length %u") + "JFIF extension marker: palette thumbnail image, length %u") JMESSAGE(JTRC_THUMB_RGB, - "JFIF extension marker: RGB thumbnail image, length %u") + "JFIF extension marker: RGB thumbnail image, length %u") JMESSAGE(JTRC_UNKNOWN_IDS, - "Unrecognized component IDs %d %d %d, assuming YCbCr") + "Unrecognized component IDs %d %d %d, assuming YCbCr") JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u") JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u") JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d") @@ -187,15 +190,15 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif JMESSAGE(JWRN_BOGUS_PROGRESSION, - "Inconsistent progression sequence for component %d coefficient %d") + "Inconsistent progression sequence for component %d coefficient %d") JMESSAGE(JWRN_EXTRANEOUS_DATA, - "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x") + "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x") JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment") JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code") JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d") JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file") JMESSAGE(JWRN_MUST_RESYNC, - "Corrupt JPEG data: found marker 0x%02x instead of RST%d") + "Corrupt JPEG data: found marker 0x%02x instead of RST%d") JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG") JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines") #if JPEG_LIB_VERSION < 70 @@ -255,7 +258,7 @@ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define MAKESTMT(stuff) do { stuff } while (0) +#define MAKESTMT(stuff) do { stuff } while (0) /* Nonfatal errors (we can keep going, but the data is probably corrupt) */ #define WARNMS(cinfo,code) \ @@ -286,26 +289,26 @@ (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) #define TRACEMS3(cinfo,lvl,code,p1,p2,p3) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - _mp[4] = (p5); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + _mp[4] = (p5); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8) \ MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ - (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ + (cinfo)->err->msg_code = (code); \ + (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) #define TRACEMSS(cinfo,lvl,code,str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctflt.c glmark2-2021.02/src/libjpeg-turbo/jfdctflt.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctflt.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jfdctflt.c 2021-04-25 01:47:22.000000000 +0800 @@ -3,7 +3,8 @@ * * Copyright (C) 1994-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a floating-point implementation of the * forward DCT (Discrete Cosine Transform). @@ -20,8 +21,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -37,7 +38,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_FLOAT_SUPPORTED @@ -56,7 +57,7 @@ */ GLOBAL(void) -jpeg_fdct_float (FAST_FLOAT * data) +jpeg_fdct_float (FAST_FLOAT *data) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; @@ -76,24 +77,24 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ - dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -103,15 +104,15 @@ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[5] = z13 + z2; /* phase 6 */ dataptr[3] = z13 - z2; dataptr[1] = z11 + z4; dataptr[7] = z11 - z4; - dataptr += DCTSIZE; /* advance pointer to next row */ + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. */ @@ -126,24 +127,24 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -153,7 +154,7 @@ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ @@ -161,7 +162,7 @@ dataptr[DCTSIZE*1] = z11 + z4; dataptr[DCTSIZE*7] = z11 - z4; - dataptr++; /* advance pointer to next column */ + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctfst.c glmark2-2021.02/src/libjpeg-turbo/jfdctfst.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctfst.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jfdctfst.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jfdctfst.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a fast, not so accurate integer implementation of the * forward DCT (Discrete Cosine Transform). @@ -15,8 +18,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -33,7 +36,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_IFAST_SUPPORTED @@ -76,10 +79,10 @@ */ #if CONST_BITS == 8 -#define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */ +#define FIX_0_382683433 ((JLONG) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((JLONG) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((JLONG) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((JLONG) 334) /* FIX(1.306562965) */ #else #define FIX_0_382683433 FIX(0.382683433) #define FIX_0_541196100 FIX(0.541196100) @@ -99,7 +102,7 @@ #endif -/* Multiply a DCTELEM variable by an INT32 constant, and immediately +/* Multiply a DCTELEM variable by an JLONG constant, and immediately * descale to yield a DCTELEM result. */ @@ -111,7 +114,7 @@ */ GLOBAL(void) -jpeg_fdct_ifast (DCTELEM * data) +jpeg_fdct_ifast (DCTELEM *data) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; @@ -132,24 +135,24 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ - dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -159,15 +162,15 @@ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[5] = z13 + z2; /* phase 6 */ dataptr[3] = z13 - z2; dataptr[1] = z11 + z4; dataptr[7] = z11 - z4; - dataptr += DCTSIZE; /* advance pointer to next row */ + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. */ @@ -182,24 +185,24 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -209,7 +212,7 @@ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ @@ -217,7 +220,7 @@ dataptr[DCTSIZE*1] = z11 + z4; dataptr[DCTSIZE*7] = z11 - z4; - dataptr++; /* advance pointer to next column */ + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctint.c glmark2-2021.02/src/libjpeg-turbo/jfdctint.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jfdctint.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jfdctint.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jfdctint.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1991-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a slow-but-accurate integer implementation of the * forward DCT (Discrete Cosine Transform). @@ -26,7 +29,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_ISLOW_SUPPORTED @@ -67,7 +70,7 @@ * they are represented to better-than-integral precision. These outputs * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word * with the recommended scaling. (For 12-bit sample data, the intermediate - * array is INT32 anyway.) + * array is JLONG anyway.) * * To avoid overflow of the 32-bit intermediate results in pass 2, we must * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis @@ -79,7 +82,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -90,18 +93,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -118,7 +121,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -137,11 +140,11 @@ */ GLOBAL(void) -jpeg_fdct_islow (DCTELEM * data) +jpeg_fdct_islow (DCTELEM *data) { - INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; + JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + JLONG tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr; SHIFT_TEMPS @@ -160,36 +163,36 @@ tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ - + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - - dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); - dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - + + dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); + dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS-PASS1_BITS); + CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS-PASS1_BITS); - + CONST_BITS-PASS1_BITS); + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ - + z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -198,16 +201,16 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); - - dataptr += DCTSIZE; /* advance pointer to next row */ + + dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. @@ -225,36 +228,36 @@ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ - + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); - + CONST_BITS+PASS1_BITS); + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ - + z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -263,20 +266,20 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, - CONST_BITS+PASS1_BITS); + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, - CONST_BITS+PASS1_BITS); - - dataptr++; /* advance pointer to next column */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctflt.c glmark2-2021.02/src/libjpeg-turbo/jidctflt.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctflt.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jidctflt.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* * jidctflt.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a floating-point implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -22,8 +26,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -39,7 +43,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_FLOAT_SUPPORTED @@ -65,21 +69,21 @@ */ GLOBAL(void) -jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; FAST_FLOAT z5, z10, z11, z12, z13; JCOEFPTR inptr; - FLOAT_MULT_TYPE * quantptr; - FAST_FLOAT * wsptr; + FLOAT_MULT_TYPE *quantptr; + FAST_FLOAT *wsptr; JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); + JSAMPLE *range_limit = cinfo->sample_range_limit; int ctr; FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS + #define _0_125 ((FLOAT_MULT_TYPE)0.125) /* Pass 1: process columns from input, store into work array. */ @@ -95,14 +99,15 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ - FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - + FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], + quantptr[DCTSIZE*0] * _0_125); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; @@ -111,53 +116,53 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0] * _0_125); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2] * _0_125); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4] * _0_125); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6] * _0_125); - tmp10 = tmp0 + tmp2; /* phase 3 */ + tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; - tmp13 = tmp1 + tmp3; /* phases 5-3 */ + tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ - tmp0 = tmp10 + tmp13; /* phase 2 */ + tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; - + /* Odd part */ - tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1] * _0_125); + tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3] * _0_125); + tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5] * _0_125); + tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7] * _0_125); - z13 = tmp6 + tmp5; /* phase 6 */ + z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; wsptr[DCTSIZE*0] = tmp0 + tmp7; wsptr[DCTSIZE*7] = tmp0 - tmp7; @@ -165,16 +170,15 @@ wsptr[DCTSIZE*6] = tmp1 - tmp6; wsptr[DCTSIZE*2] = tmp2 + tmp5; wsptr[DCTSIZE*5] = tmp2 - tmp5; - wsptr[DCTSIZE*4] = tmp3 + tmp4; - wsptr[DCTSIZE*3] = tmp3 - tmp4; + wsptr[DCTSIZE*3] = tmp3 + tmp4; + wsptr[DCTSIZE*4] = tmp3 - tmp4; - inptr++; /* advance pointers to next column */ + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { @@ -184,11 +188,13 @@ * the simplification applies less often (typically 5% to 10% of the time). * And testing floats for zero is relatively expensive, so we don't bother. */ - + /* Even part */ - tmp10 = wsptr[0] + wsptr[4]; - tmp11 = wsptr[0] - wsptr[4]; + /* Apply signed->unsigned and prepare float->int conversion */ + z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5); + tmp10 = z5 + wsptr[4]; + tmp11 = z5 - wsptr[4]; tmp13 = wsptr[2] + wsptr[6]; tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; @@ -209,33 +215,25 @@ tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; + + /* Final output stage: float->int conversion and range-limit */ - /* Final output stage: scale down by a factor of 8 and range-limit */ + outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK]; + outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK]; + outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK]; + outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK]; + outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK]; + outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK]; + outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK]; + outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK]; - outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctfst.c glmark2-2021.02/src/libjpeg-turbo/jidctfst.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctfst.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jidctfst.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jidctfst.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a fast, not so accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -17,8 +20,8 @@ * This implementation is based on Arai, Agui, and Nakajima's algorithm for * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in * Japanese, but the algorithm is described in the Pennebaker & Mitchell - * JPEG textbook (see REFERENCES section in file README). The following code - * is based directly on figure 4-8 in P&M. + * JPEG textbook (see REFERENCES section in file README.ijg). The following + * code is based directly on figure 4-8 in P&M. * While an 8-point DCT cannot be done in less than 11 multiplies, it is * possible to arrange the computation so that many of the multiplies are * simple scalings of the final outputs. These multiplies can then be @@ -35,7 +38,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_IFAST_SUPPORTED @@ -78,7 +81,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 8 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -89,10 +92,10 @@ */ #if CONST_BITS == 8 -#define FIX_1_082392200 ((INT32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((INT32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((INT32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((INT32) 669) /* FIX(2.613125930) */ +#define FIX_1_082392200 ((JLONG) 277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((JLONG) 362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((JLONG) 473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((JLONG) 669) /* FIX(2.613125930) */ #else #define FIX_1_082392200 FIX(1.082392200) #define FIX_1_414213562 FIX(1.414213562) @@ -112,7 +115,7 @@ #endif -/* Multiply a DCTELEM variable by an INT32 constant, and immediately +/* Multiply a DCTELEM variable by an JLONG constant, and immediately * descale to yield a DCTELEM result. */ @@ -122,27 +125,27 @@ /* Dequantize a coefficient by multiplying it by the multiplier-table * entry; produce a DCTELEM result. For 8-bit data a 16x16->16 * multiplication will do. For 12-bit data, the multiplier table is - * declared INT32, so a 32-bit multiply will be used. + * declared JLONG, so a 32-bit multiply will be used. */ #if BITS_IN_JSAMPLE == 8 #define DEQUANTIZE(coef,quantval) (((IFAST_MULT_TYPE) (coef)) * (quantval)) #else #define DEQUANTIZE(coef,quantval) \ - DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) + DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) #endif /* Like DESCALE, but applies to a DCTELEM and produces an int. - * We assume that int right shift is unsigned if INT32 right shift is. + * We assume that int right shift is unsigned if JLONG right shift is. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS DCTELEM ishift_temp; +#define ISHIFT_TEMPS DCTELEM ishift_temp; #if BITS_IN_JSAMPLE == 8 -#define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */ +#define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */ #else -#define DCTELEMBITS 32 /* DCTELEM must be 32 bits */ +#define DCTELEMBITS 32 /* DCTELEM must be 32 bits */ #endif #define IRIGHT_SHIFT(x,shft) \ ((ishift_temp = (x)) < 0 ? \ @@ -150,7 +153,7 @@ (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif #ifdef USE_ACCURATE_ROUNDING @@ -165,22 +168,22 @@ */ GLOBAL(void) -jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; DCTELEM z5, z10, z11, z12, z13; JCOEFPTR inptr; - IFAST_MULT_TYPE * quantptr; - int * wsptr; + IFAST_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS /* for DESCALE */ - ISHIFT_TEMPS /* for IDESCALE */ + int workspace[DCTSIZE2]; /* buffers data between passes */ + SHIFT_TEMPS /* for DESCALE */ + ISHIFT_TEMPS /* for IDESCALE */ /* Pass 1: process columns from input, store into work array. */ @@ -196,11 +199,11 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); @@ -212,13 +215,13 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); @@ -226,17 +229,17 @@ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - tmp10 = tmp0 + tmp2; /* phase 3 */ + tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; - tmp13 = tmp1 + tmp3; /* phases 5-3 */ + tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - tmp0 = tmp10 + tmp13; /* phase 2 */ + tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; - + /* Odd part */ tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); @@ -244,19 +247,19 @@ tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - z13 = tmp6 + tmp5; /* phase 6 */ + z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; @@ -269,11 +272,11 @@ wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - inptr++; /* advance pointers to next column */ + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ @@ -288,14 +291,14 @@ * test takes more time than it's worth. In that case this section * may be commented out. */ - + #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; @@ -305,11 +308,11 @@ outptr[6] = dcval; outptr[7] = dcval; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); @@ -317,7 +320,7 @@ tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - - tmp13; + - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; @@ -331,37 +334,37 @@ z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - tmp7 = z11 + z13; /* phase 5 */ + tmp7 = z11 + z13; /* phase 5 */ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; /* Final output stage: scale down by a factor of 8 and range-limit */ outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) - & RANGE_MASK]; + & RANGE_MASK]; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctint.c glmark2-2021.02/src/libjpeg-turbo/jidctint.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctint.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jidctint.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* * jidctint.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1991-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modification developed 2002-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains a slow-but-accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine @@ -23,12 +27,33 @@ * The advantage of this method is that no data path contains more than one * multiplication; this allows a very simple and accurate implementation in * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * We also provide IDCT routines with various output sample block sizes for + * direct resolution reduction or enlargement without additional resampling: + * NxN (N=1...16) pixels for one 8x8 input DCT block. + * + * For N<8 we simply take the corresponding low-frequency coefficients of + * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block + * to yield the downscaled outputs. + * This can be seen as direct low-pass downsampling from the DCT domain + * point of view rather than the usual spatial domain point of view, + * yielding significant computational savings and results at least + * as good as common bilinear (averaging) spatial downsampling. + * + * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as + * lower frequencies and higher frequencies assumed to be zero. + * It turns out that the computational effort is similar to the 8x8 IDCT + * regarding the output size. + * Furthermore, the scaling and descaling is the same for all IDCT sizes. + * + * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases + * since there would be too many additional constants to pre-calculate. */ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef DCT_ISLOW_SUPPORTED @@ -38,7 +63,7 @@ */ #if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ + Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ #endif @@ -67,7 +92,7 @@ * they are represented to better-than-integral precision. These outputs * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word * with the recommended scaling. (To scale up 12-bit sample data further, an - * intermediate INT32 array would be needed.) + * intermediate JLONG array would be needed.) * * To avoid overflow of the 32-bit intermediate results in pass 2, we must * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis @@ -79,7 +104,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -90,18 +115,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -118,7 +143,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -145,20 +170,20 @@ */ GLOBAL(void) -jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp1, tmp2, tmp3; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; + JLONG tmp0, tmp1, tmp2, tmp3; + JLONG tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3, z4, z5; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ + int workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -177,14 +202,15 @@ * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ - + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; @@ -193,49 +219,49 @@ wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } - + /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ - + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - + tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); + tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ - + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - + z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -244,17 +270,17 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; - + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); @@ -263,12 +289,12 @@ wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); - - inptr++; /* advance pointers to next column */ + + inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } - + /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ @@ -283,14 +309,14 @@ * test takes more time than it's worth. In that case this section * may be commented out. */ - + #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; @@ -300,44 +326,44 @@ outptr[6] = dcval; outptr[7] = dcval; - wsptr += DCTSIZE; /* advance pointer to next row */ + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ - - z2 = (INT32) wsptr[2]; - z3 = (INT32) wsptr[6]; - + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[6]; + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - - tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; - tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; - + + tmp0 = LEFT_SHIFT((JLONG) wsptr[0] + (JLONG) wsptr[4], CONST_BITS); + tmp1 = LEFT_SHIFT((JLONG) wsptr[0] - (JLONG) wsptr[4], CONST_BITS); + tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ - - tmp0 = (INT32) wsptr[7]; - tmp1 = (INT32) wsptr[5]; - tmp2 = (INT32) wsptr[3]; - tmp3 = (INT32) wsptr[1]; - + + tmp0 = (JLONG) wsptr[7]; + tmp1 = (JLONG) wsptr[5]; + tmp2 = (JLONG) wsptr[3]; + tmp3 = (JLONG) wsptr[1]; + z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ @@ -346,44 +372,2256 @@ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - + z3 += z5; z4 += z5; - + tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; - + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} + +#ifdef IDCT_SCALING_SUPPORTED + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 7x7 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/14). + */ + +GLOBAL(void) +jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[7*7]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 7 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp13 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 7; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 6x6 output block. + * + * Optimized algorithm with 3 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/12). + */ + +GLOBAL(void) +jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[6*6]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); + tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); + tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); + tmp1 = LEFT_SHIFT(z1 - z2 - z3, PASS1_BITS); + + /* Final output stage */ + + wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*1] = (int) (tmp11 + tmp1); + wsptr[6*4] = (int) (tmp11 - tmp1); + wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 6 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + tmp2 = (JLONG) wsptr[4]; + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = tmp0 - tmp10 - tmp10; + tmp10 = (JLONG) wsptr[2]; + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); + tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); + tmp1 = LEFT_SHIFT(z1 - z2 - z3, CONST_BITS); + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 6; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 5x5 output block. + * + * Optimized algorithm with 5 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/10). + */ + +GLOBAL(void) +jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp10, tmp11, tmp12; + JLONG z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[5*5]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= LEFT_SHIFT(z2, 2); + + /* Odd part */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 5 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp12 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); + tmp0 = (JLONG) wsptr[2]; + tmp1 = (JLONG) wsptr[4]; + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= LEFT_SHIFT(z2, 2); + + /* Odd part */ + + z2 = (JLONG) wsptr[1]; + z3 = (JLONG) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 5; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 3x3 output block. + * + * Optimized algorithm with 2 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/6). + */ + +GLOBAL(void) +jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp2, tmp10, tmp12; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[3*3]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 3 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + tmp2 = (JLONG) wsptr[2]; + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = (JLONG) wsptr[1]; + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 3; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 9x9 output block. + * + * Optimized algorithm with 10 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/18). + */ + +GLOBAL(void) +jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*9]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 9 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 9; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 10x10 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/20). + */ + +GLOBAL(void) +jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24; + JLONG z1, z2, z3, z4, z5; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*10]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = LEFT_SHIFT(z3, CONST_BITS); + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1), + CONST_BITS-PASS1_BITS); /* c0 = (c4-c8)*2 */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + z5 = LEFT_SHIFT(z3, CONST_BITS); + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z5 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z5 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1); + + tmp12 = LEFT_SHIFT(z1 - tmp13 - z3, PASS1_BITS); + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) (tmp22 + tmp12); + wsptr[8*7] = (int) (tmp22 - tmp12); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 10 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 10; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = LEFT_SHIFT(z3, CONST_BITS); + z4 = (JLONG) wsptr[4]; + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1); /* c0 = (c4-c8)*2 */ + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z3 = LEFT_SHIFT(z3, CONST_BITS); + z4 = (JLONG) wsptr[7]; + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z3 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z3 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1); + + tmp12 = LEFT_SHIFT(z1 - tmp13, CONST_BITS) - z3; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 11x11 output block. + * + * Optimized algorithm with 24 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/22). + */ + +GLOBAL(void) +jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*11]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 11 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 11; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp10 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[4]; + z3 = (JLONG) wsptr[6]; + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 12x12 output block. + * + * Optimized algorithm with 15 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/24). + */ + +GLOBAL(void) +jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*12]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = LEFT_SHIFT(z3, CONST_BITS); + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 = LEFT_SHIFT(z1, CONST_BITS); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = LEFT_SHIFT(z2, CONST_BITS); + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 12 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 12; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = LEFT_SHIFT(z3, CONST_BITS); + + z4 = (JLONG) wsptr[4]; + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = (JLONG) wsptr[2]; + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 = LEFT_SHIFT(z1, CONST_BITS); + z2 = (JLONG) wsptr[6]; + z2 = LEFT_SHIFT(z2, CONST_BITS); + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 13x13 output block. + * + * Optimized algorithm with 29 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/26). + */ + +GLOBAL(void) +jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*13]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 13 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 13; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[4]; + z4 = (JLONG) wsptr[6]; + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 14x14 output block. + * + * Optimized algorithm with 20 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/28). + */ + +GLOBAL(void) +jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*14]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1), + CONST_BITS-PASS1_BITS); /* c0 = (c4+c12-c8)*2 */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp13 = LEFT_SHIFT(z4, CONST_BITS); + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ + tmp16 += tmp15; + z1 += z4; + z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ + tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = LEFT_SHIFT(z1 - z3, PASS1_BITS); + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) (tmp23 + tmp13); + wsptr[8*10] = (int) (tmp23 - tmp13); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 14 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 14; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + z4 = (JLONG) wsptr[4]; + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1); /* c0 = (c4+c12-c8)*2 */ + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[6]; + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + z4 = LEFT_SHIFT(z4, CONST_BITS); + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ + tmp16 += tmp15; + tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ + tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = LEFT_SHIFT(z1 - z3, CONST_BITS) + z4; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 15x15 output block. + * + * Optimized algorithm with 22 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/30). + */ + +GLOBAL(void) +jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*15]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = LEFT_SHIFT(z1, CONST_BITS); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 15 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 15; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = LEFT_SHIFT(z1, CONST_BITS); + + z2 = (JLONG) wsptr[2]; + z3 = (JLONG) wsptr[4]; + z4 = (JLONG) wsptr[6]; + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z4 = (JLONG) wsptr[5]; + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = (JLONG) wsptr[7]; + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 16x16 output block. + * + * Optimized algorithm with 28 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/32). + */ + +GLOBAL(void) +jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; + JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + JLONG z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*16]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + /* Add fudge factor here for final descale. */ + tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 16 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 16; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); + + z1 = (JLONG) wsptr[4]; + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = (JLONG) wsptr[2]; + z2 = (JLONG) wsptr[6]; + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = (JLONG) wsptr[1]; + z2 = (JLONG) wsptr[3]; + z3 = (JLONG) wsptr[5]; + z4 = (JLONG) wsptr[7]; + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ } } +#endif /* IDCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctred.c glmark2-2021.02/src/libjpeg-turbo/jidctred.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jidctred.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jidctred.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jidctred.c * + * This file was part of the Independent JPEG Group's software. * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains inverse-DCT routines that produce reduced-size output: * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block. @@ -23,7 +26,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ +#include "jdct.h" /* Private declarations for DCT subsystem */ #ifdef IDCT_SCALING_SUPPORTED @@ -44,7 +47,7 @@ #define PASS1_BITS 2 #else #define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ #endif /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus @@ -55,20 +58,20 @@ */ #if CONST_BITS == 13 -#define FIX_0_211164243 ((INT32) 1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 ((INT32) 4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 ((INT32) 4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 ((INT32) 5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 ((INT32) 6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 ((INT32) 8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 ((INT32) 10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 ((INT32) 11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 ((INT32) 17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 ((INT32) 29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 ((JLONG) 1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 ((JLONG) 4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 ((JLONG) 4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 ((JLONG) 5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 ((JLONG) 6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 ((JLONG) 8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 ((JLONG) 10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 ((JLONG) 11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 ((JLONG) 17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 ((JLONG) 29692) /* FIX(3.624509785) */ #else #define FIX_0_211164243 FIX(0.211164243) #define FIX_0_509795579 FIX(0.509795579) @@ -87,7 +90,7 @@ #endif -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. +/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result. * For 8-bit samples with the recommended scaling, all the variable * and constant values involved are no more than 16 bits wide, so a * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. @@ -115,19 +118,19 @@ */ GLOBAL(void) -jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp2, tmp10, tmp12; - INT32 z1, z2, z3, z4; + JLONG tmp0, tmp2, tmp10, tmp12; + JLONG z1, z2, z3, z4; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*4]; /* buffers data between passes */ + int workspace[DCTSIZE*4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -140,57 +143,58 @@ if (ctr == DCTSIZE-4) continue; if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && - inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && + inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero; we need not examine term 4 for 4x4 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; - + continue; } - + /* Even part */ - + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp0 <<= (CONST_BITS+1); - + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS+1); + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); - + tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; - + /* Odd part */ - + z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); } - + /* Pass 2: process 4 rows from work array, store into output array. */ wsptr = workspace; @@ -200,64 +204,64 @@ #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; outptr[3] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ + + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ - - tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1); - - tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065) - + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865); - + + tmp0 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+1); + + tmp2 = MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + + MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865); + tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; - + /* Odd part */ - - z1 = (INT32) wsptr[7]; - z2 = (INT32) wsptr[5]; - z3 = (INT32) wsptr[3]; - z4 = (INT32) wsptr[1]; - + + z1 = (JLONG) wsptr[7]; + z2 = (JLONG) wsptr[5]; + z3 = (JLONG) wsptr[3]; + z4 = (JLONG) wsptr[1]; + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3+1) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ } } @@ -268,18 +272,18 @@ */ GLOBAL(void) -jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp10, z1; + JLONG tmp0, tmp10, z1; JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; + ISLOW_MULT_TYPE *quantptr; + int *wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*2]; /* buffers data between passes */ + int workspace[DCTSIZE*2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -292,21 +296,22 @@ if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) continue; if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), + PASS1_BITS); + wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; - + continue; } - + /* Even part */ - + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp10 = z1 << (CONST_BITS+2); - + tmp10 = LEFT_SHIFT(z1, CONST_BITS+2); + /* Odd part */ z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); @@ -319,11 +324,11 @@ tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ - + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); } - + /* Pass 2: process 2 rows from work array, store into output array. */ wsptr = workspace; @@ -334,38 +339,38 @@ #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - + JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + outptr[0] = dcval; outptr[1] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ + + wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif - + /* Even part */ - - tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2); - + + tmp10 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+2); + /* Odd part */ - tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ - + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ - + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ - + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ + tmp0 = MULTIPLY((JLONG) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ + + MULTIPLY((JLONG) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ + + MULTIPLY((JLONG) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ + + MULTIPLY((JLONG) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ - + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; + CONST_BITS+PASS1_BITS+3+2) + & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ + CONST_BITS+PASS1_BITS+3+2) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ } } @@ -376,12 +381,12 @@ */ GLOBAL(void) -jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { int dcval; - ISLOW_MULT_TYPE * quantptr; + ISLOW_MULT_TYPE *quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); SHIFT_TEMPS @@ -390,7 +395,7 @@ */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; dcval = DEQUANTIZE(coef_block[0], quantptr[0]); - dcval = (int) DESCALE((INT32) dcval, 3); + dcval = (int) DESCALE((JLONG) dcval, 3); output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jinclude.h glmark2-2021.02/src/libjpeg-turbo/jinclude.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jinclude.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jinclude.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jinclude.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1994, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file exists to provide a single place to fix any problems with * including the wrong system include files. (Common problems are taken @@ -17,8 +20,8 @@ /* Include auto-config file to find out which system include files we need. */ -#include "jconfig.h" /* auto configuration options */ -#define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ +#include "jconfig.h" /* auto configuration options */ +#define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ /* * We need the NULL macro and size_t typedef. @@ -58,28 +61,18 @@ #ifdef NEED_BSD_STRINGS #include -#define MEMZERO(target,size) bzero((void *)(target), (size_t)(size)) -#define MEMCOPY(dest,src,size) bcopy((const void *)(src), (void *)(dest), (size_t)(size)) +#define MEMZERO(target,size) bzero((void *)(target), (size_t)(size)) +#define MEMCOPY(dest,src,size) bcopy((const void *)(src), (void *)(dest), (size_t)(size)) #else /* not BSD, assume ANSI/SysV string lib */ #include -#define MEMZERO(target,size) memset((void *)(target), 0, (size_t)(size)) -#define MEMCOPY(dest,src,size) memcpy((void *)(dest), (const void *)(src), (size_t)(size)) +#define MEMZERO(target,size) memset((void *)(target), 0, (size_t)(size)) +#define MEMCOPY(dest,src,size) memcpy((void *)(dest), (const void *)(src), (size_t)(size)) #endif /* - * In ANSI C, and indeed any rational implementation, size_t is also the - * type returned by sizeof(). However, it seems there are some irrational - * implementations out there, in which sizeof() returns an int even though - * size_t is defined as long or unsigned long. To ensure consistent results - * we always use this SIZEOF() macro in place of using sizeof() directly. - */ - -#define SIZEOF(object) ((size_t) sizeof(object)) - -/* * The modules that use fread() and fwrite() always invoke them through * these macros. On some systems you may need to twiddle the argument casts. * CAUTION: argument order is different from underlying functions! diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemmgr.c glmark2-2021.02/src/libjpeg-turbo/jmemmgr.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemmgr.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jmemmgr.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemmgr.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains the JPEG system-independent memory management * routines. This code is usable across a wide variety of machines; most @@ -25,14 +28,14 @@ */ #define JPEG_INTERNALS -#define AM_MEMORY_MANAGER /* we define jvirt_Xarray_control structs */ +#define AM_MEMORY_MANAGER /* we define jvirt_Xarray_control structs */ #include "jinclude.h" #include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ +#include "jmemsys.h" /* import the system-dependent declarations */ #ifndef NO_GETENV -#ifndef HAVE_STDLIB_H /* should declare getenv() */ -extern char * getenv JPP((const char * name)); +#ifndef HAVE_STDLIB_H /* should declare getenv() */ +extern char *getenv (const char *name); #endif #endif @@ -67,9 +70,9 @@ * There isn't any really portable way to determine the worst-case alignment * requirement. This module assumes that the alignment requirement is * multiples of ALIGN_SIZE. - * By default, we define ALIGN_SIZE as sizeof(double). This is necessary on some - * workstations (where doubles really do need 8-byte alignment) and will work - * fine on nearly everything. If your machine has lesser alignment needs, + * By default, we define ALIGN_SIZE as sizeof(double). This is necessary on + * some workstations (where doubles really do need 8-byte alignment) and will + * work fine on nearly everything. If your machine has lesser alignment needs, * you can save a few bytes by making ALIGN_SIZE smaller. * The only place I know of where this will NOT work is certain Macintosh * 680x0 compilers that define double as a 10-byte IEEE extended float. @@ -78,9 +81,9 @@ * such a compiler. */ -#ifndef ALIGN_SIZE /* so can override from jconfig.h */ +#ifndef ALIGN_SIZE /* so can override from jconfig.h */ #ifndef WITH_SIMD -#define ALIGN_SIZE SIZEOF(double) +#define ALIGN_SIZE sizeof(double) #else #define ALIGN_SIZE 16 /* Most SIMD implementations require this */ #endif @@ -91,24 +94,23 @@ * request to jpeg_get_small() or jpeg_get_large(). There is no per-object * overhead within a pool, except for alignment padding. Each pool has a * header with a link to the next pool of the same class. - * Small and large pool headers are identical except that the latter's - * link pointer must be FAR on 80x86 machines. + * Small and large pool headers are identical. */ -typedef struct small_pool_struct * small_pool_ptr; +typedef struct small_pool_struct *small_pool_ptr; typedef struct small_pool_struct { - small_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ + small_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ } small_pool_hdr; -typedef struct large_pool_struct FAR * large_pool_ptr; +typedef struct large_pool_struct *large_pool_ptr; typedef struct large_pool_struct { - large_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ + large_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ } large_pool_hdr; /* @@ -116,7 +118,7 @@ */ typedef struct { - struct jpeg_memory_mgr pub; /* public fields */ + struct jpeg_memory_mgr pub; /* public fields */ /* Each pool identifier (lifetime class) names a linked list of pools. */ small_pool_ptr small_list[JPOOL_NUMPOOLS]; @@ -136,10 +138,10 @@ /* alloc_sarray and alloc_barray set this value for use by virtual * array routines. */ - JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */ + JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */ } my_memory_mgr; -typedef my_memory_mgr * my_mem_ptr; +typedef my_memory_mgr *my_mem_ptr; /* @@ -150,39 +152,39 @@ */ struct jvirt_sarray_control { - JSAMPARRAY mem_buffer; /* => the in-memory buffer */ - JDIMENSION rows_in_array; /* total virtual array height */ - JDIMENSION samplesperrow; /* width of array (and of memory buffer) */ - JDIMENSION maxaccess; /* max rows accessed by access_virt_sarray */ - JDIMENSION rows_in_mem; /* height of memory buffer */ - JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ - JDIMENSION cur_start_row; /* first logical row # in the buffer */ - JDIMENSION first_undef_row; /* row # of first uninitialized row */ - boolean pre_zero; /* pre-zero mode requested? */ - boolean dirty; /* do current buffer contents need written? */ - boolean b_s_open; /* is backing-store data valid? */ - jvirt_sarray_ptr next; /* link to next virtual sarray control block */ - backing_store_info b_s_info; /* System-dependent control info */ + JSAMPARRAY mem_buffer; /* => the in-memory buffer */ + JDIMENSION rows_in_array; /* total virtual array height */ + JDIMENSION samplesperrow; /* width of array (and of memory buffer) */ + JDIMENSION maxaccess; /* max rows accessed by access_virt_sarray */ + JDIMENSION rows_in_mem; /* height of memory buffer */ + JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ + JDIMENSION cur_start_row; /* first logical row # in the buffer */ + JDIMENSION first_undef_row; /* row # of first uninitialized row */ + boolean pre_zero; /* pre-zero mode requested? */ + boolean dirty; /* do current buffer contents need written? */ + boolean b_s_open; /* is backing-store data valid? */ + jvirt_sarray_ptr next; /* link to next virtual sarray control block */ + backing_store_info b_s_info; /* System-dependent control info */ }; struct jvirt_barray_control { - JBLOCKARRAY mem_buffer; /* => the in-memory buffer */ - JDIMENSION rows_in_array; /* total virtual array height */ - JDIMENSION blocksperrow; /* width of array (and of memory buffer) */ - JDIMENSION maxaccess; /* max rows accessed by access_virt_barray */ - JDIMENSION rows_in_mem; /* height of memory buffer */ - JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ - JDIMENSION cur_start_row; /* first logical row # in the buffer */ - JDIMENSION first_undef_row; /* row # of first uninitialized row */ - boolean pre_zero; /* pre-zero mode requested? */ - boolean dirty; /* do current buffer contents need written? */ - boolean b_s_open; /* is backing-store data valid? */ - jvirt_barray_ptr next; /* link to next virtual barray control block */ - backing_store_info b_s_info; /* System-dependent control info */ + JBLOCKARRAY mem_buffer; /* => the in-memory buffer */ + JDIMENSION rows_in_array; /* total virtual array height */ + JDIMENSION blocksperrow; /* width of array (and of memory buffer) */ + JDIMENSION maxaccess; /* max rows accessed by access_virt_barray */ + JDIMENSION rows_in_mem; /* height of memory buffer */ + JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */ + JDIMENSION cur_start_row; /* first logical row # in the buffer */ + JDIMENSION first_undef_row; /* row # of first uninitialized row */ + boolean pre_zero; /* pre-zero mode requested? */ + boolean dirty; /* do current buffer contents need written? */ + boolean b_s_open; /* is backing-store data valid? */ + jvirt_barray_ptr next; /* link to next virtual barray control block */ + backing_store_info b_s_info; /* System-dependent control info */ }; -#ifdef MEM_STATS /* optional extra stuff for statistics */ +#ifdef MEM_STATS /* optional extra stuff for statistics */ LOCAL(void) print_mem_stats (j_common_ptr cinfo, int pool_id) @@ -196,19 +198,19 @@ * This is helpful because message parm array can't handle longs. */ fprintf(stderr, "Freeing pool %d, total space = %ld\n", - pool_id, mem->total_space_allocated); + pool_id, mem->total_space_allocated); for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL; lhdr_ptr = lhdr_ptr->next) { fprintf(stderr, " Large chunk used %ld\n", - (long) lhdr_ptr->bytes_used); + (long) lhdr_ptr->bytes_used); } for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL; shdr_ptr = shdr_ptr->next) { fprintf(stderr, " Small chunk used %ld free %ld\n", - (long) shdr_ptr->bytes_used, - (long) shdr_ptr->bytes_left); + (long) shdr_ptr->bytes_used, + (long) shdr_ptr->bytes_left); } } @@ -221,7 +223,7 @@ /* If we compiled MEM_STATS support, report alloc requests before dying */ { #ifdef MEM_STATS - cinfo->err->trace_level = 2; /* force self_destruct to report stats */ + cinfo->err->trace_level = 2; /* force self_destruct to report stats */ #endif ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which); } @@ -244,19 +246,19 @@ * adjustment. */ -static const size_t first_pool_slop[JPOOL_NUMPOOLS] = +static const size_t first_pool_slop[JPOOL_NUMPOOLS] = { - 1600, /* first PERMANENT pool */ - 16000 /* first IMAGE pool */ + 1600, /* first PERMANENT pool */ + 16000 /* first IMAGE pool */ }; -static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = +static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = { - 0, /* additional PERMANENT pools */ - 5000 /* additional IMAGE pools */ + 0, /* additional PERMANENT pools */ + 5000 /* additional IMAGE pools */ }; -#define MIN_SLOP 50 /* greater than 0 to avoid futile looping */ +#define MIN_SLOP 50 /* greater than 0 to avoid futile looping */ METHODDEF(void *) @@ -265,7 +267,7 @@ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; small_pool_ptr hdr_ptr, prev_hdr_ptr; - char * data_ptr; + char *data_ptr; size_t min_request, slop; /* @@ -274,20 +276,26 @@ * and so that algorithms can straddle outside the proper area up * to the next alignment. */ + if (sizeofobject > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 7); + } sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); /* Check for unsatisfiable request (do now to ensure no overflow below) */ - if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) - out_of_memory(cinfo, 1); /* request exceeds malloc's ability */ + if ((sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > + MAX_ALLOC_CHUNK) + out_of_memory(cinfo, 1); /* request exceeds malloc's ability */ /* See if space is available in any existing pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ prev_hdr_ptr = NULL; hdr_ptr = mem->small_list[pool_id]; while (hdr_ptr != NULL) { if (hdr_ptr->bytes_left >= sizeofobject) - break; /* found pool with enough space */ + break; /* found pool with enough space */ prev_hdr_ptr = hdr_ptr; hdr_ptr = hdr_ptr->next; } @@ -295,8 +303,8 @@ /* Time to make a new pool? */ if (hdr_ptr == NULL) { /* min_request is what we need now, slop is what will be leftover */ - min_request = SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1; - if (prev_hdr_ptr == NULL) /* first pool in class? */ + min_request = sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1; + if (prev_hdr_ptr == NULL) /* first pool in class? */ slop = first_pool_slop[pool_id]; else slop = extra_pool_slop[pool_id]; @@ -307,17 +315,17 @@ for (;;) { hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop); if (hdr_ptr != NULL) - break; + break; slop /= 2; - if (slop < MIN_SLOP) /* give up when it gets real small */ - out_of_memory(cinfo, 2); /* jpeg_get_small failed */ + if (slop < MIN_SLOP) /* give up when it gets real small */ + out_of_memory(cinfo, 2); /* jpeg_get_small failed */ } mem->total_space_allocated += min_request + slop; /* Success, initialize the new pool header and add to end of list */ hdr_ptr->next = NULL; hdr_ptr->bytes_used = 0; hdr_ptr->bytes_left = sizeofobject + slop; - if (prev_hdr_ptr == NULL) /* first pool in class? */ + if (prev_hdr_ptr == NULL) /* first pool in class? */ mem->small_list[pool_id] = hdr_ptr; else prev_hdr_ptr->next = hdr_ptr; @@ -325,7 +333,7 @@ /* OK, allocate the object from the current pool */ data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ - data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ + data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; data_ptr += hdr_ptr->bytes_used; /* point to place for object */ @@ -339,9 +347,8 @@ /* * Allocation of "large" objects. * - * The external semantics of these are the same as "small" objects, - * except that FAR pointers are used on 80x86. However the pool - * management heuristics are quite different. We assume that each + * The external semantics of these are the same as "small" objects. However, + * the pool management heuristics are quite different. We assume that each * request is large enough that it may as well be passed directly to * jpeg_get_large; the pool management just links everything together * so that we can free it all on demand. @@ -350,35 +357,42 @@ * deliberately bunch rows together to ensure a large request size. */ -METHODDEF(void FAR *) +METHODDEF(void *) alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) /* Allocate a "large" object */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; large_pool_ptr hdr_ptr; - char FAR * data_ptr; + char *data_ptr; /* * Round up the requested size to a multiple of ALIGN_SIZE so that * algorithms can straddle outside the proper area up to the next * alignment. */ + if (sizeofobject > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 8); + } sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); /* Check for unsatisfiable request (do now to ensure no overflow below) */ - if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) - out_of_memory(cinfo, 3); /* request exceeds malloc's ability */ + if ((sizeof(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > + MAX_ALLOC_CHUNK) + out_of_memory(cinfo, 3); /* request exceeds malloc's ability */ /* Always make a new pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject + - SIZEOF(large_pool_hdr) + - ALIGN_SIZE - 1); + sizeof(large_pool_hdr) + + ALIGN_SIZE - 1); if (hdr_ptr == NULL) - out_of_memory(cinfo, 4); /* jpeg_get_large failed */ - mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr) + ALIGN_SIZE - 1; + out_of_memory(cinfo, 4); /* jpeg_get_large failed */ + mem->total_space_allocated += sizeofobject + sizeof(large_pool_hdr) + + ALIGN_SIZE - 1; /* Success, initialize the new pool header and add to list */ hdr_ptr->next = mem->large_list[pool_id]; @@ -390,17 +404,16 @@ mem->large_list[pool_id] = hdr_ptr; data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ - data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ + data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; - return (void FAR *) data_ptr; + return (void *) data_ptr; } /* * Creation of 2-D sample arrays. - * The pointers are in near heap, the samples themselves in FAR heap. * * To minimize allocation overhead and to allow I/O of large contiguous * blocks, we allocate the sample rows in groups of as many rows as possible @@ -417,7 +430,7 @@ METHODDEF(JSAMPARRAY) alloc_sarray (j_common_ptr cinfo, int pool_id, - JDIMENSION samplesperrow, JDIMENSION numrows) + JDIMENSION samplesperrow, JDIMENSION numrows) /* Allocate a 2-D sample array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -427,13 +440,20 @@ long ltemp; /* Make sure each row is properly aligned */ - if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0) - out_of_memory(cinfo, 5); /* safety check */ - samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE)); + if ((ALIGN_SIZE % sizeof(JSAMPLE)) != 0) + out_of_memory(cinfo, 5); /* safety check */ + + if (samplesperrow > MAX_ALLOC_CHUNK) { + /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject + is close to SIZE_MAX. */ + out_of_memory(cinfo, 9); + } + samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / + sizeof(JSAMPLE)); /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / - ((long) samplesperrow * SIZEOF(JSAMPLE)); + ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / + ((long) samplesperrow * sizeof(JSAMPLE)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); if (ltemp < (long) numrows) @@ -444,15 +464,15 @@ /* Get space for row pointers (small object) */ result = (JSAMPARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * SIZEOF(JSAMPROW))); + (size_t) (numrows * sizeof(JSAMPROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); workspace = (JSAMPROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow - * SIZEOF(JSAMPLE))); + (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow + * sizeof(JSAMPLE))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace += samplesperrow; @@ -470,7 +490,7 @@ METHODDEF(JBLOCKARRAY) alloc_barray (j_common_ptr cinfo, int pool_id, - JDIMENSION blocksperrow, JDIMENSION numrows) + JDIMENSION blocksperrow, JDIMENSION numrows) /* Allocate a 2-D coefficient-block array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -480,12 +500,12 @@ long ltemp; /* Make sure each row is properly aligned */ - if ((SIZEOF(JBLOCK) % ALIGN_SIZE) != 0) - out_of_memory(cinfo, 6); /* safety check */ + if ((sizeof(JBLOCK) % ALIGN_SIZE) != 0) + out_of_memory(cinfo, 6); /* safety check */ /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / - ((long) blocksperrow * SIZEOF(JBLOCK)); + ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / + ((long) blocksperrow * sizeof(JBLOCK)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); if (ltemp < (long) numrows) @@ -496,15 +516,15 @@ /* Get space for row pointers (small object) */ result = (JBLOCKARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * SIZEOF(JBLOCKROW))); + (size_t) (numrows * sizeof(JBLOCKROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); workspace = (JBLOCKROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow - * SIZEOF(JBLOCK))); + (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow + * sizeof(JBLOCK))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace += blocksperrow; @@ -554,8 +574,8 @@ METHODDEF(jvirt_sarray_ptr) request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero, - JDIMENSION samplesperrow, JDIMENSION numrows, - JDIMENSION maxaccess) + JDIMENSION samplesperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D sample array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -563,18 +583,18 @@ /* Only IMAGE-lifetime virtual arrays are currently supported */ if (pool_id != JPOOL_IMAGE) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id, - SIZEOF(struct jvirt_sarray_control)); + sizeof(struct jvirt_sarray_control)); - result->mem_buffer = NULL; /* marks array not yet realized */ + result->mem_buffer = NULL; /* marks array not yet realized */ result->rows_in_array = numrows; result->samplesperrow = samplesperrow; result->maxaccess = maxaccess; result->pre_zero = pre_zero; - result->b_s_open = FALSE; /* no associated backing-store object */ + result->b_s_open = FALSE; /* no associated backing-store object */ result->next = mem->virt_sarray_list; /* add to list of virtual arrays */ mem->virt_sarray_list = result; @@ -584,8 +604,8 @@ METHODDEF(jvirt_barray_ptr) request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero, - JDIMENSION blocksperrow, JDIMENSION numrows, - JDIMENSION maxaccess) + JDIMENSION blocksperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D coefficient-block array */ { my_mem_ptr mem = (my_mem_ptr) cinfo->mem; @@ -593,18 +613,18 @@ /* Only IMAGE-lifetime virtual arrays are currently supported */ if (pool_id != JPOOL_IMAGE) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id, - SIZEOF(struct jvirt_barray_control)); + sizeof(struct jvirt_barray_control)); - result->mem_buffer = NULL; /* marks array not yet realized */ + result->mem_buffer = NULL; /* marks array not yet realized */ result->rows_in_array = numrows; result->blocksperrow = blocksperrow; result->maxaccess = maxaccess; result->pre_zero = pre_zero; - result->b_s_open = FALSE; /* no associated backing-store object */ + result->b_s_open = FALSE; /* no associated backing-store object */ result->next = mem->virt_barray_list; /* add to list of virtual arrays */ mem->virt_barray_list = result; @@ -631,26 +651,26 @@ for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { if (sptr->mem_buffer == NULL) { /* if not realized yet */ space_per_minheight += (long) sptr->maxaccess * - (long) sptr->samplesperrow * SIZEOF(JSAMPLE); + (long) sptr->samplesperrow * sizeof(JSAMPLE); maximum_space += (long) sptr->rows_in_array * - (long) sptr->samplesperrow * SIZEOF(JSAMPLE); + (long) sptr->samplesperrow * sizeof(JSAMPLE); } } for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->mem_buffer == NULL) { /* if not realized yet */ space_per_minheight += (long) bptr->maxaccess * - (long) bptr->blocksperrow * SIZEOF(JBLOCK); + (long) bptr->blocksperrow * sizeof(JBLOCK); maximum_space += (long) bptr->rows_in_array * - (long) bptr->blocksperrow * SIZEOF(JBLOCK); + (long) bptr->blocksperrow * sizeof(JBLOCK); } } if (space_per_minheight <= 0) - return; /* no unrealized arrays, no work */ + return; /* no unrealized arrays, no work */ /* Determine amount of memory to actually use; this is system-dependent. */ avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space, - mem->total_space_allocated); + mem->total_space_allocated); /* If the maximum space needed is available, make all the buffers full * height; otherwise parcel it out with the same number of minheights @@ -673,19 +693,19 @@ if (sptr->mem_buffer == NULL) { /* if not realized yet */ minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L; if (minheights <= max_minheights) { - /* This buffer fits in memory */ - sptr->rows_in_mem = sptr->rows_in_array; + /* This buffer fits in memory */ + sptr->rows_in_mem = sptr->rows_in_array; } else { - /* It doesn't fit in memory, create backing store. */ - sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess); - jpeg_open_backing_store(cinfo, & sptr->b_s_info, - (long) sptr->rows_in_array * - (long) sptr->samplesperrow * - (long) SIZEOF(JSAMPLE)); - sptr->b_s_open = TRUE; + /* It doesn't fit in memory, create backing store. */ + sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess); + jpeg_open_backing_store(cinfo, & sptr->b_s_info, + (long) sptr->rows_in_array * + (long) sptr->samplesperrow * + (long) sizeof(JSAMPLE)); + sptr->b_s_open = TRUE; } sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE, - sptr->samplesperrow, sptr->rows_in_mem); + sptr->samplesperrow, sptr->rows_in_mem); sptr->rowsperchunk = mem->last_rowsperchunk; sptr->cur_start_row = 0; sptr->first_undef_row = 0; @@ -697,19 +717,19 @@ if (bptr->mem_buffer == NULL) { /* if not realized yet */ minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L; if (minheights <= max_minheights) { - /* This buffer fits in memory */ - bptr->rows_in_mem = bptr->rows_in_array; + /* This buffer fits in memory */ + bptr->rows_in_mem = bptr->rows_in_array; } else { - /* It doesn't fit in memory, create backing store. */ - bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess); - jpeg_open_backing_store(cinfo, & bptr->b_s_info, - (long) bptr->rows_in_array * - (long) bptr->blocksperrow * - (long) SIZEOF(JBLOCK)); - bptr->b_s_open = TRUE; + /* It doesn't fit in memory, create backing store. */ + bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess); + jpeg_open_backing_store(cinfo, & bptr->b_s_info, + (long) bptr->rows_in_array * + (long) bptr->blocksperrow * + (long) sizeof(JBLOCK)); + bptr->b_s_open = TRUE; } bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE, - bptr->blocksperrow, bptr->rows_in_mem); + bptr->blocksperrow, bptr->rows_in_mem); bptr->rowsperchunk = mem->last_rowsperchunk; bptr->cur_start_row = 0; bptr->first_undef_row = 0; @@ -725,7 +745,7 @@ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE); + bytesperrow = (long) ptr->samplesperrow * sizeof(JSAMPLE); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { @@ -736,17 +756,17 @@ rows = MIN(rows, (long) ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ rows = MIN(rows, (long) ptr->rows_in_array - thisrow); - if (rows <= 0) /* this chunk might be past end of file! */ + if (rows <= 0) /* this chunk might be past end of file! */ break; byte_count = rows * bytesperrow; if (writing) (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); else (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); file_offset += byte_count; } } @@ -758,7 +778,7 @@ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK); + bytesperrow = (long) ptr->blocksperrow * sizeof(JBLOCK); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { @@ -769,17 +789,17 @@ rows = MIN(rows, (long) ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ rows = MIN(rows, (long) ptr->rows_in_array - thisrow); - if (rows <= 0) /* this chunk might be past end of file! */ + if (rows <= 0) /* this chunk might be past end of file! */ break; byte_count = rows * bytesperrow; if (writing) (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); else (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void FAR *) ptr->mem_buffer[i], - file_offset, byte_count); + (void *) ptr->mem_buffer[i], + file_offset, byte_count); file_offset += byte_count; } } @@ -787,8 +807,8 @@ METHODDEF(JSAMPARRAY) access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable) /* Access the part of a virtual sample array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -826,7 +846,7 @@ ltemp = (long) end_row - (long) ptr->rows_in_mem; if (ltemp < 0) - ltemp = 0; /* don't fall off front end of file */ + ltemp = 0; /* don't fall off front end of file */ ptr->cur_start_row = (JDIMENSION) ltemp; } /* Read in the selected part of the array. @@ -841,25 +861,25 @@ */ if (ptr->first_undef_row < end_row) { if (ptr->first_undef_row < start_row) { - if (writable) /* writer skipped over a section of array */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); - undef_row = start_row; /* but reader is allowed to read ahead */ + if (writable) /* writer skipped over a section of array */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + undef_row = start_row; /* but reader is allowed to read ahead */ } else { undef_row = ptr->first_undef_row; } if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE); + size_t bytesperrow = (size_t) ptr->samplesperrow * sizeof(JSAMPLE); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow); - undef_row++; + jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + undef_row++; } } else { - if (! writable) /* reader looking at undefined data */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + if (! writable) /* reader looking at undefined data */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } /* Flag the buffer dirty if caller will write in it */ @@ -872,8 +892,8 @@ METHODDEF(JBLOCKARRAY) access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable) /* Access the part of a virtual block array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -911,7 +931,7 @@ ltemp = (long) end_row - (long) ptr->rows_in_mem; if (ltemp < 0) - ltemp = 0; /* don't fall off front end of file */ + ltemp = 0; /* don't fall off front end of file */ ptr->cur_start_row = (JDIMENSION) ltemp; } /* Read in the selected part of the array. @@ -926,25 +946,25 @@ */ if (ptr->first_undef_row < end_row) { if (ptr->first_undef_row < start_row) { - if (writable) /* writer skipped over a section of array */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); - undef_row = start_row; /* but reader is allowed to read ahead */ + if (writable) /* writer skipped over a section of array */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + undef_row = start_row; /* but reader is allowed to read ahead */ } else { undef_row = ptr->first_undef_row; } if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK); + size_t bytesperrow = (size_t) ptr->blocksperrow * sizeof(JBLOCK); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow); - undef_row++; + jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + undef_row++; } } else { - if (! writable) /* reader looking at undefined data */ - ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); + if (! writable) /* reader looking at undefined data */ + ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } /* Flag the buffer dirty if caller will write in it */ @@ -968,7 +988,7 @@ size_t space_freed; if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) - ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ + ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ #ifdef MEM_STATS if (cinfo->err->trace_level > 1) @@ -981,16 +1001,16 @@ jvirt_barray_ptr bptr; for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { - if (sptr->b_s_open) { /* there may be no backing store */ - sptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info); + if (sptr->b_s_open) { /* there may be no backing store */ + sptr->b_s_open = FALSE; /* prevent recursive close if error */ + (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info); } } mem->virt_sarray_list = NULL; for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { - if (bptr->b_s_open) { /* there may be no backing store */ - bptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info); + if (bptr->b_s_open) { /* there may be no backing store */ + bptr->b_s_open = FALSE; /* prevent recursive close if error */ + (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info); } } mem->virt_barray_list = NULL; @@ -1003,9 +1023,9 @@ while (lhdr_ptr != NULL) { large_pool_ptr next_lhdr_ptr = lhdr_ptr->next; space_freed = lhdr_ptr->bytes_used + - lhdr_ptr->bytes_left + - SIZEOF(large_pool_hdr); - jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed); + lhdr_ptr->bytes_left + + sizeof(large_pool_hdr); + jpeg_free_large(cinfo, (void *) lhdr_ptr, space_freed); mem->total_space_allocated -= space_freed; lhdr_ptr = next_lhdr_ptr; } @@ -1017,8 +1037,8 @@ while (shdr_ptr != NULL) { small_pool_ptr next_shdr_ptr = shdr_ptr->next; space_freed = shdr_ptr->bytes_used + - shdr_ptr->bytes_left + - SIZEOF(small_pool_hdr); + shdr_ptr->bytes_left + + sizeof(small_pool_hdr); jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed); mem->total_space_allocated -= space_freed; shdr_ptr = next_shdr_ptr; @@ -1045,10 +1065,10 @@ } /* Release the memory manager control block too. */ - jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr)); - cinfo->mem = NULL; /* ensures I will be called only once */ + jpeg_free_small(cinfo, (void *) cinfo->mem, sizeof(my_memory_mgr)); + cinfo->mem = NULL; /* ensures I will be called only once */ - jpeg_mem_term(cinfo); /* system-dependent cleanup */ + jpeg_mem_term(cinfo); /* system-dependent cleanup */ } @@ -1065,10 +1085,10 @@ int pool; size_t test_mac; - cinfo->mem = NULL; /* for safety if init fails */ + cinfo->mem = NULL; /* for safety if init fails */ /* Check for configuration errors. - * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably + * sizeof(ALIGN_TYPE) should be a power of 2; otherwise, it probably * doesn't reflect any real hardware alignment requirement. * The test is a little tricky: for X>0, X and X-1 have no one-bits * in common if and only if X is a power of 2, ie has only one one-bit. @@ -1089,10 +1109,10 @@ max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */ /* Attempt to allocate memory manager's control block */ - mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr)); + mem = (my_mem_ptr) jpeg_get_small(cinfo, sizeof(my_memory_mgr)); if (mem == NULL) { - jpeg_mem_term(cinfo); /* system-dependent cleanup */ + jpeg_mem_term(cinfo); /* system-dependent cleanup */ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0); } @@ -1122,7 +1142,7 @@ mem->virt_sarray_list = NULL; mem->virt_barray_list = NULL; - mem->total_space_allocated = SIZEOF(my_memory_mgr); + mem->total_space_allocated = sizeof(my_memory_mgr); /* Declare ourselves open for business */ cinfo->mem = & mem->pub; @@ -1134,15 +1154,15 @@ * this feature. */ #ifndef NO_GETENV - { char * memenv; + { char *memenv; if ((memenv = getenv("JPEGMEM")) != NULL) { char ch = 'x'; if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) { - if (ch == 'm' || ch == 'M') - max_to_use *= 1000L; - mem->pub.max_memory_to_use = max_to_use * 1000L; + if (ch == 'm' || ch == 'M') + max_to_use *= 1000L; + mem->pub.max_memory_to_use = max_to_use * 1000L; } } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemnobs.c glmark2-2021.02/src/libjpeg-turbo/jmemnobs.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemnobs.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jmemnobs.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemnobs.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1992-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file provides a really simple implementation of the system- * dependent portion of the JPEG memory manager. This implementation @@ -18,11 +21,11 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ +#include "jmemsys.h" /* import the system-dependent declarations */ -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc (size_t size); +extern void free (void *ptr); #endif @@ -38,7 +41,7 @@ } GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) +jpeg_free_small (j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -46,19 +49,16 @@ /* * "Large" objects are treated the same as "small" ones. - * NB: although we include FAR keywords in the routine declarations, - * this file won't actually work in 80x86 small/medium model; at least, - * you probably won't be able to process useful-size images in only 64KB. */ -GLOBAL(void FAR *) +GLOBAL(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) { - return (void FAR *) malloc(sizeofobject); + return (void *) malloc(sizeofobject); } GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) +jpeg_free_large (j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -71,7 +71,7 @@ GLOBAL(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, - size_t max_bytes_needed, size_t already_allocated) + size_t max_bytes_needed, size_t already_allocated) { return max_bytes_needed; } @@ -85,7 +85,7 @@ GLOBAL(void) jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) + long total_bytes_needed) { ERREXIT(cinfo, JERR_NO_BACKING_STORE); } @@ -99,7 +99,7 @@ GLOBAL(long) jpeg_mem_init (j_common_ptr cinfo) { - return 0; /* just set max_memory_to_use to 0 */ + return 0; /* just set max_memory_to_use to 0 */ } GLOBAL(void) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemsys.h glmark2-2021.02/src/libjpeg-turbo/jmemsys.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmemsys.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jmemsys.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jmemsys.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1992-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This include file defines the interface between the system-independent * and system-dependent portions of the JPEG memory manager. No other @@ -14,25 +17,10 @@ * in the IJG distribution. You may need to modify it if you write a * custom memory manager. If system-dependent changes are needed in * this file, the best method is to #ifdef them based on a configuration - * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR - * and USE_MAC_MEMMGR. + * symbol supplied in jconfig.h. */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_get_small jGetSmall -#define jpeg_free_small jFreeSmall -#define jpeg_get_large jGetLarge -#define jpeg_free_large jFreeLarge -#define jpeg_mem_available jMemAvail -#define jpeg_open_backing_store jOpenBackStore -#define jpeg_mem_init jMemInit -#define jpeg_mem_term jMemTerm -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - - /* * These two functions are used to allocate and release small chunks of * memory. (Typically the total amount requested through jpeg_get_small is @@ -41,40 +29,36 @@ * and free; in particular, jpeg_get_small must return NULL on failure. * On most systems, these ARE malloc and free. jpeg_free_small is passed the * size of the object being freed, just in case it's needed. - * On an 80x86 machine using small-data memory model, these manage near heap. */ -EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject)); -EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object, - size_t sizeofobject)); +EXTERN(void *) jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_small (j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * These two functions are used to allocate and release large chunks of * memory (up to the total free space designated by jpeg_mem_available). - * The interface is the same as above, except that on an 80x86 machine, - * far pointers are used. On most other machines these are identical to - * the jpeg_get/free_small routines; but we keep them separate anyway, - * in case a different allocation strategy is desirable for large chunks. + * These are identical to the jpeg_get/free_small routines; but we keep them + * separate anyway, in case a different allocation strategy is desirable for + * large chunks. */ -EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo, - size_t sizeofobject)); -EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object, - size_t sizeofobject)); +EXTERN(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_large (j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may * be requested in a single call to jpeg_get_large (and jpeg_get_small for that - * matter, but that case should never come into play). This macro is needed + * matter, but that case should never come into play). This macro was needed * to model the 64Kb-segment-size limit of far addressing on 80x86 machines. - * On those machines, we expect that jconfig.h will provide a proper value. - * On machines with 32-bit flat address spaces, any large constant may be used. + * On machines with flat address spaces, any large constant may be used. * * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type * size_t and will be a multiple of sizeof(align_type). */ -#ifndef MAX_ALLOC_CHUNK /* may be overridden in jconfig.h */ +#ifndef MAX_ALLOC_CHUNK /* may be overridden in jconfig.h */ #define MAX_ALLOC_CHUNK 1000000000L #endif @@ -100,10 +84,9 @@ * Conversely, zero may be returned to always use the minimum amount of memory. */ -EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo, - size_t min_bytes_needed, - size_t max_bytes_needed, - size_t already_allocated)); +EXTERN(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, + size_t max_bytes_needed, + size_t already_allocated); /* @@ -113,56 +96,53 @@ * are private to the system-dependent backing store routines. */ -#define TEMP_NAME_LENGTH 64 /* max length of a temporary file's name */ +#define TEMP_NAME_LENGTH 64 /* max length of a temporary file's name */ -#ifdef USE_MSDOS_MEMMGR /* DOS-specific junk */ +#ifdef USE_MSDOS_MEMMGR /* DOS-specific junk */ -typedef unsigned short XMSH; /* type of extended-memory handles */ -typedef unsigned short EMSH; /* type of expanded-memory handles */ +typedef unsigned short XMSH; /* type of extended-memory handles */ +typedef unsigned short EMSH; /* type of expanded-memory handles */ typedef union { - short file_handle; /* DOS file handle if it's a temp file */ - XMSH xms_handle; /* handle if it's a chunk of XMS */ - EMSH ems_handle; /* handle if it's a chunk of EMS */ + short file_handle; /* DOS file handle if it's a temp file */ + XMSH xms_handle; /* handle if it's a chunk of XMS */ + EMSH ems_handle; /* handle if it's a chunk of EMS */ } handle_union; #endif /* USE_MSDOS_MEMMGR */ -#ifdef USE_MAC_MEMMGR /* Mac-specific junk */ +#ifdef USE_MAC_MEMMGR /* Mac-specific junk */ #include #endif /* USE_MAC_MEMMGR */ -typedef struct backing_store_struct * backing_store_ptr; +typedef struct backing_store_struct *backing_store_ptr; typedef struct backing_store_struct { /* Methods for reading/writing/closing this backing-store object */ - JMETHOD(void, read_backing_store, (j_common_ptr cinfo, - backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count)); - JMETHOD(void, write_backing_store, (j_common_ptr cinfo, - backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count)); - JMETHOD(void, close_backing_store, (j_common_ptr cinfo, - backing_store_ptr info)); + void (*read_backing_store) (j_common_ptr cinfo, backing_store_ptr info, + void *buffer_address, long file_offset, + long byte_count); + void (*write_backing_store) (j_common_ptr cinfo, backing_store_ptr info, + void *buffer_address, long file_offset, + long byte_count); + void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info); /* Private fields for system-dependent backing-store management */ #ifdef USE_MSDOS_MEMMGR /* For the MS-DOS manager (jmemdos.c), we need: */ - handle_union handle; /* reference to backing-store storage object */ + handle_union handle; /* reference to backing-store storage object */ char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */ #else #ifdef USE_MAC_MEMMGR /* For the Mac manager (jmemmac.c), we need: */ - short temp_file; /* file reference number to temp file */ - FSSpec tempSpec; /* the FSSpec for the temp file */ + short temp_file; /* file reference number to temp file */ + FSSpec tempSpec; /* the FSSpec for the temp file */ char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */ #else /* For a typical implementation with temp files, we need: */ - FILE * temp_file; /* stdio reference to temp file */ + FILE *temp_file; /* stdio reference to temp file */ char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */ #endif #endif @@ -177,9 +157,9 @@ * just take an error exit.) */ -EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo, - backing_store_ptr info, - long total_bytes_needed)); +EXTERN(void) jpeg_open_backing_store (j_common_ptr cinfo, + backing_store_ptr info, + long total_bytes_needed); /* @@ -194,5 +174,5 @@ * all opened backing-store objects have been closed. */ -EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo)); -EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo)); +EXTERN(long) jpeg_mem_init (j_common_ptr cinfo); +EXTERN(void) jpeg_mem_term (j_common_ptr cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmorecfg.h glmark2-2021.02/src/libjpeg-turbo/jmorecfg.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jmorecfg.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jmorecfg.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,13 @@ /* * jmorecfg.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * Copyright (C) 2009, 2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * Modified 1997-2009 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains additional configuration options that customize the * JPEG software for special applications or support machine-dependent @@ -13,18 +16,6 @@ /* - * Define BITS_IN_JSAMPLE as either - * 8 for 8-bit sample values (the usual setting) - * 12 for 12-bit sample values - * Only 8 and 12 are legal data precisions for lossy JPEG according to the - * JPEG standard, and the IJG code does not support anything else! - * We do not support run-time selection of data precision, sorry. - */ - -#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ - - -/* * Maximum number of components (color channels) allowed in JPEG image. * To meet the letter of the JPEG spec, set this to 255. However, darn * few applications need more than 4 channels (maybe 5 for CMYK + alpha @@ -33,7 +24,7 @@ * bytes of storage, whether actually used in an image or not.) */ -#define MAX_COMPONENTS 10 /* maximum number of image components */ +#define MAX_COMPONENTS 10 /* maximum number of image components */ /* @@ -71,8 +62,8 @@ #endif /* HAVE_UNSIGNED_CHAR */ -#define MAXJSAMPLE 255 -#define CENTERJSAMPLE 128 +#define MAXJSAMPLE 255 +#define CENTERJSAMPLE 128 #endif /* BITS_IN_JSAMPLE == 8 */ @@ -85,8 +76,8 @@ typedef short JSAMPLE; #define GETJSAMPLE(value) ((int) (value)) -#define MAXJSAMPLE 4095 -#define CENTERJSAMPLE 2048 +#define MAXJSAMPLE 4095 +#define CENTERJSAMPLE 2048 #endif /* BITS_IN_JSAMPLE == 12 */ @@ -152,21 +143,52 @@ /* INT16 must hold at least the values -32768..32767. */ -#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ +#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ typedef short INT16; #endif -/* INT32 must hold at least signed 32-bit values. */ +/* INT32 must hold at least signed 32-bit values. + * + * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.) Integers were + * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to + * long. It also wasn't common (or at least as common) in 1994 for INT32 to be + * defined by platform headers. Since then, however, INT32 is defined in + * several other common places: + * + * Xmd.h (X11 header) typedefs INT32 to int on 64-bit platforms and long on + * 32-bit platforms (i.e always a 32-bit signed type.) + * + * basetsd.h (Win32 header) typedefs INT32 to int (always a 32-bit signed type + * on modern platforms.) + * + * qglobal.h (Qt header) typedefs INT32 to int (always a 32-bit signed type on + * modern platforms.) + * + * This is a recipe for conflict, since "long" and "int" aren't always + * compatible types. Since the definition of INT32 has technically been part + * of the libjpeg API for more than 20 years, we can't remove it, but we do not + * use it internally any longer. We instead define a separate type (JLONG) + * for internal use, which ensures that internal behavior will always be the + * same regardless of any external headers that may be included. + */ -#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ +#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ +#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */ +#ifndef _BASETSD_H /* MinGW is slightly different */ +#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */ typedef long INT32; #endif +#endif +#endif +#endif /* Datatype used for image dimensions. The JPEG standard only supports * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore * "unsigned int" is sufficient on all machines. However, if you need to * handle larger images and you don't mind deviating from the spec, you - * can change this datatype. + * can change this datatype. (Note that changing this datatype will + * potentially require modifying the SIMD code. The x86-64 SIMD extensions, + * in particular, assume a 32-bit JDIMENSION.) */ typedef unsigned int JDIMENSION; @@ -182,39 +204,31 @@ */ /* a function called through method pointers: */ -#define METHODDEF(type) static type +#define METHODDEF(type) static type /* a function used only in its module: */ -#define LOCAL(type) static type +#define LOCAL(type) static type /* a function referenced thru EXTERNs: */ -#define GLOBAL(type) type +#define GLOBAL(type) type /* a reference to a GLOBAL function: */ -#define EXTERN(type) extern type +#define EXTERN(type) extern type -/* This macro is used to declare a "method", that is, a function pointer. - * We want to supply prototype parameters if the compiler can cope. - * Note that the arglist parameter must be parenthesized! - * Again, you can customize this if you need special linkage keywords. +/* Originally, this macro was used as a way of defining function prototypes + * for both modern compilers as well as older compilers that did not support + * prototype parameters. libjpeg-turbo has never supported these older, + * non-ANSI compilers, but the macro is still included because there is some + * software out there that uses it. */ -#ifdef HAVE_PROTOTYPES #define JMETHOD(type,methodname,arglist) type (*methodname) arglist -#else -#define JMETHOD(type,methodname,arglist) type (*methodname) () -#endif -/* Here is the pseudo-keyword for declaring pointers that must be "far" - * on 80x86 machines. Most of the specialized coding for 80x86 is handled - * by just saying "FAR *" where such a pointer is needed. In a few places - * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. +/* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS), + * but again, some software relies on this macro. */ -#ifdef NEED_FAR_POINTERS -#define FAR far -#else +#undef FAR #define FAR -#endif /* @@ -227,11 +241,11 @@ #ifndef HAVE_BOOLEAN typedef int boolean; #endif -#ifndef FALSE /* in case these macros already exist */ -#define FALSE 0 /* values of boolean */ +#ifndef FALSE /* in case these macros already exist */ +#define FALSE 0 /* values of boolean */ #endif #ifndef TRUE -#define TRUE 1 +#define TRUE 1 #endif @@ -259,15 +273,15 @@ /* Capability options common to encoder and decoder: */ -#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ -#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ -#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ +#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ +#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ +#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ /* Encoder capability options: */ #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -#define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -#define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ +#define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ /* Note: if you selected 12-bit data precision, it is dangerous to turn off * ENTROPY_OPT_SUPPORTED. The standard Huffman tables are only good for 8-bit * precision, so jchuff.c normally uses entropy optimization to compute @@ -281,39 +295,45 @@ /* Decoder capability options: */ #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ +#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ #define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ -#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ +#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */ #define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ -#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ -#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ +#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ +#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ /* more capability options later, no doubt */ /* - * Ordering of RGB data in scanlines passed to or from the application. - * If your application wants to deal with data in the order B,G,R, just - * change these macros. You can also deal with formats such as R,G,B,X - * (one extra byte per pixel) by changing RGB_PIXELSIZE. Note that changing - * the offsets will also change the order in which colormap data is organized. - * RESTRICTIONS: - * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats. - * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not - * useful if you are using JPEG color spaces other than YCbCr or grayscale. - * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE - * is not 3 (they don't understand about dummy color components!). So you - * can't use color quantization if you change that value. - */ - -#define RGB_RED 0 /* Offset of Red in an RGB scanline element */ -#define RGB_GREEN 1 /* Offset of Green */ -#define RGB_BLUE 2 /* Offset of Blue */ -#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ + * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial + * feature of libjpeg. The idea was that, if an application developer needed + * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could + * change these macros, rebuild libjpeg, and link their application statically + * with it. In reality, few people ever did this, because there were some + * severe restrictions involved (cjpeg and djpeg no longer worked properly, + * compressing/decompressing RGB JPEGs no longer worked properly, and the color + * quantizer wouldn't work with pixel sizes other than 3.) Further, since all + * of the O/S-supplied versions of libjpeg were built with the default values + * of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications have + * come to regard these values as immutable. + * + * The libjpeg-turbo colorspace extensions provide a much cleaner way of + * compressing from/decompressing to buffers with arbitrary component orders + * and pixel sizes. Thus, we do not support changing the values of RGB_RED, + * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE. In addition to the restrictions + * listed above, changing these values will also break the SIMD extensions and + * the regression tests. + */ + +#define RGB_RED 0 /* Offset of Red in an RGB scanline element */ +#define RGB_GREEN 1 /* Offset of Green */ +#define RGB_BLUE 2 /* Offset of Blue */ +#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ -#define JPEG_NUMCS 16 +#define JPEG_NUMCS 17 #define EXT_RGB_RED 0 #define EXT_RGB_GREEN 1 @@ -348,25 +368,29 @@ static const int rgb_red[JPEG_NUMCS] = { -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED, EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED, - EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED + EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED, + -1 }; static const int rgb_green[JPEG_NUMCS] = { -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN, EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN, - EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN + EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN, + -1 }; static const int rgb_blue[JPEG_NUMCS] = { -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE, EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE, - EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE + EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE, + -1 }; static const int rgb_pixelsize[JPEG_NUMCS] = { -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE, EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE, - EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE + EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE, + -1 }; /* Definitions for speed-related optimizations. */ @@ -378,7 +402,7 @@ #ifndef MULTIPLIER #ifndef WITH_SIMD -#define MULTIPLIER int /* type for fastest integer multiply */ +#define MULTIPLIER int /* type for fastest integer multiply */ #else #define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */ #endif @@ -388,17 +412,10 @@ /* FAST_FLOAT should be either float or double, whichever is done faster * by your compiler. (Note that this type is only used in the floating point * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.) - * Typically, float is faster in ANSI C compilers, while double is faster in - * pre-ANSI compilers (because they insist on converting to double anyway). - * The code below therefore chooses float if we have ANSI-style prototypes. */ #ifndef FAST_FLOAT -#ifdef HAVE_PROTOTYPES #define FAST_FLOAT float -#else -#define FAST_FLOAT double -#endif #endif #endif /* JPEG_INTERNAL_OPTIONS */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegcomp.h glmark2-2021.02/src/libjpeg-turbo/jpegcomp.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegcomp.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpegcomp.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,8 @@ * jpegcomp.h * * Copyright (C) 2010, D. R. Commander - * For conditions of distribution and use, see the accompanying README file. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * JPEG compatibility macros * These declarations are considered internal to the JPEG library; most @@ -11,6 +12,8 @@ #if JPEG_LIB_VERSION >= 70 #define _DCT_scaled_size DCT_h_scaled_size +#define _DCT_h_scaled_size DCT_h_scaled_size +#define _DCT_v_scaled_size DCT_v_scaled_size #define _min_DCT_scaled_size min_DCT_h_scaled_size #define _min_DCT_h_scaled_size min_DCT_h_scaled_size #define _min_DCT_v_scaled_size min_DCT_v_scaled_size @@ -18,6 +21,8 @@ #define _jpeg_height jpeg_height #else #define _DCT_scaled_size DCT_scaled_size +#define _DCT_h_scaled_size DCT_scaled_size +#define _DCT_v_scaled_size DCT_scaled_size #define _min_DCT_scaled_size min_DCT_scaled_size #define _min_DCT_h_scaled_size min_DCT_scaled_size #define _min_DCT_v_scaled_size min_DCT_scaled_size diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegint.h glmark2-2021.02/src/libjpeg-turbo/jpegint.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegint.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpegint.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,14 @@ /* * jpegint.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2015-2016, D. R. Commander + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file provides common declarations for the various JPEG modules. * These declarations are considered internal to the JPEG library; most @@ -14,121 +18,127 @@ /* Declarations for both compression & decompression */ -typedef enum { /* Operating modes for buffer controllers */ - JBUF_PASS_THRU, /* Plain stripwise operation */ - /* Remaining modes require a full-image buffer to have been created */ - JBUF_SAVE_SOURCE, /* Run source subobject only, save output */ - JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */ - JBUF_SAVE_AND_PASS /* Run both subobjects, save output */ +typedef enum { /* Operating modes for buffer controllers */ + JBUF_PASS_THRU, /* Plain stripwise operation */ + /* Remaining modes require a full-image buffer to have been created */ + JBUF_SAVE_SOURCE, /* Run source subobject only, save output */ + JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */ + JBUF_SAVE_AND_PASS /* Run both subobjects, save output */ } J_BUF_MODE; /* Values of global_state field (jdapi.c has some dependencies on ordering!) */ -#define CSTATE_START 100 /* after create_compress */ -#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ -#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ -#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ -#define DSTATE_START 200 /* after create_decompress */ -#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ -#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ -#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ -#define DSTATE_PRESCAN 204 /* performing dummy pass for 2-pass quant */ -#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ -#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ -#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ -#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ -#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ -#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ +#define CSTATE_START 100 /* after create_compress */ +#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ +#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ +#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ +#define DSTATE_START 200 /* after create_decompress */ +#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ +#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ +#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ +#define DSTATE_PRESCAN 204 /* performing dummy pass for 2-pass quant */ +#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ +#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ +#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ +#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ +#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ +#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ + + +/* JLONG must hold at least signed 32-bit values. */ +typedef long JLONG; + + +/* + * Left shift macro that handles a negative operand without causing any + * sanitizer warnings + */ + +#define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b))) /* Declarations for compression modules */ /* Master control module */ struct jpeg_comp_master { - JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo)); - JMETHOD(void, pass_startup, (j_compress_ptr cinfo)); - JMETHOD(void, finish_pass, (j_compress_ptr cinfo)); + void (*prepare_for_pass) (j_compress_ptr cinfo); + void (*pass_startup) (j_compress_ptr cinfo); + void (*finish_pass) (j_compress_ptr cinfo); /* State variables made visible to other modules */ - boolean call_pass_startup; /* True if pass_startup must be called */ - boolean is_last_pass; /* True during last pass */ + boolean call_pass_startup; /* True if pass_startup must be called */ + boolean is_last_pass; /* True during last pass */ }; /* Main buffer control (downsampled-data buffer) */ struct jpeg_c_main_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, process_data, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail); }; /* Compression preprocessing (downsampling input buffer control) */ struct jpeg_c_prep_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, pre_process_data, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, - JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, - JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + void (*pre_process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, + JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail); }; /* Coefficient buffer control */ struct jpeg_c_coef_controller { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(boolean, compress_data, (j_compress_ptr cinfo, - JSAMPIMAGE input_buf)); + void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode); + boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf); }; /* Colorspace conversion */ struct jpeg_color_converter { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); - JMETHOD(void, color_convert, (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + void (*start_pass) (j_compress_ptr cinfo); + void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows); }; /* Downsampling */ struct jpeg_downsampler { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); - JMETHOD(void, downsample, (j_compress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_index, - JSAMPIMAGE output_buf, - JDIMENSION out_row_group_index)); + void (*start_pass) (j_compress_ptr cinfo); + void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_index, JSAMPIMAGE output_buf, + JDIMENSION out_row_group_index); - boolean need_context_rows; /* TRUE if need rows above & below */ + boolean need_context_rows; /* TRUE if need rows above & below */ }; /* Forward DCT (also controls coefficient quantization) */ struct jpeg_forward_dct { - JMETHOD(void, start_pass, (j_compress_ptr cinfo)); + void (*start_pass) (j_compress_ptr cinfo); /* perhaps this should be an array??? */ - JMETHOD(void, forward_DCT, (j_compress_ptr cinfo, - jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks)); + void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks); }; /* Entropy encoding */ struct jpeg_entropy_encoder { - JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics)); - JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data)); - JMETHOD(void, finish_pass, (j_compress_ptr cinfo)); + void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics); + boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data); + void (*finish_pass) (j_compress_ptr cinfo); }; /* Marker writing */ struct jpeg_marker_writer { - JMETHOD(void, write_file_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_frame_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_scan_header, (j_compress_ptr cinfo)); - JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo)); - JMETHOD(void, write_tables_only, (j_compress_ptr cinfo)); + void (*write_file_header) (j_compress_ptr cinfo); + void (*write_frame_header) (j_compress_ptr cinfo); + void (*write_scan_header) (j_compress_ptr cinfo); + void (*write_file_trailer) (j_compress_ptr cinfo); + void (*write_tables_only) (j_compress_ptr cinfo); /* These routines are exported to allow insertion of extra markers */ /* Probably only COM and APPn markers should be written this way */ - JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker, - unsigned int datalen)); - JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val)); + void (*write_marker_header) (j_compress_ptr cinfo, int marker, + unsigned int datalen); + void (*write_marker_byte) (j_compress_ptr cinfo, int val); }; @@ -136,138 +146,137 @@ /* Master control module */ struct jpeg_decomp_master { - JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo)); + void (*prepare_for_output_pass) (j_decompress_ptr cinfo); + void (*finish_output_pass) (j_decompress_ptr cinfo); /* State variables made visible to other modules */ - boolean is_dummy_pass; /* True during 1st pass for 2-pass quant */ + boolean is_dummy_pass; /* True during 1st pass for 2-pass quant */ + + /* Partial decompression variables */ + JDIMENSION first_iMCU_col; + JDIMENSION last_iMCU_col; + JDIMENSION first_MCU_col[MAX_COMPS_IN_SCAN]; + JDIMENSION last_MCU_col[MAX_COMPS_IN_SCAN]; + boolean jinit_upsampler_no_alloc; }; /* Input control module */ struct jpeg_input_controller { - JMETHOD(int, consume_input, (j_decompress_ptr cinfo)); - JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo)); - JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo)); + int (*consume_input) (j_decompress_ptr cinfo); + void (*reset_input_controller) (j_decompress_ptr cinfo); + void (*start_input_pass) (j_decompress_ptr cinfo); + void (*finish_input_pass) (j_decompress_ptr cinfo); /* State variables made visible to other modules */ - boolean has_multiple_scans; /* True if file has multiple scans */ - boolean eoi_reached; /* True when EOI has been consumed */ + boolean has_multiple_scans; /* True if file has multiple scans */ + boolean eoi_reached; /* True when EOI has been consumed */ }; /* Main buffer control (downsampled-data buffer) */ struct jpeg_d_main_controller { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, process_data, (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode); + void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); }; /* Coefficient buffer control */ struct jpeg_d_coef_controller { - JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo)); - JMETHOD(int, consume_data, (j_decompress_ptr cinfo)); - JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo)); - JMETHOD(int, decompress_data, (j_decompress_ptr cinfo, - JSAMPIMAGE output_buf)); + void (*start_input_pass) (j_decompress_ptr cinfo); + int (*consume_data) (j_decompress_ptr cinfo); + void (*start_output_pass) (j_decompress_ptr cinfo); + int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); /* Pointer to array of coefficient virtual arrays, or NULL if none */ jvirt_barray_ptr *coef_arrays; }; /* Decompression postprocessing (color quantization buffer control) */ struct jpeg_d_post_controller { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)); - JMETHOD(void, post_process_data, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode); + void (*post_process_data) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); }; /* Marker reading & parsing */ struct jpeg_marker_reader { - JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo)); + void (*reset_marker_reader) (j_decompress_ptr cinfo); /* Read markers until SOS or EOI. * Returns same codes as are defined for jpeg_consume_input: * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI. */ - JMETHOD(int, read_markers, (j_decompress_ptr cinfo)); + int (*read_markers) (j_decompress_ptr cinfo); /* Read a restart marker --- exported for use by entropy decoder only */ jpeg_marker_parser_method read_restart_marker; /* State of marker reader --- nominally internal, but applications * supplying COM or APPn handlers might like to know the state. */ - boolean saw_SOI; /* found SOI? */ - boolean saw_SOF; /* found SOF? */ - int next_restart_num; /* next restart number expected (0-7) */ - unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */ + boolean saw_SOI; /* found SOI? */ + boolean saw_SOF; /* found SOF? */ + int next_restart_num; /* next restart number expected (0-7) */ + unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */ }; /* Entropy decoding */ struct jpeg_entropy_decoder { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); + void (*start_pass) (j_decompress_ptr cinfo); + boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data); /* This is here to share code between baseline and progressive decoders; */ /* other modules probably should not use it */ - boolean insufficient_data; /* set TRUE after emitting warning */ + boolean insufficient_data; /* set TRUE after emitting warning */ }; /* Inverse DCT (also performs dequantization) */ -typedef JMETHOD(void, inverse_DCT_method_ptr, - (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col)); +typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, + JDIMENSION output_col); struct jpeg_inverse_dct { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); + void (*start_pass) (j_decompress_ptr cinfo); /* It is useful to allow each component to have a separate IDCT method. */ inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS]; }; /* Upsampling (note that upsampler must also call color converter) */ struct jpeg_upsampler { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, upsample, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail)); + void (*start_pass) (j_decompress_ptr cinfo); + void (*upsample) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); - boolean need_context_rows; /* TRUE if need rows above & below */ + boolean need_context_rows; /* TRUE if need rows above & below */ }; /* Colorspace conversion */ struct jpeg_color_deconverter { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, color_convert, (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + void (*start_pass) (j_decompress_ptr cinfo); + void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows); }; /* Color quantization or color precision reduction */ struct jpeg_color_quantizer { - JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan)); - JMETHOD(void, color_quantize, (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, - int num_rows)); - JMETHOD(void, finish_pass, (j_decompress_ptr cinfo)); - JMETHOD(void, new_color_map, (j_decompress_ptr cinfo)); + void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan); + void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows); + void (*finish_pass) (j_decompress_ptr cinfo); + void (*new_color_map) (j_decompress_ptr cinfo); }; /* Miscellaneous useful macros */ #undef MAX -#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) #undef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MIN(a,b) ((a) < (b) ? (a) : (b)) /* We assume that right shift corresponds to signed division by 2 with @@ -275,126 +284,84 @@ * shift" instructions that shift in copies of the sign bit. But some * C compilers implement >> with an unsigned shift. For these machines you * must define RIGHT_SHIFT_IS_UNSIGNED. - * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity. + * RIGHT_SHIFT provides a proper signed right shift of a JLONG quantity. * It is only applied with constant shift counts. SHIFT_TEMPS must be * included in the variables of any routine using RIGHT_SHIFT. */ #ifdef RIGHT_SHIFT_IS_UNSIGNED -#define SHIFT_TEMPS INT32 shift_temp; +#define SHIFT_TEMPS JLONG shift_temp; #define RIGHT_SHIFT(x,shft) \ - ((shift_temp = (x)) < 0 ? \ - (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \ - (shift_temp >> (shft))) + ((shift_temp = (x)) < 0 ? \ + (shift_temp >> (shft)) | ((~((JLONG) 0)) << (32-(shft))) : \ + (shift_temp >> (shft))) #else #define SHIFT_TEMPS -#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) #endif -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jinit_compress_master jICompress -#define jinit_c_master_control jICMaster -#define jinit_c_main_controller jICMainC -#define jinit_c_prep_controller jICPrepC -#define jinit_c_coef_controller jICCoefC -#define jinit_color_converter jICColor -#define jinit_downsampler jIDownsampler -#define jinit_forward_dct jIFDCT -#define jinit_huff_encoder jIHEncoder -#define jinit_phuff_encoder jIPHEncoder -#define jinit_arith_encoder jIAEncoder -#define jinit_marker_writer jIMWriter -#define jinit_master_decompress jIDMaster -#define jinit_d_main_controller jIDMainC -#define jinit_d_coef_controller jIDCoefC -#define jinit_d_post_controller jIDPostC -#define jinit_input_controller jIInCtlr -#define jinit_marker_reader jIMReader -#define jinit_huff_decoder jIHDecoder -#define jinit_phuff_decoder jIPHDecoder -#define jinit_arith_decoder jIADecoder -#define jinit_inverse_dct jIIDCT -#define jinit_upsampler jIUpsampler -#define jinit_color_deconverter jIDColor -#define jinit_1pass_quantizer jI1Quant -#define jinit_2pass_quantizer jI2Quant -#define jinit_merged_upsampler jIMUpsampler -#define jinit_memory_mgr jIMemMgr -#define jdiv_round_up jDivRound -#define jround_up jRound -#define jcopy_sample_rows jCopySamples -#define jcopy_block_row jCopyBlocks -#define jzero_far jZeroFar -#define jpeg_zigzag_order jZIGTable -#define jpeg_natural_order jZAGTable -#define jpeg_aritab jAriTab -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - - /* Compression module initialization routines */ -EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo, - boolean transcode_only)); -EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo)); +EXTERN(void) jinit_compress_master (j_compress_ptr cinfo); +EXTERN(void) jinit_c_master_control (j_compress_ptr cinfo, + boolean transcode_only); +EXTERN(void) jinit_c_main_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_prep_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_coef_controller (j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_color_converter (j_compress_ptr cinfo); +EXTERN(void) jinit_downsampler (j_compress_ptr cinfo); +EXTERN(void) jinit_forward_dct (j_compress_ptr cinfo); +EXTERN(void) jinit_huff_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_phuff_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_arith_encoder (j_compress_ptr cinfo); +EXTERN(void) jinit_marker_writer (j_compress_ptr cinfo); /* Decompression module initialization routines */ -EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo, - boolean need_full_buffer)); -EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo)); +EXTERN(void) jinit_master_decompress (j_decompress_ptr cinfo); +EXTERN(void) jinit_d_main_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_coef_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_post_controller (j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_input_controller (j_decompress_ptr cinfo); +EXTERN(void) jinit_marker_reader (j_decompress_ptr cinfo); +EXTERN(void) jinit_huff_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_phuff_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_arith_decoder (j_decompress_ptr cinfo); +EXTERN(void) jinit_inverse_dct (j_decompress_ptr cinfo); +EXTERN(void) jinit_upsampler (j_decompress_ptr cinfo); +EXTERN(void) jinit_color_deconverter (j_decompress_ptr cinfo); +EXTERN(void) jinit_1pass_quantizer (j_decompress_ptr cinfo); +EXTERN(void) jinit_2pass_quantizer (j_decompress_ptr cinfo); +EXTERN(void) jinit_merged_upsampler (j_decompress_ptr cinfo); /* Memory manager initialization */ -EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo)); +EXTERN(void) jinit_memory_mgr (j_common_ptr cinfo); /* Utility routines in jutils.c */ -EXTERN(long) jdiv_round_up JPP((long a, long b)); -EXTERN(long) jround_up JPP((long a, long b)); -EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols)); -EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks)); -EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero)); +EXTERN(long) jdiv_round_up (long a, long b); +EXTERN(long) jround_up (long a, long b); +EXTERN(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row, + JSAMPARRAY output_array, int dest_row, + int num_rows, JDIMENSION num_cols); +EXTERN(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, + JDIMENSION num_blocks); +EXTERN(void) jzero_far (void *target, size_t bytestozero); /* Constant tables in jutils.c */ -#if 0 /* This table is not actually needed in v6a */ +#if 0 /* This table is not actually needed in v6a */ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */ #endif extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */ /* Arithmetic coding probability estimation tables in jaricom.c */ -extern const INT32 jpeg_aritab[]; +extern const JLONG jpeg_aritab[]; /* Suppress undefined-structure complaints if necessary. */ #ifdef INCOMPLETE_TYPES_BROKEN -#ifndef AM_MEMORY_MANAGER /* only jmemmgr.c defines these */ +#ifndef AM_MEMORY_MANAGER /* only jmemmgr.c defines these */ struct jvirt_sarray_control { long dummy; }; struct jvirt_barray_control { long dummy; }; #endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeglib.h glmark2-2021.02/src/libjpeg-turbo/jpeglib.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeglib.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpeglib.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,14 @@ /* * jpeglib.h * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. - * Copyright (C) 2009-2011, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. + * Copyright (C) 2015, Google, Inc. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file defines the application interface for the JPEG library. * Most applications using the library need only include this file, @@ -15,6 +18,10 @@ #ifndef JPEGLIB_H #define JPEGLIB_H +/* Begin chromium edits */ +#include "jpeglibmangler.h" +/* End chromium edits */ + /* * First we include the configuration files that record how this * installation of the JPEG library is set up. jconfig.h can be @@ -22,10 +29,10 @@ * manual configuration options that most people need not worry about. */ -#ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ -#include "jconfig.h" /* widely used configuration options */ +#ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ +#include "jconfig.h" /* widely used configuration options */ #endif -#include "jmorecfg.h" /* seldom changed options */ +#include "jmorecfg.h" /* seldom changed options */ #ifdef __cplusplus @@ -40,13 +47,13 @@ * if you want to be compatible. */ -#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */ -#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */ -#define NUM_QUANT_TBLS 4 /* Quantization tables are numbered 0..3 */ -#define NUM_HUFF_TBLS 4 /* Huffman tables are numbered 0..3 */ -#define NUM_ARITH_TBLS 16 /* Arith-coding tables are numbered 0..15 */ -#define MAX_COMPS_IN_SCAN 4 /* JPEG limit on # of components in one scan */ -#define MAX_SAMP_FACTOR 4 /* JPEG limit on sampling factors */ +#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */ +#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */ +#define NUM_QUANT_TBLS 4 /* Quantization tables are numbered 0..3 */ +#define NUM_HUFF_TBLS 4 /* Huffman tables are numbered 0..3 */ +#define NUM_ARITH_TBLS 16 /* Arith-coding tables are numbered 0..15 */ +#define MAX_COMPS_IN_SCAN 4 /* JPEG limit on # of components in one scan */ +#define MAX_SAMP_FACTOR 4 /* JPEG limit on sampling factors */ /* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard; * the PostScript DCT filter can emit files with many more than 10 blocks/MCU. * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU @@ -61,20 +68,18 @@ /* Data structures for images (arrays of samples and of DCT coefficients). - * On 80x86 machines, the image arrays are too big for near pointers, - * but the pointer arrays can fit in near memory. */ -typedef JSAMPLE FAR *JSAMPROW; /* ptr to one image row of pixel samples. */ -typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */ -typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */ - -typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */ -typedef JBLOCK FAR *JBLOCKROW; /* pointer to one row of coefficient blocks */ -typedef JBLOCKROW *JBLOCKARRAY; /* a 2-D array of coefficient blocks */ -typedef JBLOCKARRAY *JBLOCKIMAGE; /* a 3-D array of coefficient blocks */ +typedef JSAMPLE *JSAMPROW; /* ptr to one image row of pixel samples. */ +typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */ +typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */ + +typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */ +typedef JBLOCK *JBLOCKROW; /* pointer to one row of coefficient blocks */ +typedef JBLOCKROW *JBLOCKARRAY; /* a 2-D array of coefficient blocks */ +typedef JBLOCKARRAY *JBLOCKIMAGE; /* a 3-D array of coefficient blocks */ -typedef JCOEF FAR *JCOEFPTR; /* useful in a couple of places */ +typedef JCOEF *JCOEFPTR; /* useful in a couple of places */ /* Types for JPEG compression parameters and working tables. */ @@ -87,13 +92,13 @@ * (not the zigzag order in which they are stored in a JPEG DQT marker). * CAUTION: IJG versions prior to v6a kept this array in zigzag order. */ - UINT16 quantval[DCTSIZE2]; /* quantization step for each coefficient */ + UINT16 quantval[DCTSIZE2]; /* quantization step for each coefficient */ /* This field is used only during compression. It's initialized FALSE when * the table is created, and set TRUE when it's been output to the file. * You could suppress output of a table by setting this to TRUE. * (See jpeg_suppress_tables for an example.) */ - boolean sent_table; /* TRUE when table has been output */ + boolean sent_table; /* TRUE when table has been output */ } JQUANT_TBL; @@ -101,15 +106,15 @@ typedef struct { /* These two fields directly represent the contents of a JPEG DHT marker */ - UINT8 bits[17]; /* bits[k] = # of symbols with codes of */ - /* length k bits; bits[0] is unused */ - UINT8 huffval[256]; /* The symbols, in order of incr code length */ + UINT8 bits[17]; /* bits[k] = # of symbols with codes of */ + /* length k bits; bits[0] is unused */ + UINT8 huffval[256]; /* The symbols, in order of incr code length */ /* This field is used only during compression. It's initialized FALSE when * the table is created, and set TRUE when it's been output to the file. * You could suppress output of a table by setting this to TRUE. * (See jpeg_suppress_tables for an example.) */ - boolean sent_table; /* TRUE when table has been output */ + boolean sent_table; /* TRUE when table has been output */ } JHUFF_TBL; @@ -119,20 +124,20 @@ /* These values are fixed over the whole image. */ /* For compression, they must be supplied by parameter setup; */ /* for decompression, they are read from the SOF marker. */ - int component_id; /* identifier for this component (0..255) */ - int component_index; /* its index in SOF or cinfo->comp_info[] */ - int h_samp_factor; /* horizontal sampling factor (1..4) */ - int v_samp_factor; /* vertical sampling factor (1..4) */ - int quant_tbl_no; /* quantization table selector (0..3) */ + int component_id; /* identifier for this component (0..255) */ + int component_index; /* its index in SOF or cinfo->comp_info[] */ + int h_samp_factor; /* horizontal sampling factor (1..4) */ + int v_samp_factor; /* vertical sampling factor (1..4) */ + int quant_tbl_no; /* quantization table selector (0..3) */ /* These values may vary between scans. */ /* For compression, they must be supplied by parameter setup; */ /* for decompression, they are read from the SOS marker. */ /* The decompressor output side may not use these variables. */ - int dc_tbl_no; /* DC entropy table selector (0..3) */ - int ac_tbl_no; /* AC entropy table selector (0..3) */ - + int dc_tbl_no; /* DC entropy table selector (0..3) */ + int ac_tbl_no; /* AC entropy table selector (0..3) */ + /* Remaining fields should be treated as private by applications. */ - + /* These values are computed during compression or decompression startup: */ /* Component's size in DCT blocks. * Any dummy blocks added to complete an MCU are not counted; therefore @@ -143,8 +148,8 @@ /* Size of a DCT block in samples. Always DCTSIZE for compression. * For decompression this is the size of the output from one DCT block, * reflecting any scaling we choose to apply during the IDCT step. - * Values of 1,2,4,8 are likely to be supported. Note that different - * components may receive different IDCT scalings. + * Values from 1 to 16 are supported. + * Note that different components may receive different IDCT scalings. */ #if JPEG_LIB_VERSION >= 70 int DCT_h_scaled_size; @@ -158,53 +163,53 @@ * and similarly for height. For decompression, IDCT scaling is included, so * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE) */ - JDIMENSION downsampled_width; /* actual width in samples */ + JDIMENSION downsampled_width; /* actual width in samples */ JDIMENSION downsampled_height; /* actual height in samples */ /* This flag is used only for decompression. In cases where some of the * components will be ignored (eg grayscale output from YCbCr image), * we can skip most computations for the unused components. */ - boolean component_needed; /* do we need the value of this component? */ + boolean component_needed; /* do we need the value of this component? */ /* These values are computed before starting a scan of the component. */ /* The decompressor output side may not use these variables. */ - int MCU_width; /* number of blocks per MCU, horizontally */ - int MCU_height; /* number of blocks per MCU, vertically */ - int MCU_blocks; /* MCU_width * MCU_height */ - int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */ - int last_col_width; /* # of non-dummy blocks across in last MCU */ - int last_row_height; /* # of non-dummy blocks down in last MCU */ + int MCU_width; /* number of blocks per MCU, horizontally */ + int MCU_height; /* number of blocks per MCU, vertically */ + int MCU_blocks; /* MCU_width * MCU_height */ + int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */ + int last_col_width; /* # of non-dummy blocks across in last MCU */ + int last_row_height; /* # of non-dummy blocks down in last MCU */ /* Saved quantization table for component; NULL if none yet saved. * See jdinput.c comments about the need for this information. * This field is currently used only for decompression. */ - JQUANT_TBL * quant_table; + JQUANT_TBL *quant_table; /* Private per-component storage for DCT or IDCT subsystem. */ - void * dct_table; + void *dct_table; } jpeg_component_info; /* The script for encoding a multiple-scan file is an array of these: */ typedef struct { - int comps_in_scan; /* number of components encoded in this scan */ + int comps_in_scan; /* number of components encoded in this scan */ int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */ - int Ss, Se; /* progressive JPEG spectral selection parms */ - int Ah, Al; /* progressive JPEG successive approx. parms */ + int Ss, Se; /* progressive JPEG spectral selection parms */ + int Ah, Al; /* progressive JPEG successive approx. parms */ } jpeg_scan_info; /* The decompressor can save APPn and COM markers in a list of these: */ -typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr; +typedef struct jpeg_marker_struct *jpeg_saved_marker_ptr; struct jpeg_marker_struct { - jpeg_saved_marker_ptr next; /* next in list, or NULL */ - UINT8 marker; /* marker code: JPEG_COM, or JPEG_APP0+n */ - unsigned int original_length; /* # bytes of data in the file */ - unsigned int data_length; /* # bytes of data saved at data[] */ - JOCTET FAR * data; /* the data contained in the marker */ + jpeg_saved_marker_ptr next; /* next in list, or NULL */ + UINT8 marker; /* marker code: JPEG_COM, or JPEG_APP0+n */ + unsigned int original_length; /* # bytes of data in the file */ + unsigned int data_length; /* # bytes of data saved at data[] */ + JOCTET *data; /* the data contained in the marker */ /* the marker length word is not counted in data_length or original_length */ }; @@ -214,102 +219,102 @@ #define JCS_ALPHA_EXTENSIONS 1 typedef enum { - JCS_UNKNOWN, /* error/unspecified */ - JCS_GRAYSCALE, /* monochrome */ - JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN, - RGB_BLUE, and RGB_PIXELSIZE macros */ - JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */ - JCS_CMYK, /* C/M/Y/K */ - JCS_YCCK, /* Y/Cb/Cr/K */ - JCS_EXT_RGB, /* red/green/blue */ - JCS_EXT_RGBX, /* red/green/blue/x */ - JCS_EXT_BGR, /* blue/green/red */ - JCS_EXT_BGRX, /* blue/green/red/x */ - JCS_EXT_XBGR, /* x/blue/green/red */ - JCS_EXT_XRGB, /* x/red/green/blue */ - /* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX, - JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is - undefined, and in order to ensure the best performance, - libjpeg-turbo can set that byte to whatever value it wishes. Use - the following colorspace constants to ensure that the X byte is set - to 0xFF, so that it can be interpreted as an opaque alpha - channel. */ - JCS_EXT_RGBA, /* red/green/blue/alpha */ - JCS_EXT_BGRA, /* blue/green/red/alpha */ - JCS_EXT_ABGR, /* alpha/blue/green/red */ - JCS_EXT_ARGB /* alpha/red/green/blue */ + JCS_UNKNOWN, /* error/unspecified */ + JCS_GRAYSCALE, /* monochrome */ + JCS_RGB, /* red/green/blue as specified by the RGB_RED, + RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros */ + JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */ + JCS_CMYK, /* C/M/Y/K */ + JCS_YCCK, /* Y/Cb/Cr/K */ + JCS_EXT_RGB, /* red/green/blue */ + JCS_EXT_RGBX, /* red/green/blue/x */ + JCS_EXT_BGR, /* blue/green/red */ + JCS_EXT_BGRX, /* blue/green/red/x */ + JCS_EXT_XBGR, /* x/blue/green/red */ + JCS_EXT_XRGB, /* x/red/green/blue */ + /* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, + or JCS_EXT_XRGB during decompression, the X byte is undefined, and in + order to ensure the best performance, libjpeg-turbo can set that byte to + whatever value it wishes. Use the following colorspace constants to + ensure that the X byte is set to 0xFF, so that it can be interpreted as an + opaque alpha channel. */ + JCS_EXT_RGBA, /* red/green/blue/alpha */ + JCS_EXT_BGRA, /* blue/green/red/alpha */ + JCS_EXT_ABGR, /* alpha/blue/green/red */ + JCS_EXT_ARGB, /* alpha/red/green/blue */ + JCS_RGB565 /* 5-bit red/6-bit green/5-bit blue */ } J_COLOR_SPACE; /* DCT/IDCT algorithm options. */ typedef enum { - JDCT_ISLOW, /* slow but accurate integer algorithm */ - JDCT_IFAST, /* faster, less accurate integer method */ - JDCT_FLOAT /* floating-point: accurate, fast on fast HW */ + JDCT_ISLOW, /* slow but accurate integer algorithm */ + JDCT_IFAST, /* faster, less accurate integer method */ + JDCT_FLOAT /* floating-point: accurate, fast on fast HW */ } J_DCT_METHOD; -#ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */ +#ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */ #define JDCT_DEFAULT JDCT_ISLOW #endif -#ifndef JDCT_FASTEST /* may be overridden in jconfig.h */ +#ifndef JDCT_FASTEST /* may be overridden in jconfig.h */ #define JDCT_FASTEST JDCT_IFAST #endif /* Dithering options for decompression. */ typedef enum { - JDITHER_NONE, /* no dithering */ - JDITHER_ORDERED, /* simple ordered dither */ - JDITHER_FS /* Floyd-Steinberg error diffusion dither */ + JDITHER_NONE, /* no dithering */ + JDITHER_ORDERED, /* simple ordered dither */ + JDITHER_FS /* Floyd-Steinberg error diffusion dither */ } J_DITHER_MODE; /* Common fields between JPEG compression and decompression master structs. */ #define jpeg_common_fields \ - struct jpeg_error_mgr * err; /* Error handler module */\ - struct jpeg_memory_mgr * mem; /* Memory manager module */\ - struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\ - void * client_data; /* Available for use by application */\ - boolean is_decompressor; /* So common code can tell which is which */\ - int global_state /* For checking call sequence validity */ + struct jpeg_error_mgr *err; /* Error handler module */\ + struct jpeg_memory_mgr *mem; /* Memory manager module */\ + struct jpeg_progress_mgr *progress; /* Progress monitor, or NULL if none */\ + void *client_data; /* Available for use by application */\ + boolean is_decompressor; /* So common code can tell which is which */\ + int global_state /* For checking call sequence validity */ /* Routines that are to be used by both halves of the library are declared * to receive a pointer to this structure. There are no actual instances of * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct. */ struct jpeg_common_struct { - jpeg_common_fields; /* Fields common to both master struct types */ + jpeg_common_fields; /* Fields common to both master struct types */ /* Additional fields follow in an actual jpeg_compress_struct or * jpeg_decompress_struct. All three structs must agree on these * initial fields! (This would be a lot cleaner in C++.) */ }; -typedef struct jpeg_common_struct * j_common_ptr; -typedef struct jpeg_compress_struct * j_compress_ptr; -typedef struct jpeg_decompress_struct * j_decompress_ptr; +typedef struct jpeg_common_struct *j_common_ptr; +typedef struct jpeg_compress_struct *j_compress_ptr; +typedef struct jpeg_decompress_struct *j_decompress_ptr; /* Master record for a compression instance */ struct jpeg_compress_struct { - jpeg_common_fields; /* Fields shared with jpeg_decompress_struct */ + jpeg_common_fields; /* Fields shared with jpeg_decompress_struct */ /* Destination for compressed data */ - struct jpeg_destination_mgr * dest; + struct jpeg_destination_mgr *dest; /* Description of source image --- these fields must be filled in by * outer application before starting compression. in_color_space must * be correct before you can even call jpeg_set_defaults(). */ - JDIMENSION image_width; /* input image width */ - JDIMENSION image_height; /* input image height */ - int input_components; /* # of color components in input image */ - J_COLOR_SPACE in_color_space; /* colorspace of input image */ + JDIMENSION image_width; /* input image width */ + JDIMENSION image_height; /* input image height */ + int input_components; /* # of color components in input image */ + J_COLOR_SPACE in_color_space; /* colorspace of input image */ - double input_gamma; /* image gamma of input image */ + double input_gamma; /* image gamma of input image */ /* Compression parameters --- these fields must be set before calling * jpeg_start_compress(). We recommend calling jpeg_set_defaults() to @@ -322,8 +327,8 @@ #if JPEG_LIB_VERSION >= 70 unsigned int scale_num, scale_denom; /* fraction by which to scale image */ - JDIMENSION jpeg_width; /* scaled JPEG image width */ - JDIMENSION jpeg_height; /* scaled JPEG image height */ + JDIMENSION jpeg_width; /* scaled JPEG image width */ + JDIMENSION jpeg_height; /* scaled JPEG image height */ /* Dimensions of actual JPEG image that will be written to file, * derived from input dimensions by scaling factors above. * These fields are computed by jpeg_start_compress(). @@ -332,15 +337,15 @@ */ #endif - int data_precision; /* bits of precision in image data */ + int data_precision; /* bits of precision in image data */ - int num_components; /* # of color components in JPEG image */ + int num_components; /* # of color components in JPEG image */ J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */ - jpeg_component_info * comp_info; + jpeg_component_info *comp_info; /* comp_info[i] describes component that appears i'th in SOF */ - JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; + JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS]; #if JPEG_LIB_VERSION >= 70 int q_scale_factor[NUM_QUANT_TBLS]; #endif @@ -348,30 +353,30 @@ * and corresponding scale factors (percentage, initialized 100). */ - JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; - JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; /* ptrs to Huffman coding tables, or NULL if not defined */ UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */ - int num_scans; /* # of entries in scan_info array */ - const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */ + int num_scans; /* # of entries in scan_info array */ + const jpeg_scan_info *scan_info; /* script for multi-scan file, or NULL */ /* The default value of scan_info is NULL, which causes a single-scan * sequential JPEG file to be emitted. To create a multi-scan file, * set num_scans and scan_info to point to an array of scan definitions. */ - boolean raw_data_in; /* TRUE=caller supplies downsampled data */ - boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ - boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ - boolean CCIR601_sampling; /* TRUE=first samples are cosited */ + boolean raw_data_in; /* TRUE=caller supplies downsampled data */ + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ + boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ + boolean CCIR601_sampling; /* TRUE=first samples are cosited */ #if JPEG_LIB_VERSION >= 70 boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */ #endif - int smoothing_factor; /* 1..100, or 0 for no input smoothing */ - J_DCT_METHOD dct_method; /* DCT algorithm selector */ + int smoothing_factor; /* 1..100, or 0 for no input smoothing */ + J_DCT_METHOD dct_method; /* DCT algorithm selector */ /* The restart interval can be specified in absolute MCUs by setting * restart_interval, or in MCU rows by setting restart_in_rows @@ -379,28 +384,28 @@ * for each scan). */ unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */ - int restart_in_rows; /* if > 0, MCU rows per restart interval */ + int restart_in_rows; /* if > 0, MCU rows per restart interval */ /* Parameters controlling emission of special markers. */ - boolean write_JFIF_header; /* should a JFIF marker be written? */ - UINT8 JFIF_major_version; /* What to write for the JFIF version number */ + boolean write_JFIF_header; /* should a JFIF marker be written? */ + UINT8 JFIF_major_version; /* What to write for the JFIF version number */ UINT8 JFIF_minor_version; /* These three values are not used by the JPEG code, merely copied */ /* into the JFIF APP0 marker. density_unit can be 0 for unknown, */ /* 1 for dots/inch, or 2 for dots/cm. Note that the pixel aspect */ /* ratio is defined by X_density/Y_density even when density_unit=0. */ - UINT8 density_unit; /* JFIF code for pixel size units */ - UINT16 X_density; /* Horizontal pixel density */ - UINT16 Y_density; /* Vertical pixel density */ - boolean write_Adobe_marker; /* should an Adobe marker be written? */ - + UINT8 density_unit; /* JFIF code for pixel size units */ + UINT16 X_density; /* Horizontal pixel density */ + UINT16 Y_density; /* Vertical pixel density */ + boolean write_Adobe_marker; /* should an Adobe marker be written? */ + /* State variable: index of next scanline to be written to * jpeg_write_scanlines(). Application may use this to control its * processing loop, e.g., "while (next_scanline < image_height)". */ - JDIMENSION next_scanline; /* 0 .. image_height-1 */ + JDIMENSION next_scanline; /* 0 .. image_height-1 */ /* Remaining fields are known throughout compressor, but generally * should not be touched by a surrounding application. @@ -409,59 +414,59 @@ /* * These fields are computed during compression startup */ - boolean progressive_mode; /* TRUE if scan script uses progressive mode */ - int max_h_samp_factor; /* largest h_samp_factor */ - int max_v_samp_factor; /* largest v_samp_factor */ + boolean progressive_mode; /* TRUE if scan script uses progressive mode */ + int max_h_samp_factor; /* largest h_samp_factor */ + int max_v_samp_factor; /* largest v_samp_factor */ #if JPEG_LIB_VERSION >= 70 - int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ - int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ #endif - JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ + JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ /* The coefficient controller receives data in units of MCU rows as defined * for fully interleaved scans (whether the JPEG file is interleaved or not). * There are v_samp_factor * DCTSIZE sample rows of each component in an * "iMCU" (interleaved MCU) row. */ - + /* * These fields are valid during any one scan. * They describe the components and MCUs actually appearing in the scan. */ - int comps_in_scan; /* # of JPEG components in this scan */ - jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN]; + int comps_in_scan; /* # of JPEG components in this scan */ + jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN]; /* *cur_comp_info[i] describes component that appears i'th in SOS */ - - JDIMENSION MCUs_per_row; /* # of MCUs across the image */ - JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ - - int blocks_in_MCU; /* # of DCT blocks per MCU */ + + JDIMENSION MCUs_per_row; /* # of MCUs across the image */ + JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ + + int blocks_in_MCU; /* # of DCT blocks per MCU */ int MCU_membership[C_MAX_BLOCKS_IN_MCU]; /* MCU_membership[i] is index in cur_comp_info of component owning */ /* i'th block in an MCU */ - int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ #if JPEG_LIB_VERSION >= 80 - int block_size; /* the basic DCT block size: 1..16 */ - const int * natural_order; /* natural-order position array */ - int lim_Se; /* min( Se, DCTSIZE2-1 ) */ + int block_size; /* the basic DCT block size: 1..16 */ + const int *natural_order; /* natural-order position array */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) */ #endif /* * Links to compression subobjects (methods and private variables of modules) */ - struct jpeg_comp_master * master; - struct jpeg_c_main_controller * main; - struct jpeg_c_prep_controller * prep; - struct jpeg_c_coef_controller * coef; - struct jpeg_marker_writer * marker; - struct jpeg_color_converter * cconvert; - struct jpeg_downsampler * downsample; - struct jpeg_forward_dct * fdct; - struct jpeg_entropy_encoder * entropy; - jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */ + struct jpeg_comp_master *master; + struct jpeg_c_main_controller *main; + struct jpeg_c_prep_controller *prep; + struct jpeg_c_coef_controller *coef; + struct jpeg_marker_writer *marker; + struct jpeg_color_converter *cconvert; + struct jpeg_downsampler *downsample; + struct jpeg_forward_dct *fdct; + struct jpeg_entropy_encoder *entropy; + jpeg_scan_info *script_space; /* workspace for jpeg_simple_progression */ int script_space_size; }; @@ -469,17 +474,17 @@ /* Master record for a decompression instance */ struct jpeg_decompress_struct { - jpeg_common_fields; /* Fields shared with jpeg_compress_struct */ + jpeg_common_fields; /* Fields shared with jpeg_compress_struct */ /* Source of compressed data */ - struct jpeg_source_mgr * src; + struct jpeg_source_mgr *src; /* Basic description of image --- filled in by jpeg_read_header(). */ /* Application may inspect these values to decide how to process image. */ - JDIMENSION image_width; /* nominal image width (from SOF marker) */ - JDIMENSION image_height; /* nominal image height */ - int num_components; /* # of color components in JPEG image */ + JDIMENSION image_width; /* nominal image width (from SOF marker) */ + JDIMENSION image_height; /* nominal image height */ + int num_components; /* # of color components in JPEG image */ J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */ /* Decompression processing parameters --- these fields must be set before @@ -491,24 +496,24 @@ unsigned int scale_num, scale_denom; /* fraction by which to scale image */ - double output_gamma; /* image gamma wanted in output */ + double output_gamma; /* image gamma wanted in output */ - boolean buffered_image; /* TRUE=multiple output passes */ - boolean raw_data_out; /* TRUE=downsampled data wanted */ + boolean buffered_image; /* TRUE=multiple output passes */ + boolean raw_data_out; /* TRUE=downsampled data wanted */ - J_DCT_METHOD dct_method; /* IDCT algorithm selector */ - boolean do_fancy_upsampling; /* TRUE=apply fancy upsampling */ - boolean do_block_smoothing; /* TRUE=apply interblock smoothing */ + J_DCT_METHOD dct_method; /* IDCT algorithm selector */ + boolean do_fancy_upsampling; /* TRUE=apply fancy upsampling */ + boolean do_block_smoothing; /* TRUE=apply interblock smoothing */ - boolean quantize_colors; /* TRUE=colormapped output wanted */ + boolean quantize_colors; /* TRUE=colormapped output wanted */ /* the following are ignored if not quantize_colors: */ - J_DITHER_MODE dither_mode; /* type of color dithering to use */ - boolean two_pass_quantize; /* TRUE=use two-pass color quantization */ - int desired_number_of_colors; /* max # colors to use in created colormap */ + J_DITHER_MODE dither_mode; /* type of color dithering to use */ + boolean two_pass_quantize; /* TRUE=use two-pass color quantization */ + int desired_number_of_colors; /* max # colors to use in created colormap */ /* these are significant only in buffered-image mode: */ - boolean enable_1pass_quant; /* enable future use of 1-pass quantizer */ + boolean enable_1pass_quant; /* enable future use of 1-pass quantizer */ boolean enable_external_quant;/* enable future use of external colormap */ - boolean enable_2pass_quant; /* enable future use of 2-pass quantizer */ + boolean enable_2pass_quant; /* enable future use of 2-pass quantizer */ /* Description of actual output image that will be returned to application. * These fields are computed by jpeg_start_decompress(). @@ -516,14 +521,14 @@ * in advance of calling jpeg_start_decompress(). */ - JDIMENSION output_width; /* scaled image width */ - JDIMENSION output_height; /* scaled image height */ - int out_color_components; /* # of color components in out_color_space */ - int output_components; /* # of color components returned */ + JDIMENSION output_width; /* scaled image width */ + JDIMENSION output_height; /* scaled image height */ + int out_color_components; /* # of color components in out_color_space */ + int output_components; /* # of color components returned */ /* output_components is 1 (a colormap index) when quantizing colors; * otherwise it equals out_color_components. */ - int rec_outbuf_height; /* min recommended height of scanline buffer */ + int rec_outbuf_height; /* min recommended height of scanline buffer */ /* If the buffer passed to jpeg_read_scanlines() is less than this many rows * high, space and time will be wasted due to unnecessary data copying. * Usually rec_outbuf_height will be 1 or 2, at most 4. @@ -535,8 +540,8 @@ * jpeg_start_decompress or jpeg_start_output. * The map has out_color_components rows and actual_number_of_colors columns. */ - int actual_number_of_colors; /* number of entries in use */ - JSAMPARRAY colormap; /* The color map as a 2-D pixel array */ + int actual_number_of_colors; /* number of entries in use */ + JSAMPARRAY colormap; /* The color map as a 2-D pixel array */ /* State variables: these variables indicate the progress of decompression. * The application may examine these but must not modify them. @@ -546,20 +551,20 @@ * Application may use this to control its processing loop, e.g., * "while (output_scanline < output_height)". */ - JDIMENSION output_scanline; /* 0 .. output_height-1 */ + JDIMENSION output_scanline; /* 0 .. output_height-1 */ /* Current input scan number and number of iMCU rows completed in scan. * These indicate the progress of the decompressor input side. */ - int input_scan_number; /* Number of SOS markers seen so far */ - JDIMENSION input_iMCU_row; /* Number of iMCU rows completed */ + int input_scan_number; /* Number of SOS markers seen so far */ + JDIMENSION input_iMCU_row; /* Number of iMCU rows completed */ /* The "output scan number" is the notional scan being displayed by the * output side. The decompressor will not allow output scan/row number * to get ahead of input scan/row, but it can fall arbitrarily far behind. */ - int output_scan_number; /* Nominal scan number being displayed */ - JDIMENSION output_iMCU_row; /* Number of iMCU rows read */ + int output_scan_number; /* Nominal scan number being displayed */ + JDIMENSION output_iMCU_row; /* Number of iMCU rows read */ /* Current progression status. coef_bits[c][i] indicates the precision * with which component c's DCT coefficient i (in zigzag order) is known. @@ -568,7 +573,7 @@ * (thus, 0 at completion of the progression). * This pointer is NULL when reading a non-progressive file. */ - int (*coef_bits)[DCTSIZE2]; /* -1 or current Al value for each coef */ + int (*coef_bits)[DCTSIZE2]; /* -1 or current Al value for each coef */ /* Internal JPEG parameters --- the application usually need not look at * these fields. Note that the decompressor output side may not use @@ -579,27 +584,27 @@ * datastreams when processing abbreviated JPEG datastreams. */ - JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; + JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS]; /* ptrs to coefficient quantization tables, or NULL if not defined */ - JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; - JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; + JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; /* ptrs to Huffman coding tables, or NULL if not defined */ /* These parameters are never carried across datastreams, since they * are given in SOF/SOS markers or defined to be reset by SOI. */ - int data_precision; /* bits of precision in image data */ + int data_precision; /* bits of precision in image data */ - jpeg_component_info * comp_info; + jpeg_component_info *comp_info; /* comp_info[i] describes component that appears i'th in SOF */ #if JPEG_LIB_VERSION >= 80 - boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ + boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ #endif - boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ - boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ + boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ @@ -610,17 +615,17 @@ /* These fields record data obtained from optional markers recognized by * the JPEG library. */ - boolean saw_JFIF_marker; /* TRUE iff a JFIF APP0 marker was found */ + boolean saw_JFIF_marker; /* TRUE iff a JFIF APP0 marker was found */ /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */ - UINT8 JFIF_major_version; /* JFIF version number */ + UINT8 JFIF_major_version; /* JFIF version number */ UINT8 JFIF_minor_version; - UINT8 density_unit; /* JFIF code for pixel size units */ - UINT16 X_density; /* Horizontal pixel density */ - UINT16 Y_density; /* Vertical pixel density */ - boolean saw_Adobe_marker; /* TRUE iff an Adobe APP14 marker was found */ - UINT8 Adobe_transform; /* Color transform code from Adobe marker */ + UINT8 density_unit; /* JFIF code for pixel size units */ + UINT16 X_density; /* Horizontal pixel density */ + UINT16 Y_density; /* Vertical pixel density */ + boolean saw_Adobe_marker; /* TRUE iff an Adobe APP14 marker was found */ + UINT8 Adobe_transform; /* Color transform code from Adobe marker */ - boolean CCIR601_sampling; /* TRUE=first samples are cosited */ + boolean CCIR601_sampling; /* TRUE=first samples are cosited */ /* Aside from the specific data retained from APPn markers known to the * library, the uninterpreted contents of any or all APPn and COM markers @@ -635,17 +640,17 @@ /* * These fields are computed during decompression startup */ - int max_h_samp_factor; /* largest h_samp_factor */ - int max_v_samp_factor; /* largest v_samp_factor */ + int max_h_samp_factor; /* largest h_samp_factor */ + int max_v_samp_factor; /* largest v_samp_factor */ #if JPEG_LIB_VERSION >= 70 - int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ - int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ #else - int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ + int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ #endif - JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ + JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ /* The coefficient controller's input and output progress is measured in * units of "iMCU" (interleaved MCU) rows. These are the same as MCU rows * in fully interleaved JPEG scans, but are used whether the scan is @@ -654,33 +659,33 @@ * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row. */ - JSAMPLE * sample_range_limit; /* table for fast range-limiting */ + JSAMPLE *sample_range_limit; /* table for fast range-limiting */ /* * These fields are valid during any one scan. * They describe the components and MCUs actually appearing in the scan. * Note that the decompressor output side must not use these fields. */ - int comps_in_scan; /* # of JPEG components in this scan */ - jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN]; + int comps_in_scan; /* # of JPEG components in this scan */ + jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN]; /* *cur_comp_info[i] describes component that appears i'th in SOS */ - JDIMENSION MCUs_per_row; /* # of MCUs across the image */ - JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ + JDIMENSION MCUs_per_row; /* # of MCUs across the image */ + JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */ - int blocks_in_MCU; /* # of DCT blocks per MCU */ + int blocks_in_MCU; /* # of DCT blocks per MCU */ int MCU_membership[D_MAX_BLOCKS_IN_MCU]; /* MCU_membership[i] is index in cur_comp_info of component owning */ /* i'th block in an MCU */ - int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ #if JPEG_LIB_VERSION >= 80 /* These fields are derived from Se of first SOS marker. */ - int block_size; /* the basic DCT block size: 1..16 */ - const int * natural_order; /* natural-order position array for entropy decode */ - int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ + int block_size; /* the basic DCT block size: 1..16 */ + const int *natural_order; /* natural-order position array for entropy decode */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ #endif /* This field is shared between entropy decoder and marker parser. @@ -692,17 +697,17 @@ /* * Links to decompression subobjects (methods, private variables of modules) */ - struct jpeg_decomp_master * master; - struct jpeg_d_main_controller * main; - struct jpeg_d_coef_controller * coef; - struct jpeg_d_post_controller * post; - struct jpeg_input_controller * inputctl; - struct jpeg_marker_reader * marker; - struct jpeg_entropy_decoder * entropy; - struct jpeg_inverse_dct * idct; - struct jpeg_upsampler * upsample; - struct jpeg_color_deconverter * cconvert; - struct jpeg_color_quantizer * cquantize; + struct jpeg_decomp_master *master; + struct jpeg_d_main_controller *main; + struct jpeg_d_coef_controller *coef; + struct jpeg_d_post_controller *post; + struct jpeg_input_controller *inputctl; + struct jpeg_marker_reader *marker; + struct jpeg_entropy_decoder *entropy; + struct jpeg_inverse_dct *idct; + struct jpeg_upsampler *upsample; + struct jpeg_color_deconverter *cconvert; + struct jpeg_color_quantizer *cquantize; }; @@ -718,17 +723,17 @@ struct jpeg_error_mgr { /* Error exit handler: does not return to caller */ - JMETHOD(void, error_exit, (j_common_ptr cinfo)); + void (*error_exit) (j_common_ptr cinfo); /* Conditionally emit a trace or warning message */ - JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level)); + void (*emit_message) (j_common_ptr cinfo, int msg_level); /* Routine that actually outputs a trace or error message */ - JMETHOD(void, output_message, (j_common_ptr cinfo)); + void (*output_message) (j_common_ptr cinfo); /* Format a message string for the most recent JPEG error or message */ - JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer)); -#define JMSG_LENGTH_MAX 200 /* recommended size of format_message buffer */ + void (*format_message) (j_common_ptr cinfo, char *buffer); +#define JMSG_LENGTH_MAX 200 /* recommended size of format_message buffer */ /* Reset error state variables at start of a new image */ - JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo)); - + void (*reset_error_mgr) (j_common_ptr cinfo); + /* The message ID code and any parameters are saved here. * A message can have one string parameter or up to 8 int parameters. */ @@ -738,18 +743,18 @@ int i[8]; char s[JMSG_STR_PARM_MAX]; } msg_parm; - + /* Standard state variables for error facility */ - - int trace_level; /* max msg_level that will be displayed */ - + + int trace_level; /* max msg_level that will be displayed */ + /* For recoverable corrupt-data errors, we emit a warning message, * but keep going unless emit_message chooses to abort. emit_message * should count warnings in num_warnings. The surrounding application * can check for bad data by seeing if num_warnings is nonzero at the * end of processing. */ - long num_warnings; /* number of corrupt-data warnings */ + long num_warnings; /* number of corrupt-data warnings */ /* These fields point to the table(s) of error message strings. * An application can change the table pointer to switch to a different @@ -761,52 +766,52 @@ * First table includes all errors generated by JPEG library itself. * Error code 0 is reserved for a "no such error string" message. */ - const char * const * jpeg_message_table; /* Library errors */ + const char * const *jpeg_message_table; /* Library errors */ int last_jpeg_message; /* Table contains strings 0..last_jpeg_message */ /* Second table can be added by application (see cjpeg/djpeg for example). * It contains strings numbered first_addon_message..last_addon_message. */ - const char * const * addon_message_table; /* Non-library errors */ - int first_addon_message; /* code for first string in addon table */ - int last_addon_message; /* code for last string in addon table */ + const char * const *addon_message_table; /* Non-library errors */ + int first_addon_message; /* code for first string in addon table */ + int last_addon_message; /* code for last string in addon table */ }; /* Progress monitor object */ struct jpeg_progress_mgr { - JMETHOD(void, progress_monitor, (j_common_ptr cinfo)); + void (*progress_monitor) (j_common_ptr cinfo); - long pass_counter; /* work units completed in this pass */ - long pass_limit; /* total number of work units in this pass */ - int completed_passes; /* passes completed so far */ - int total_passes; /* total number of passes expected */ + long pass_counter; /* work units completed in this pass */ + long pass_limit; /* total number of work units in this pass */ + int completed_passes; /* passes completed so far */ + int total_passes; /* total number of passes expected */ }; /* Data destination object for compression */ struct jpeg_destination_mgr { - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - JMETHOD(void, init_destination, (j_compress_ptr cinfo)); - JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo)); - JMETHOD(void, term_destination, (j_compress_ptr cinfo)); + void (*init_destination) (j_compress_ptr cinfo); + boolean (*empty_output_buffer) (j_compress_ptr cinfo); + void (*term_destination) (j_compress_ptr cinfo); }; /* Data source object for decompression */ struct jpeg_source_mgr { - const JOCTET * next_input_byte; /* => next byte to read from buffer */ - size_t bytes_in_buffer; /* # of bytes remaining in buffer */ + const JOCTET *next_input_byte; /* => next byte to read from buffer */ + size_t bytes_in_buffer; /* # of bytes remaining in buffer */ - JMETHOD(void, init_source, (j_decompress_ptr cinfo)); - JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo)); - JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes)); - JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired)); - JMETHOD(void, term_source, (j_decompress_ptr cinfo)); + void (*init_source) (j_decompress_ptr cinfo); + boolean (*fill_input_buffer) (j_decompress_ptr cinfo); + void (*skip_input_data) (j_decompress_ptr cinfo, long num_bytes); + boolean (*resync_to_restart) (j_decompress_ptr cinfo, int desired); + void (*term_source) (j_decompress_ptr cinfo); }; @@ -821,51 +826,42 @@ * successful. */ -#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ -#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ -#define JPOOL_NUMPOOLS 2 +#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ +#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ +#define JPOOL_NUMPOOLS 2 -typedef struct jvirt_sarray_control * jvirt_sarray_ptr; -typedef struct jvirt_barray_control * jvirt_barray_ptr; +typedef struct jvirt_sarray_control *jvirt_sarray_ptr; +typedef struct jvirt_barray_control *jvirt_barray_ptr; struct jpeg_memory_mgr { /* Method pointers */ - JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id, - size_t sizeofobject)); - JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id, - size_t sizeofobject)); - JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id, - JDIMENSION samplesperrow, - JDIMENSION numrows)); - JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id, - JDIMENSION blocksperrow, - JDIMENSION numrows)); - JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo, - int pool_id, - boolean pre_zero, - JDIMENSION samplesperrow, - JDIMENSION numrows, - JDIMENSION maxaccess)); - JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo, - int pool_id, - boolean pre_zero, - JDIMENSION blocksperrow, - JDIMENSION numrows, - JDIMENSION maxaccess)); - JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo)); - JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo, - jvirt_sarray_ptr ptr, - JDIMENSION start_row, - JDIMENSION num_rows, - boolean writable)); - JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo, - jvirt_barray_ptr ptr, - JDIMENSION start_row, - JDIMENSION num_rows, - boolean writable)); - JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id)); - JMETHOD(void, self_destruct, (j_common_ptr cinfo)); + void *(*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject); + void *(*alloc_large) (j_common_ptr cinfo, int pool_id, + size_t sizeofobject); + JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id, + JDIMENSION samplesperrow, JDIMENSION numrows); + JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id, + JDIMENSION blocksperrow, JDIMENSION numrows); + jvirt_sarray_ptr (*request_virt_sarray) (j_common_ptr cinfo, int pool_id, + boolean pre_zero, + JDIMENSION samplesperrow, + JDIMENSION numrows, + JDIMENSION maxaccess); + jvirt_barray_ptr (*request_virt_barray) (j_common_ptr cinfo, int pool_id, + boolean pre_zero, + JDIMENSION blocksperrow, + JDIMENSION numrows, + JDIMENSION maxaccess); + void (*realize_virt_arrays) (j_common_ptr cinfo); + JSAMPARRAY (*access_virt_sarray) (j_common_ptr cinfo, jvirt_sarray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable); + JBLOCKARRAY (*access_virt_barray) (j_common_ptr cinfo, jvirt_barray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, + boolean writable); + void (*free_pool) (j_common_ptr cinfo, int pool_id); + void (*self_destruct) (j_common_ptr cinfo); /* Limit on memory allocation for this JPEG object. (Note that this is * merely advisory, not a guaranteed maximum; it only affects the space @@ -882,96 +878,21 @@ /* Routine signature for application-supplied marker processing methods. * Need not pass marker code since it is stored in cinfo->unread_marker. */ -typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); - - -/* Declarations for routines called by application. - * The JPP macro hides prototype parameters from compilers that can't cope. - * Note JPP requires double parentheses. - */ - -#ifdef HAVE_PROTOTYPES -#define JPP(arglist) arglist -#else -#define JPP(arglist) () -#endif +typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo); -/* Short forms of external names for systems with brain-damaged linkers. - * We shorten external names to be unique in the first six letters, which - * is good enough for all known systems. - * (If your compiler itself needs names to be unique in less than 15 - * characters, you are out of luck. Get a better compiler.) +/* Originally, this macro was used as a way of defining function prototypes + * for both modern compilers as well as older compilers that did not support + * prototype parameters. libjpeg-turbo has never supported these older, + * non-ANSI compilers, but the macro is still included because there is some + * software out there that uses it. */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_std_error jStdError -#define jpeg_CreateCompress jCreaCompress -#define jpeg_CreateDecompress jCreaDecompress -#define jpeg_destroy_compress jDestCompress -#define jpeg_destroy_decompress jDestDecompress -#define jpeg_stdio_dest jStdDest -#define jpeg_stdio_src jStdSrc -#if JPEG_LIB_VERSION >= 80 -#define jpeg_mem_dest jMemDest -#define jpeg_mem_src jMemSrc -#endif -#define jpeg_set_defaults jSetDefaults -#define jpeg_set_colorspace jSetColorspace -#define jpeg_default_colorspace jDefColorspace -#define jpeg_set_quality jSetQuality -#define jpeg_set_linear_quality jSetLQuality -#if JPEG_LIB_VERSION >= 70 -#define jpeg_default_qtables jDefQTables -#endif -#define jpeg_add_quant_table jAddQuantTable -#define jpeg_quality_scaling jQualityScaling -#define jpeg_simple_progression jSimProgress -#define jpeg_suppress_tables jSuppressTables -#define jpeg_alloc_quant_table jAlcQTable -#define jpeg_alloc_huff_table jAlcHTable -#define jpeg_start_compress jStrtCompress -#define jpeg_write_scanlines jWrtScanlines -#define jpeg_finish_compress jFinCompress -#if JPEG_LIB_VERSION >= 70 -#define jpeg_calc_jpeg_dimensions jCjpegDimensions -#endif -#define jpeg_write_raw_data jWrtRawData -#define jpeg_write_marker jWrtMarker -#define jpeg_write_m_header jWrtMHeader -#define jpeg_write_m_byte jWrtMByte -#define jpeg_write_tables jWrtTables -#define jpeg_read_header jReadHeader -#define jpeg_start_decompress jStrtDecompress -#define jpeg_read_scanlines jReadScanlines -#define jpeg_finish_decompress jFinDecompress -#define jpeg_read_raw_data jReadRawData -#define jpeg_has_multiple_scans jHasMultScn -#define jpeg_start_output jStrtOutput -#define jpeg_finish_output jFinOutput -#define jpeg_input_complete jInComplete -#define jpeg_new_colormap jNewCMap -#define jpeg_consume_input jConsumeInput -#if JPEG_LIB_VERSION >= 80 -#define jpeg_core_output_dimensions jCoreDimensions -#endif -#define jpeg_calc_output_dimensions jCalcDimensions -#define jpeg_save_markers jSaveMarkers -#define jpeg_set_marker_processor jSetMarker -#define jpeg_read_coefficients jReadCoefs -#define jpeg_write_coefficients jWrtCoefs -#define jpeg_copy_critical_parameters jCopyCrit -#define jpeg_abort_compress jAbrtCompress -#define jpeg_abort_decompress jAbrtDecompress -#define jpeg_abort jAbort -#define jpeg_destroy jDestroy -#define jpeg_resync_to_restart jResyncRestart -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#define JPP(arglist) arglist /* Default error-management setup */ -EXTERN(struct jpeg_error_mgr *) jpeg_std_error - JPP((struct jpeg_error_mgr * err)); +EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr *err); /* Initialization of JPEG compression objects. * jpeg_create_compress() and jpeg_create_decompress() are the exported @@ -982,97 +903,89 @@ */ #define jpeg_create_compress(cinfo) \ jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_compress_struct)) + (size_t) sizeof(struct jpeg_compress_struct)) #define jpeg_create_decompress(cinfo) \ jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_decompress_struct)) -EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo, - int version, size_t structsize)); -EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo, - int version, size_t structsize)); + (size_t) sizeof(struct jpeg_decompress_struct)) +EXTERN(void) jpeg_CreateCompress (j_compress_ptr cinfo, int version, + size_t structsize); +EXTERN(void) jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, + size_t structsize); /* Destruction of JPEG compression objects */ -EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_destroy_compress (j_compress_ptr cinfo); +EXTERN(void) jpeg_destroy_decompress (j_decompress_ptr cinfo); /* Standard data source and destination managers: stdio streams. */ /* Caller is responsible for opening the file before and closing after. */ -EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile)); -EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile)); +EXTERN(void) jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile); +EXTERN(void) jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile); -#if JPEG_LIB_VERSION >= 80 +#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) /* Data source and destination managers: memory buffers. */ -EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo, - unsigned char ** outbuffer, - unsigned long * outsize)); -EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, - unsigned char * inbuffer, - unsigned long insize)); +EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize); +EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, + const unsigned char *inbuffer, + unsigned long insize); #endif /* Default parameter setup for compression */ -EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_set_defaults (j_compress_ptr cinfo); /* Compression parameter setup aids */ -EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo, - J_COLOR_SPACE colorspace)); -EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality, - boolean force_baseline)); -EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo, - int scale_factor, - boolean force_baseline)); +EXTERN(void) jpeg_set_colorspace (j_compress_ptr cinfo, + J_COLOR_SPACE colorspace); +EXTERN(void) jpeg_default_colorspace (j_compress_ptr cinfo); +EXTERN(void) jpeg_set_quality (j_compress_ptr cinfo, int quality, + boolean force_baseline); +EXTERN(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, + boolean force_baseline); #if JPEG_LIB_VERSION >= 70 -EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo, - boolean force_baseline)); +EXTERN(void) jpeg_default_qtables (j_compress_ptr cinfo, + boolean force_baseline); #endif -EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, - boolean force_baseline)); -EXTERN(int) jpeg_quality_scaling JPP((int quality)); -EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo, - boolean suppress)); -EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo)); -EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo)); +EXTERN(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, + int scale_factor, boolean force_baseline); +EXTERN(int) jpeg_quality_scaling (int quality); +EXTERN(void) jpeg_simple_progression (j_compress_ptr cinfo); +EXTERN(void) jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress); +EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table (j_common_ptr cinfo); +EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table (j_common_ptr cinfo); /* Main entry points for compression */ -EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo, - boolean write_all_tables)); -EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION num_lines)); -EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_start_compress (j_compress_ptr cinfo, + boolean write_all_tables); +EXTERN(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION num_lines); +EXTERN(void) jpeg_finish_compress (j_compress_ptr cinfo); #if JPEG_LIB_VERSION >= 70 /* Precalculate JPEG dimensions for current compression parameters. */ -EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo); #endif /* Replaces jpeg_write_scanlines when writing raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo, - JSAMPIMAGE data, - JDIMENSION num_lines)); +EXTERN(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines); /* Write a special marker. See libjpeg.txt concerning safe usage. */ -EXTERN(void) jpeg_write_marker - JPP((j_compress_ptr cinfo, int marker, - const JOCTET * dataptr, unsigned int datalen)); +EXTERN(void) jpeg_write_marker (j_compress_ptr cinfo, int marker, + const JOCTET *dataptr, unsigned int datalen); /* Same, but piecemeal. */ -EXTERN(void) jpeg_write_m_header - JPP((j_compress_ptr cinfo, int marker, unsigned int datalen)); -EXTERN(void) jpeg_write_m_byte - JPP((j_compress_ptr cinfo, int val)); +EXTERN(void) jpeg_write_m_header (j_compress_ptr cinfo, int marker, + unsigned int datalen); +EXTERN(void) jpeg_write_m_byte (j_compress_ptr cinfo, int val); /* Alternate compression function: just write an abbreviated table file */ -EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo)); +EXTERN(void) jpeg_write_tables (j_compress_ptr cinfo); /* Decompression startup: read start of JPEG datastream to see what's there */ -EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo, - boolean require_image)); +EXTERN(int) jpeg_read_header (j_decompress_ptr cinfo, boolean require_image); /* Return value is one of: */ -#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ -#define JPEG_HEADER_OK 1 /* Found valid image datastream */ -#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ +#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ +#define JPEG_HEADER_OK 1 /* Found valid image datastream */ +#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ /* If you pass require_image = TRUE (normal case), you need not check for * a TABLES_ONLY return code; an abbreviated file will cause an error exit. * JPEG_SUSPENDED is only possible if you use a data source module that can @@ -1080,54 +993,55 @@ */ /* Main entry points for decompression */ -EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo)); -EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION max_lines)); -EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(boolean) jpeg_start_decompress (j_decompress_ptr cinfo); +EXTERN(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION max_lines); +EXTERN(JDIMENSION) jpeg_skip_scanlines (j_decompress_ptr cinfo, + JDIMENSION num_lines); +EXTERN(void) jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width); +EXTERN(boolean) jpeg_finish_decompress (j_decompress_ptr cinfo); /* Replaces jpeg_read_scanlines when reading raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo, - JSAMPIMAGE data, - JDIMENSION max_lines)); +EXTERN(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines); /* Additional entry points for buffered-image mode. */ -EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo)); -EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo, - int scan_number)); -EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo)); -EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo)); -EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo)); -EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo)); +EXTERN(boolean) jpeg_has_multiple_scans (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number); +EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_input_complete (j_decompress_ptr cinfo); +EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo); +EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo); /* Return value is one of: */ -/* #define JPEG_SUSPENDED 0 Suspended due to lack of input data */ -#define JPEG_REACHED_SOS 1 /* Reached start of new scan */ -#define JPEG_REACHED_EOI 2 /* Reached end of image */ -#define JPEG_ROW_COMPLETED 3 /* Completed one iMCU row */ -#define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ +/* #define JPEG_SUSPENDED 0 Suspended due to lack of input data */ +#define JPEG_REACHED_SOS 1 /* Reached start of new scan */ +#define JPEG_REACHED_EOI 2 /* Reached end of image */ +#define JPEG_ROW_COMPLETED 3 /* Completed one iMCU row */ +#define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ /* Precalculate output dimensions for current decompression parameters. */ #if JPEG_LIB_VERSION >= 80 -EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_core_output_dimensions (j_decompress_ptr cinfo); #endif -EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_calc_output_dimensions (j_decompress_ptr cinfo); /* Control saving of COM and APPn markers into marker_list. */ -EXTERN(void) jpeg_save_markers - JPP((j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit)); +EXTERN(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit); /* Install a special processing method for COM or APPn markers. */ -EXTERN(void) jpeg_set_marker_processor - JPP((j_decompress_ptr cinfo, int marker_code, - jpeg_marker_parser_method routine)); +EXTERN(void) jpeg_set_marker_processor (j_decompress_ptr cinfo, + int marker_code, + jpeg_marker_parser_method routine); /* Read or write raw DCT coefficients --- useful for lossless transcoding. */ -EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo)); -EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo, - jvirt_barray_ptr * coef_arrays)); -EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo, - j_compress_ptr dstinfo)); +EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo); +EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays); +EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo); /* If you choose to abort compression or decompression before completing * jpeg_finish_(de)compress, then you need to clean up to release memory, @@ -1135,28 +1049,27 @@ * if you're done with the JPEG object, but if you want to clean it up and * reuse it, call this: */ -EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo)); -EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo)); +EXTERN(void) jpeg_abort_compress (j_compress_ptr cinfo); +EXTERN(void) jpeg_abort_decompress (j_decompress_ptr cinfo); /* Generic versions of jpeg_abort and jpeg_destroy that work on either * flavor of JPEG object. These may be more convenient in some places. */ -EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo)); -EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo)); +EXTERN(void) jpeg_abort (j_common_ptr cinfo); +EXTERN(void) jpeg_destroy (j_common_ptr cinfo); /* Default restart-marker-resync procedure for use by data source modules */ -EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo, - int desired)); +EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired); /* These marker codes are exported since applications and data source modules * are likely to want to use them. */ -#define JPEG_RST0 0xD0 /* RST0 marker code */ -#define JPEG_EOI 0xD9 /* EOI marker code */ -#define JPEG_APP0 0xE0 /* APP0 marker code */ -#define JPEG_COM 0xFE /* COM marker code */ +#define JPEG_RST0 0xD0 /* RST0 marker code */ +#define JPEG_EOI 0xD9 /* EOI marker code */ +#define JPEG_APP0 0xE0 /* APP0 marker code */ +#define JPEG_COM 0xFE /* COM marker code */ /* If we have a brain-damaged compiler that emits warnings (or worse, errors) @@ -1165,7 +1078,7 @@ */ #ifdef INCOMPLETE_TYPES_BROKEN -#ifndef JPEG_INTERNALS /* will be defined in jpegint.h */ +#ifndef JPEG_INTERNALS /* will be defined in jpegint.h */ struct jvirt_sarray_control { long dummy; }; struct jvirt_barray_control { long dummy; }; struct jpeg_comp_master { long dummy; }; @@ -1200,8 +1113,8 @@ */ #ifdef JPEG_INTERNALS -#include "jpegint.h" /* fetch private declarations */ -#include "jerror.h" /* fetch error codes too */ +#include "jpegint.h" /* fetch private declarations */ +#include "jerror.h" /* fetch error codes too */ #endif #ifdef __cplusplus diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeglibmangler.h glmark2-2021.02/src/libjpeg-turbo/jpeglibmangler.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeglibmangler.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpeglibmangler.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,117 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ +#define THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ + +// Mangle all externally visible function names so we can build our own libjpeg +// without system libraries trying to use it. + +#define jpeg_make_c_derived_tbl chromium_jpeg_make_c_derived_tbl +#define jpeg_gen_optimal_table chromium_jpeg_gen_optimal_table +#define jpeg_make_d_derived_tbl chromium_jpeg_make_d_derived_tbl +#define jpeg_fill_bit_buffer chromium_jpeg_fill_bit_buffer +#define jpeg_huff_decode chromium_jpeg_huff_decode +#define jpeg_fdct_islow chromium_jpeg_fdct_islow +#define jpeg_fdct_ifast chromium_jpeg_fdct_ifast +#define jpeg_fdct_float chromium_jpeg_fdct_float +#define jpeg_idct_islow chromium_jpeg_idct_islow +#define jpeg_idct_ifast chromium_jpeg_idct_ifast +#define jpeg_idct_float chromium_jpeg_idct_float +#define jpeg_idct_4x4 chromium_jpeg_idct_4x4 +#define jpeg_idct_2x2 chromium_jpeg_idct_2x2 +#define jpeg_idct_1x1 chromium_jpeg_idct_1x1 +#define jinit_compress_master chromium_jinit_compress_master +#define jinit_c_master_control chromium_jinit_c_master_control +#define jinit_c_main_controller chromium_jinit_c_main_controller +#define jinit_c_prep_controller chromium_jinit_c_prep_controller +#define jinit_c_coef_controller chromium_jinit_c_coef_controller +#define jinit_color_converter chromium_jinit_color_converter +#define jinit_downsampler chromium_jinit_downsampler +#define jinit_forward_dct chromium_jinit_forward_dct +#define jinit_huff_encoder chromium_jinit_huff_encoder +#define jinit_phuff_encoder chromium_jinit_phuff_encoder +#define jinit_marker_writer chromium_jinit_marker_writer +#define jinit_master_decompress chromium_jinit_master_decompress +#define jinit_d_main_controller chromium_jinit_d_main_controller +#define jinit_d_coef_controller chromium_jinit_d_coef_controller +#define jinit_d_post_controller chromium_jinit_d_post_controller +#define jinit_input_controller chromium_jinit_input_controller +#define jinit_marker_reader chromium_jinit_marker_reader +#define jinit_huff_decoder chromium_jinit_huff_decoder +#define jinit_phuff_decoder chromium_jinit_phuff_decoder +#define jinit_inverse_dct chromium_jinit_inverse_dct +#define jinit_upsampler chromium_jinit_upsampler +#define jinit_color_deconverter chromium_jinit_color_deconverter +#define jinit_1pass_quantizer chromium_jinit_1pass_quantizer +#define jinit_2pass_quantizer chromium_jinit_2pass_quantizer +#define jinit_merged_upsampler chromium_jinit_merged_upsampler +#define jinit_memory_mgr chromium_jinit_memory_mgr +#define jdiv_round_up chromium_jdiv_round_up +#define jround_up chromium_jround_up +#define jcopy_sample_rows chromium_jcopy_sample_rows +#define jcopy_block_row chromium_jcopy_block_row +#define jzero_far chromium_jzero_far +#define jpeg_std_error chromium_jpeg_std_error +#define jpeg_CreateCompress chromium_jpeg_CreateCompress +#define jpeg_CreateDecompress chromium_jpeg_CreateDecompress +#define jpeg_destroy_compress chromium_jpeg_destroy_compress +#define jpeg_destroy_decompress chromium_jpeg_destroy_decompress +#define jpeg_stdio_dest chromium_jpeg_stdio_dest +#define jpeg_stdio_src chromium_jpeg_stdio_src +#define jpeg_set_defaults chromium_jpeg_set_defaults +#define jpeg_set_colorspace chromium_jpeg_set_colorspace +#define jpeg_default_colorspace chromium_jpeg_default_colorspace +#define jpeg_set_quality chromium_jpeg_set_quality +#define jpeg_set_linear_quality chromium_jpeg_set_linear_quality +#define jpeg_add_quant_table chromium_jpeg_add_quant_table +#define jpeg_quality_scaling chromium_jpeg_quality_scaling +#define jpeg_simple_progression chromium_jpeg_simple_progression +#define jpeg_suppress_tables chromium_jpeg_suppress_tables +#define jpeg_alloc_quant_table chromium_jpeg_alloc_quant_table +#define jpeg_alloc_huff_table chromium_jpeg_alloc_huff_table +#define jpeg_start_compress chromium_jpeg_start_compress +#define jpeg_write_scanlines chromium_jpeg_write_scanlines +#define jpeg_finish_compress chromium_jpeg_finish_compress +#define jpeg_write_raw_data chromium_jpeg_write_raw_data +#define jpeg_write_marker chromium_jpeg_write_marker +#define jpeg_write_m_header chromium_jpeg_write_m_header +#define jpeg_write_m_byte chromium_jpeg_write_m_byte +#define jpeg_write_tables chromium_jpeg_write_tables +#define jpeg_read_header chromium_jpeg_read_header +#define jpeg_start_decompress chromium_jpeg_start_decompress +#define jpeg_read_scanlines chromium_jpeg_read_scanlines +#define jpeg_skip_scanlines chromium_jpeg_skip_scanlines +#define jpeg_crop_scanline chromium_jpeg_crop_scanline +#define jpeg_finish_decompress chromium_jpeg_finish_decompress +#define jpeg_read_raw_data chromium_jpeg_read_raw_data +#define jpeg_has_multiple_scans chromium_jpeg_has_multiple_scans +#define jpeg_start_output chromium_jpeg_start_output +#define jpeg_finish_output chromium_jpeg_finish_output +#define jpeg_input_complete chromium_jpeg_input_complete +#define jpeg_new_colormap chromium_jpeg_new_colormap +#define jpeg_consume_input chromium_jpeg_consume_input +#define jpeg_calc_output_dimensions chromium_jpeg_calc_output_dimensions +#define jpeg_save_markers chromium_jpeg_save_markers +#define jpeg_set_marker_processor chromium_jpeg_set_marker_processor +#define jpeg_read_coefficients chromium_jpeg_read_coefficients +#define jpeg_write_coefficients chromium_jpeg_write_coefficients +#define jpeg_copy_critical_parameters chromium_jpeg_copy_critical_parameters +#define jpeg_abort_compress chromium_jpeg_abort_compress +#define jpeg_abort_decompress chromium_jpeg_abort_decompress +#define jpeg_abort chromium_jpeg_abort +#define jpeg_destroy chromium_jpeg_destroy +#define jpeg_resync_to_restart chromium_jpeg_resync_to_restart +#define jpeg_get_small chromium_jpeg_get_small +#define jpeg_free_small chromium_jpeg_free_small +#define jpeg_get_large chromium_jpeg_get_large +#define jpeg_free_large chromium_jpeg_free_large +#define jpeg_mem_available chromium_jpeg_mem_available +#define jpeg_open_backing_store chromium_jpeg_open_backing_store +#define jpeg_mem_init chromium_jpeg_mem_init +#define jpeg_mem_term chromium_jpeg_mem_term +#define jpeg_std_message_table chromium_jpeg_std_message_table +#define jpeg_natural_order chromium_jpeg_natural_order + +#endif // THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeg_nbits_table.h glmark2-2021.02/src/libjpeg-turbo/jpeg_nbits_table.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpeg_nbits_table.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpeg_nbits_table.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4098 @@ +static const unsigned char jpeg_nbits_table[65536] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegtran.1 glmark2-2021.02/src/libjpeg-turbo/jpegtran.1 --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegtran.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpegtran.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,290 @@ +.TH JPEGTRAN 1 "18 February 2016" +.SH NAME +jpegtran \- lossless transformation of JPEG files +.SH SYNOPSIS +.B jpegtran +[ +.I options +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B jpegtran +performs various useful transformations of JPEG files. +It can translate the coded representation from one variant of JPEG to another, +for example from baseline JPEG to progressive JPEG or vice versa. It can also +perform some rearrangements of the image data, for example turning an image +from landscape to portrait format by rotation. +.PP +For EXIF files and JPEG files containing Exif data, you may prefer to use +.B exiftran +instead. +.PP +.B jpegtran +works by rearranging the compressed data (DCT coefficients), without +ever fully decoding the image. Therefore, its transformations are lossless: +there is no image degradation at all, which would not be true if you used +.B djpeg +followed by +.B cjpeg +to accomplish the same conversion. But by the same token, +.B jpegtran +cannot perform lossy operations such as changing the image quality. However, +while the image data is losslessly transformed, metadata can be removed. See +the +.B \-copy +option for specifics. +.PP +.B jpegtran +reads the named JPEG/JFIF file, or the standard input if no file is +named, and produces a JPEG/JFIF file on the standard output. +.SH OPTIONS +All switch names may be abbreviated; for example, +.B \-optimize +may be written +.B \-opt +or +.BR \-o . +Upper and lower case are equivalent. +British spellings are also accepted (e.g., +.BR \-optimise ), +though for brevity these are not mentioned below. +.PP +To specify the coded JPEG representation used in the output file, +.B jpegtran +accepts a subset of the switches recognized by +.BR cjpeg : +.TP +.B \-optimize +Perform optimization of entropy encoding parameters. +.TP +.B \-progressive +Create progressive JPEG file. +.TP +.BI \-restart " N" +Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is +attached to the number. +.TP +.B \-arithmetic +Use arithmetic coding. +.TP +.BI \-scans " file" +Use the scan script given in the specified text file. +.PP +See +.BR cjpeg (1) +for more details about these switches. +If you specify none of these switches, you get a plain baseline-JPEG output +file. The quality setting and so forth are determined by the input file. +.PP +The image can be losslessly transformed by giving one of these switches: +.TP +.B \-flip horizontal +Mirror image horizontally (left-right). +.TP +.B \-flip vertical +Mirror image vertically (top-bottom). +.TP +.B \-rotate 90 +Rotate image 90 degrees clockwise. +.TP +.B \-rotate 180 +Rotate image 180 degrees. +.TP +.B \-rotate 270 +Rotate image 270 degrees clockwise (or 90 ccw). +.TP +.B \-transpose +Transpose image (across UL-to-LR axis). +.TP +.B \-transverse +Transverse transpose (across UR-to-LL axis). +.PP +The transpose transformation has no restrictions regarding image dimensions. +The other transformations operate rather oddly if the image dimensions are not +a multiple of the iMCU size (usually 8 or 16 pixels), because they can only +transform complete blocks of DCT coefficient data in the desired way. +.PP +.BR jpegtran 's +default behavior when transforming an odd-size image is designed +to preserve exact reversibility and mathematical consistency of the +transformation set. As stated, transpose is able to flip the entire image +area. Horizontal mirroring leaves any partial iMCU column at the right edge +untouched, but is able to flip all rows of the image. Similarly, vertical +mirroring leaves any partial iMCU row at the bottom edge untouched, but is +able to flip all columns. The other transforms can be built up as sequences +of transpose and flip operations; for consistency, their actions on edge +pixels are defined to be the same as the end result of the corresponding +transpose-and-flip sequence. +.PP +For practical use, you may prefer to discard any untransformable edge pixels +rather than having a strange-looking strip along the right and/or bottom edges +of a transformed image. To do this, add the +.B \-trim +switch: +.TP +.B \-trim +Drop non-transformable edge blocks. +.IP +Obviously, a transformation with +.B \-trim +is not reversible, so strictly speaking +.B jpegtran +with this switch is not lossless. Also, the expected mathematical +equivalences between the transformations no longer hold. For example, +.B \-rot 270 -trim +trims only the bottom edge, but +.B \-rot 90 -trim +followed by +.B \-rot 180 -trim +trims both edges. +.TP +.B \-perfect +If you are only interested in perfect transformations, add the +.B \-perfect +switch. This causes +.B jpegtran +to fail with an error if the transformation is not perfect. +.IP +For example, you may want to do +.IP +.B (jpegtran \-rot 90 -perfect +.I foo.jpg +.B || djpeg +.I foo.jpg +.B | pnmflip \-r90 | cjpeg) +.IP +to do a perfect rotation, if available, or an approximated one if not. +.PP +This version of \fBjpegtran\fR also offers a lossless crop option, which +discards data outside of a given image region but losslessly preserves what is +inside. Like the rotate and flip transforms, lossless crop is restricted by the +current JPEG format; the upper left corner of the selected region must fall on +an iMCU boundary. If it doesn't, then it is silently moved up and/or left to +the nearest iMCU boundary (the lower right corner is unchanged.) Thus, the +output image covers at least the requested region, but it may cover more. The +adjustment of the region dimensions may be optionally disabled by attaching +an 'f' character ("force") to the width or height number. + +The image can be losslessly cropped by giving the switch: +.TP +.B \-crop WxH+X+Y +Crop the image to a rectangular region of width W and height H, starting at +point X,Y. The lossless crop feature discards data outside of a given image +region but losslessly preserves what is inside. Like the rotate and flip +transforms, lossless crop is restricted by the current JPEG format; the upper +left corner of the selected region must fall on an iMCU boundary. If it +doesn't, then it is silently moved up and/or left to the nearest iMCU boundary +(the lower right corner is unchanged.) +.PP +Other not-strictly-lossless transformation switches are: +.TP +.B \-grayscale +Force grayscale output. +.IP +This option discards the chrominance channels if the input image is YCbCr +(ie, a standard color JPEG), resulting in a grayscale JPEG file. The +luminance channel is preserved exactly, so this is a better method of reducing +to grayscale than decompression, conversion, and recompression. This switch +is particularly handy for fixing a monochrome picture that was mistakenly +encoded as a color JPEG. (In such a case, the space savings from getting rid +of the near-empty chroma channels won't be large; but the decoding time for +a grayscale JPEG is substantially less than that for a color JPEG.) +.PP +.B jpegtran +also recognizes these switches that control what to do with "extra" markers, +such as comment blocks: +.TP +.B \-copy none +Copy no extra markers from source file. This setting suppresses all +comments and other metadata in the source file. +.TP +.B \-copy comments +Copy only comment markers. This setting copies comments from the source file +but discards any other metadata. +.TP +.B \-copy all +Copy all extra markers. This setting preserves miscellaneous markers +found in the source file, such as JFIF thumbnails, Exif data, and Photoshop +settings. In some files, these extra markers can be sizable. Note that this +option will copy thumbnails as-is; they will not be transformed. +.PP +The default behavior is \fB-copy comments\fR. (Note: in IJG releases v6 and +v6a, \fBjpegtran\fR always did the equivalent of \fB-copy none\fR.) +.PP +Additional switches recognized by jpegtran are: +.TP +.BI \-maxmemory " N" +Set limit for amount of memory to use in processing large images. Value is +in thousands of bytes, or millions of bytes if "M" is attached to the +number. For example, +.B \-max 4m +selects 4000000 bytes. If more space is needed, temporary files will be used. +.TP +.BI \-outfile " name" +Send output image to the named file, not to standard output. +.TP +.B \-verbose +Enable debug printout. More +.BR \-v 's +give more output. Also, version information is printed at startup. +.TP +.B \-debug +Same as +.BR \-verbose . +.TP +.B \-version +Print version information and exit. +.SH EXAMPLES +.LP +This example converts a baseline JPEG file to progressive form: +.IP +.B jpegtran \-progressive +.I foo.jpg +.B > +.I fooprog.jpg +.PP +This example rotates an image 90 degrees clockwise, discarding any +unrotatable edge pixels: +.IP +.B jpegtran \-rot 90 -trim +.I foo.jpg +.B > +.I foo90.jpg +.SH ENVIRONMENT +.TP +.B JPEGMEM +If this environment variable is set, its value is the default memory limit. +The value is specified as described for the +.B \-maxmemory +switch. +.B JPEGMEM +overrides the default value specified when the program was compiled, and +itself is overridden by an explicit +.BR \-maxmemory . +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR rdjpgcom (1), +.BR wrjpgcom (1) +.br +Wallace, Gregory K. "The JPEG Still Picture Compression Standard", +Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44. +.SH AUTHOR +Independent JPEG Group +.PP +This file was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo and to wordsmith certain sections. +.SH BUGS +The transform options can't transform odd-size images perfectly. Use +.B \-trim +or +.B \-perfect +if you don't like the results. +.PP +The entire image is read into memory and then written out again, even in +cases where this isn't really necessary. Expect swapping on large images, +especially when using the more complex transform options. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegtran.c glmark2-2021.02/src/libjpeg-turbo/jpegtran.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jpegtran.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jpegtran.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,551 @@ +/* + * jpegtran.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1995-2010, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a command-line user interface for JPEG transcoding. + * It is very similar to cjpeg.c, and partly to djpeg.c, but provides + * lossless transcoding between different JPEG file formats. It also + * provides some lossless and sort-of-lossless transformations of JPEG data. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "transupp.h" /* Support routines for jpegtran */ +#include "jversion.h" /* for version message */ +#include "jconfigint.h" + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. + * The main program in this file doesn't actually use this capability... + */ + + +static const char *progname; /* program name for error messages */ +static char *outfilename; /* for -outfile switch */ +static JCOPY_OPTION copyoption; /* -copy switch */ +static jpeg_transform_info transformoption; /* image transformation options */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -copy none Copy no extra markers from source file\n"); + fprintf(stderr, " -copy comments Copy only comment markers (default)\n"); + fprintf(stderr, " -copy all Copy all extra markers\n"); +#ifdef ENTROPY_OPT_SUPPORTED + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); +#endif +#ifdef C_PROGRESSIVE_SUPPORTED + fprintf(stderr, " -progressive Create progressive JPEG file\n"); +#endif + fprintf(stderr, "Switches for modifying the image:\n"); +#if TRANSFORMS_SUPPORTED + fprintf(stderr, " -crop WxH+X+Y Crop to a rectangular subarea\n"); + fprintf(stderr, " -grayscale Reduce to grayscale (omit color data)\n"); + fprintf(stderr, " -flip [horizontal|vertical] Mirror image (left-right or top-bottom)\n"); + fprintf(stderr, " -perfect Fail if there is non-transformable edge blocks\n"); + fprintf(stderr, " -rotate [90|180|270] Rotate image (degrees clockwise)\n"); +#endif +#if TRANSFORMS_SUPPORTED + fprintf(stderr, " -transpose Transpose image\n"); + fprintf(stderr, " -transverse Transverse transpose image\n"); + fprintf(stderr, " -trim Drop non-transformable edge blocks\n"); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif + fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); + fprintf(stderr, " -outfile name Specify name for output file\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, " -version Print version information and exit\n"); + fprintf(stderr, "Switches for wizards:\n"); +#ifdef C_MULTISCAN_FILES_SUPPORTED + fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); +#endif + exit(EXIT_FAILURE); +} + + +LOCAL(void) +select_transform (JXFORM_CODE transform) +/* Silly little routine to detect multiple transform options, + * which we can't handle. + */ +{ +#if TRANSFORMS_SUPPORTED + if (transformoption.transform == JXFORM_NONE || + transformoption.transform == transform) { + transformoption.transform = transform; + } else { + fprintf(stderr, "%s: can only do one image transformation at a time\n", + progname); + usage(); + } +#else + fprintf(stderr, "%s: sorry, image transformation was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif +} + + +LOCAL(int) +parse_switches (j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char *arg; + boolean simple_progressive; + char *scansarg = NULL; /* saves -scans parm if any */ + + /* Set up default JPEG parameters. */ + simple_progressive = FALSE; + outfilename = NULL; + copyoption = JCOPYOPT_DEFAULT; + transformoption.transform = JXFORM_NONE; + transformoption.perfect = FALSE; + transformoption.trim = FALSE; + transformoption.force_grayscale = FALSE; + transformoption.crop = FALSE; + transformoption.slow_hflip = FALSE; + cinfo->err->trace_level = 0; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "arithmetic", 1)) { + /* Use arithmetic coding. */ +#ifdef C_ARITH_CODING_SUPPORTED + cinfo->arith_code = TRUE; +#else + fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "copy", 2)) { + /* Select which extra markers to copy. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "none", 1)) { + copyoption = JCOPYOPT_NONE; + } else if (keymatch(argv[argn], "comments", 1)) { + copyoption = JCOPYOPT_COMMENTS; + } else if (keymatch(argv[argn], "all", 1)) { + copyoption = JCOPYOPT_ALL; + } else + usage(); + + } else if (keymatch(arg, "crop", 2)) { + /* Perform lossless cropping. */ +#if TRANSFORMS_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) { + fprintf(stderr, "%s: bogus -crop argument '%s'\n", + progname, argv[argn]); + exit(EXIT_FAILURE); + } +#else + select_transform(JXFORM_NONE); /* force an error */ +#endif + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! printed_version) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + fprintf(stderr, "%s\n\n", JCOPYRIGHT); + fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", + JVERSION); + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "version", 4)) { + fprintf(stderr, "%s version %s (build %s)\n", + PACKAGE_NAME, VERSION, BUILD); + exit(EXIT_SUCCESS); + + } else if (keymatch(arg, "flip", 1)) { + /* Mirror left-right or top-bottom. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "horizontal", 1)) + select_transform(JXFORM_FLIP_H); + else if (keymatch(argv[argn], "vertical", 1)) + select_transform(JXFORM_FLIP_V); + else + usage(); + + } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) { + /* Force to grayscale. */ +#if TRANSFORMS_SUPPORTED + transformoption.force_grayscale = TRUE; +#else + select_transform(JXFORM_NONE); /* force an error */ +#endif + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { + /* Enable entropy parm optimization. */ +#ifdef ENTROPY_OPT_SUPPORTED + cinfo->optimize_coding = TRUE; +#else + fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "perfect", 2)) { + /* Fail if there is any partial edge MCUs that the transform can't + * handle. */ + transformoption.perfect = TRUE; + + } else if (keymatch(arg, "progressive", 2)) { + /* Select simple progressive mode. */ +#ifdef C_PROGRESSIVE_SUPPORTED + simple_progressive = TRUE; + /* We must postpone execution until num_components is known. */ +#else + fprintf(stderr, "%s: sorry, progressive output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "restart", 1)) { + /* Restart interval in MCU rows (or in MCUs with 'b'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (lval < 0 || lval > 65535L) + usage(); + if (ch == 'b' || ch == 'B') { + cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ + } else { + cinfo->restart_in_rows = (int) lval; + /* restart_interval will be computed during startup */ + } + + } else if (keymatch(arg, "rotate", 2)) { + /* Rotate 90, 180, or 270 degrees (measured clockwise). */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "90", 2)) + select_transform(JXFORM_ROT_90); + else if (keymatch(argv[argn], "180", 3)) + select_transform(JXFORM_ROT_180); + else if (keymatch(argv[argn], "270", 3)) + select_transform(JXFORM_ROT_270); + else + usage(); + + } else if (keymatch(arg, "scans", 1)) { + /* Set scan script. */ +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + scansarg = argv[argn]; + /* We must postpone reading the file in case -progressive appears. */ +#else + fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "transpose", 1)) { + /* Transpose (across UL-to-LR axis). */ + select_transform(JXFORM_TRANSPOSE); + + } else if (keymatch(arg, "transverse", 6)) { + /* Transverse transpose (across UR-to-LL axis). */ + select_transform(JXFORM_TRANSVERSE); + + } else if (keymatch(arg, "trim", 3)) { + /* Trim off any partial edge MCUs that the transform can't handle. */ + transformoption.trim = TRUE; + + } else { + usage(); /* bogus switch */ + } + } + + /* Post-switch-scanning cleanup */ + + if (for_real) { + +#ifdef C_PROGRESSIVE_SUPPORTED + if (simple_progressive) /* process -progressive; -scans can override */ + jpeg_simple_progression(cinfo); +#endif + +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (scansarg != NULL) /* process -scans if it was present */ + if (! read_scan_script(cinfo, scansarg)) + usage(); +#endif + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + struct jpeg_decompress_struct srcinfo; + struct jpeg_compress_struct dstinfo; + struct jpeg_error_mgr jsrcerr, jdsterr; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + jvirt_barray_ptr *src_coef_arrays; + jvirt_barray_ptr *dst_coef_arrays; + int file_index; + /* We assume all-in-memory processing and can therefore use only a + * single file pointer for sequential input and output operation. + */ + FILE *fp; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "jpegtran"; /* in case C library doesn't provide it */ + + /* Initialize the JPEG decompression object with default error handling. */ + srcinfo.err = jpeg_std_error(&jsrcerr); + jpeg_create_decompress(&srcinfo); + /* Initialize the JPEG compression object with default error handling. */ + dstinfo.err = jpeg_std_error(&jdsterr); + jpeg_create_compress(&dstinfo); + + /* Scan command line to find file names. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are mostly ignored; we will rescan the switches after + * opening the input file. Also note that most of the switches affect the + * destination JPEG object, so we parse into that and then copy over what + * needs to affects the source too. + */ + + file_index = parse_switches(&dstinfo, argc, argv, 0, FALSE); + jsrcerr.trace_level = jdsterr.trace_level; + srcinfo.mem->max_memory_to_use = dstinfo.mem->max_memory_to_use; + +#ifdef TWO_FILE_COMMANDLINE + /* Must have either -outfile switch or explicit output file name */ + if (outfilename == NULL) { + if (file_index != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + outfilename = argv[file_index+1]; + } else { + if (file_index != argc-1) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + } +#else + /* Unix style: expect zero or one file name */ + if (file_index < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } +#endif /* TWO_FILE_COMMANDLINE */ + + /* Open the input file. */ + if (file_index < argc) { + if ((fp = fopen(argv[file_index], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s for reading\n", progname, argv[file_index]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ + fp = read_stdin(); + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &dstinfo, &progress); +#endif + + /* Specify data source for decompression */ + jpeg_stdio_src(&srcinfo, fp); + + /* Enable saving of extra markers that we want to copy */ + jcopy_markers_setup(&srcinfo, copyoption); + + /* Read file header */ + (void) jpeg_read_header(&srcinfo, TRUE); + + /* Any space needed by a transform option must be requested before + * jpeg_read_coefficients so that memory allocation will be done right. + */ +#if TRANSFORMS_SUPPORTED + /* Fail right away if -perfect is given and transformation is not perfect. + */ + if (!jtransform_request_workspace(&srcinfo, &transformoption)) { + fprintf(stderr, "%s: transformation is not perfect\n", progname); + exit(EXIT_FAILURE); + } +#endif + + /* Read source file as DCT coefficients */ + src_coef_arrays = jpeg_read_coefficients(&srcinfo); + + /* Initialize destination compression parameters from source values */ + jpeg_copy_critical_parameters(&srcinfo, &dstinfo); + + /* Adjust destination parameters if required by transform options; + * also find out which set of coefficient arrays will hold the output. + */ +#if TRANSFORMS_SUPPORTED + dst_coef_arrays = jtransform_adjust_parameters(&srcinfo, &dstinfo, + src_coef_arrays, + &transformoption); +#else + dst_coef_arrays = src_coef_arrays; +#endif + + /* Close input file, if we opened it. + * Note: we assume that jpeg_read_coefficients consumed all input + * until JPEG_REACHED_EOI, and that jpeg_finish_decompress will + * only consume more while (! cinfo->inputctl->eoi_reached). + * We cannot call jpeg_finish_decompress here since we still need the + * virtual arrays allocated from the source object for processing. + */ + if (fp != stdin) + fclose(fp); + + /* Open the output file. */ + if (outfilename != NULL) { + if ((fp = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s for writing\n", progname, outfilename); + exit(EXIT_FAILURE); + } + } else { + /* default output file is stdout */ + fp = write_stdout(); + } + + /* Adjust default compression parameters by re-parsing the options */ + file_index = parse_switches(&dstinfo, argc, argv, 0, TRUE); + + /* Specify data destination for compression */ + jpeg_stdio_dest(&dstinfo, fp); + + /* Start compressor (note no image data is actually written here) */ + jpeg_write_coefficients(&dstinfo, dst_coef_arrays); + + /* Copy to the output file any extra markers that we want to preserve */ + jcopy_markers_execute(&srcinfo, &dstinfo, copyoption); + + /* Execute image transformation, if any */ +#if TRANSFORMS_SUPPORTED + jtransform_execute_transformation(&srcinfo, &dstinfo, + src_coef_arrays, + &transformoption); +#endif + + /* Finish compression and release memory */ + jpeg_finish_compress(&dstinfo); + jpeg_destroy_compress(&dstinfo); + (void) jpeg_finish_decompress(&srcinfo); + jpeg_destroy_decompress(&srcinfo); + + /* Close output file, if we opened it */ + if (fp != stdout) + fclose(fp); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &dstinfo); +#endif + + /* All done. */ + exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jquant1.c glmark2-2021.02/src/libjpeg-turbo/jquant1.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jquant1.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jquant1.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jquant1.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009, D. R. Commander - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains 1-pass color quantization (color mapping) routines. * These routines provide mapping to a fixed color map using equally spaced @@ -69,9 +71,9 @@ * table in both directions. */ -#define ODITHER_SIZE 16 /* dimension of dither matrix */ +#define ODITHER_SIZE 16 /* dimension of dither matrix */ /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */ -#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE) /* # cells in matrix */ +#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE) /* # cells in matrix */ #define ODITHER_MASK (ODITHER_SIZE-1) /* mask for wrapping around counters */ typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE]; @@ -106,8 +108,8 @@ * Errors are accumulated into the array fserrors[], at a resolution of * 1/16th of a pixel count. The error at a given pixel is propagated * to its not-yet-processed neighbors using the standard F-S fractions, - * ... (here) 7/16 - * 3/16 5/16 1/16 + * ... (here) 7/16 + * 3/16 5/16 1/16 * We work left-to-right on even rows, right-to-left on odd rows. * * We can get away with a single array (holding one row's worth of errors) @@ -120,52 +122,49 @@ * The fserrors[] array is indexed [component#][position]. * We provide (#columns + 2) entries per component; the extra entry at each * end saves us from special-casing the first and last pixels. - * - * Note: on a wide image, we might not have enough room in a PC's near data - * segment to hold the error array; so it is allocated with alloc_large. */ #if BITS_IN_JSAMPLE == 8 -typedef INT16 FSERROR; /* 16 bits should be enough */ -typedef int LOCFSERROR; /* use 'int' for calculation temps */ +typedef INT16 FSERROR; /* 16 bits should be enough */ +typedef int LOCFSERROR; /* use 'int' for calculation temps */ #else -typedef INT32 FSERROR; /* may need more than 16 bits */ -typedef INT32 LOCFSERROR; /* be sure calculation temps are big enough */ +typedef JLONG FSERROR; /* may need more than 16 bits */ +typedef JLONG LOCFSERROR; /* be sure calculation temps are big enough */ #endif -typedef FSERROR FAR *FSERRPTR; /* pointer to error array (in FAR storage!) */ +typedef FSERROR *FSERRPTR; /* pointer to error array */ /* Private subobject */ -#define MAX_Q_COMPS 4 /* max components I can handle */ +#define MAX_Q_COMPS 4 /* max components I can handle */ typedef struct { struct jpeg_color_quantizer pub; /* public fields */ /* Initially allocated colormap is saved here */ - JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */ - int sv_actual; /* number of entries in use */ + JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */ + int sv_actual; /* number of entries in use */ - JSAMPARRAY colorindex; /* Precomputed mapping for speed */ + JSAMPARRAY colorindex; /* Precomputed mapping for speed */ /* colorindex[i][j] = index of color closest to pixel value j in component i, * premultiplied as described above. Since colormap indexes must fit into * JSAMPLEs, the entries of this array will too. */ - boolean is_padded; /* is the colorindex padded for odither? */ + boolean is_padded; /* is the colorindex padded for odither? */ - int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */ + int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */ /* Variables for ordered dithering */ - int row_index; /* cur row's vertical index in dither matrix */ + int row_index; /* cur row's vertical index in dither matrix */ ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */ /* Variables for Floyd-Steinberg dithering */ FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */ - boolean on_odd_row; /* flag to remember which row we are on */ + boolean on_odd_row; /* flag to remember which row we are on */ } my_cquantizer; -typedef my_cquantizer * my_cquantize_ptr; +typedef my_cquantizer *my_cquantize_ptr; /* @@ -204,11 +203,11 @@ iroot = 1; do { iroot++; - temp = iroot; /* set temp = iroot ** nc */ + temp = iroot; /* set temp = iroot ** nc */ for (i = 1; i < nc; i++) temp *= iroot; } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */ - iroot--; /* now iroot = floor(root) */ + iroot--; /* now iroot = floor(root) */ /* Must have at least 2 color values per component */ if (iroot < 2) @@ -232,10 +231,10 @@ j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i); /* calculate new total_colors if Ncolors[j] is incremented */ temp = total_colors / Ncolors[j]; - temp *= Ncolors[j]+1; /* done in long arith to avoid oflo */ + temp *= Ncolors[j]+1; /* done in long arith to avoid oflo */ if (temp > (long) max_colors) - break; /* won't fit, done with this pass */ - Ncolors[j]++; /* OK, apply the increment */ + break; /* won't fit, done with this pass */ + Ncolors[j]++; /* OK, apply the increment */ total_colors = (int) temp; changed = TRUE; } @@ -255,7 +254,7 @@ * (Forcing the upper and lower values to the limits ensures that * dithering can't produce a color outside the selected gamut.) */ - return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj); + return (int) (((JLONG) j * MAXJSAMPLE + maxj/2) / maxj); } @@ -265,7 +264,7 @@ /* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */ { /* Breakpoints are halfway between values returned by output_value */ - return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj)); + return (int) (((JLONG) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj)); } @@ -277,8 +276,8 @@ create_colormap (j_decompress_ptr cinfo) { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - JSAMPARRAY colormap; /* Created colormap */ - int total_colors; /* Number of distinct output colors */ + JSAMPARRAY colormap; /* Created colormap */ + int total_colors; /* Number of distinct output colors */ int i,j,k, nci, blksize, blkdist, ptr, val; /* Select number of colors for each component */ @@ -287,8 +286,8 @@ /* Report selected color counts */ if (cinfo->out_color_components == 3) TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS, - total_colors, cquantize->Ncolors[0], - cquantize->Ncolors[1], cquantize->Ncolors[2]); + total_colors, cquantize->Ncolors[0], + cquantize->Ncolors[1], cquantize->Ncolors[2]); else TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors); @@ -313,12 +312,12 @@ val = output_value(cinfo, i, j, nci-1); /* Fill in all colormap entries that have this value of this component */ for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) { - /* fill in blksize entries beginning at ptr */ - for (k = 0; k < blksize; k++) - colormap[i][ptr+k] = (JSAMPLE) val; + /* fill in blksize entries beginning at ptr */ + for (k = 0; k < blksize; k++) + colormap[i][ptr+k] = (JSAMPLE) val; } } - blkdist = blksize; /* blksize of this color is blkdist of next */ + blkdist = blksize; /* blksize of this color is blkdist of next */ } /* Save the colormap in private storage, @@ -376,16 +375,16 @@ val = 0; k = largest_input_value(cinfo, i, 0, nci-1); for (j = 0; j <= MAXJSAMPLE; j++) { - while (j > k) /* advance val if past boundary */ - k = largest_input_value(cinfo, i, ++val, nci-1); + while (j > k) /* advance val if past boundary */ + k = largest_input_value(cinfo, i, ++val, nci-1); /* premultiply so that no multiplication needed in main processing */ indexptr[j] = (JSAMPLE) (val * blksize); } /* Pad at both ends if necessary */ if (pad) for (j = 1; j <= MAXJSAMPLE; j++) { - indexptr[-j] = indexptr[0]; - indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE]; + indexptr[-j] = indexptr[0]; + indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE]; } } } @@ -401,21 +400,21 @@ { ODITHER_MATRIX_PTR odither; int j,k; - INT32 num,den; + JLONG num,den; odither = (ODITHER_MATRIX_PTR) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(ODITHER_MATRIX)); + sizeof(ODITHER_MATRIX)); /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1). * Hence the dither value for the matrix cell with fill order f * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1). * On 16-bit-int machine, be careful to avoid overflow. */ - den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1)); + den = 2 * ODITHER_CELLS * ((JLONG) (ncolors - 1)); for (j = 0; j < ODITHER_SIZE; j++) { for (k = 0; k < ODITHER_SIZE; k++) { - num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k]))) - * MAXJSAMPLE; + num = ((JLONG) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k]))) + * MAXJSAMPLE; /* Ensure round towards zero despite C's lack of consistency * about rounding negative values in integer division... */ @@ -428,7 +427,7 @@ /* * Create the ordered-dither tables. - * Components having the same number of representative colors may + * Components having the same number of representative colors may * share a dither table. */ @@ -441,14 +440,14 @@ for (i = 0; i < cinfo->out_color_components; i++) { nci = cquantize->Ncolors[i]; /* # of distinct values for this color */ - odither = NULL; /* search for matching prior component */ + odither = NULL; /* search for matching prior component */ for (j = 0; j < i; j++) { if (nci == cquantize->Ncolors[j]) { - odither = cquantize->odither[j]; - break; + odither = cquantize->odither[j]; + break; } } - if (odither == NULL) /* need a new table? */ + if (odither == NULL) /* need a new table? */ odither = make_odither_array(cinfo, nci); cquantize->odither[i] = odither; } @@ -461,7 +460,7 @@ METHODDEF(void) color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -479,7 +478,7 @@ for (col = width; col > 0; col--) { pixcode = 0; for (ci = 0; ci < nc; ci++) { - pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); + pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); } *ptrout++ = (JSAMPLE) pixcode; } @@ -489,7 +488,7 @@ METHODDEF(void) color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -517,15 +516,15 @@ METHODDEF(void) quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, with ordered dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; register JSAMPROW input_ptr; register JSAMPROW output_ptr; JSAMPROW colorindex_ci; - int * dither; /* points to active row of dither matrix */ - int row_index, col_index; /* current indexes into dither matrix */ + int *dither; /* points to active row of dither matrix */ + int row_index, col_index; /* current indexes into dither matrix */ int nc = cinfo->out_color_components; int ci; int row; @@ -534,8 +533,7 @@ for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void FAR *) output_buf[row], - (size_t) (width * SIZEOF(JSAMPLE))); + jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); row_index = cquantize->row_index; for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; @@ -545,17 +543,17 @@ col_index = 0; for (col = width; col > 0; col--) { - /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE, - * select output value, accumulate into output code for this pixel. - * Range-limiting need not be done explicitly, as we have extended - * the colorindex table to produce the right answers for out-of-range - * inputs. The maximum dither is +- MAXJSAMPLE; this sets the - * required amount of padding. - */ - *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]]; - input_ptr += nc; - output_ptr++; - col_index = (col_index + 1) & ODITHER_MASK; + /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE, + * select output value, accumulate into output code for this pixel. + * Range-limiting need not be done explicitly, as we have extended + * the colorindex table to produce the right answers for out-of-range + * inputs. The maximum dither is +- MAXJSAMPLE; this sets the + * required amount of padding. + */ + *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]]; + input_ptr += nc; + output_ptr++; + col_index = (col_index + 1) & ODITHER_MASK; } } /* Advance row index for next row */ @@ -567,7 +565,7 @@ METHODDEF(void) quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, with ordered dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -577,10 +575,10 @@ JSAMPROW colorindex0 = cquantize->colorindex[0]; JSAMPROW colorindex1 = cquantize->colorindex[1]; JSAMPROW colorindex2 = cquantize->colorindex[2]; - int * dither0; /* points to active row of dither matrix */ - int * dither1; - int * dither2; - int row_index, col_index; /* current indexes into dither matrix */ + int *dither0; /* points to active row of dither matrix */ + int *dither1; + int *dither2; + int row_index, col_index; /* current indexes into dither matrix */ int row; JDIMENSION col; JDIMENSION width = cinfo->output_width; @@ -596,11 +594,11 @@ for (col = width; col > 0; col--) { pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + - dither0[col_index]]); + dither0[col_index]]); pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + - dither1[col_index]]); + dither1[col_index]]); pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + - dither2[col_index]]); + dither2[col_index]]); *output_ptr++ = (JSAMPLE) pixcode; col_index = (col_index + 1) & ODITHER_MASK; } @@ -612,24 +610,24 @@ METHODDEF(void) quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY output_buf, int num_rows) /* General case, with Floyd-Steinberg dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - register LOCFSERROR cur; /* current error or pixel value */ - LOCFSERROR belowerr; /* error for pixel below cur */ - LOCFSERROR bpreverr; /* error for below/prev col */ - LOCFSERROR bnexterr; /* error for below/next col */ + register LOCFSERROR cur; /* current error or pixel value */ + LOCFSERROR belowerr; /* error for pixel below cur */ + LOCFSERROR bpreverr; /* error for below/prev col */ + LOCFSERROR bnexterr; /* error for below/next col */ LOCFSERROR delta; - register FSERRPTR errorptr; /* => fserrors[] at column before current */ + register FSERRPTR errorptr; /* => fserrors[] at column before current */ register JSAMPROW input_ptr; register JSAMPROW output_ptr; JSAMPROW colorindex_ci; JSAMPROW colormap_ci; int pixcode; int nc = cinfo->out_color_components; - int dir; /* 1 for left-to-right, -1 for right-to-left */ - int dirnc; /* dir * nc */ + int dir; /* 1 for left-to-right, -1 for right-to-left */ + int dirnc; /* dir * nc */ int ci; int row; JDIMENSION col; @@ -639,23 +637,22 @@ for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void FAR *) output_buf[row], - (size_t) (width * SIZEOF(JSAMPLE))); + jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; output_ptr = output_buf[row]; if (cquantize->on_odd_row) { - /* work right to left in this row */ - input_ptr += (width-1) * nc; /* so point to rightmost pixel */ - output_ptr += width-1; - dir = -1; - dirnc = -nc; - errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */ + /* work right to left in this row */ + input_ptr += (width-1) * nc; /* so point to rightmost pixel */ + output_ptr += width-1; + dir = -1; + dirnc = -nc; + errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */ } else { - /* work left to right in this row */ - dir = 1; - dirnc = nc; - errorptr = cquantize->fserrors[ci]; /* => entry before first column */ + /* work left to right in this row */ + dir = 1; + dirnc = nc; + errorptr = cquantize->fserrors[ci]; /* => entry before first column */ } colorindex_ci = cquantize->colorindex[ci]; colormap_ci = cquantize->sv_colormap[ci]; @@ -665,47 +662,47 @@ belowerr = bpreverr = 0; for (col = width; col > 0; col--) { - /* cur holds the error propagated from the previous pixel on the - * current line. Add the error propagated from the previous line - * to form the complete error correction term for this pixel, and - * round the error term (which is expressed * 16) to an integer. - * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct - * for either sign of the error value. - * Note: errorptr points to *previous* column's array entry. - */ - cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4); - /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE. - * The maximum error is +- MAXJSAMPLE; this sets the required size - * of the range_limit array. - */ - cur += GETJSAMPLE(*input_ptr); - cur = GETJSAMPLE(range_limit[cur]); - /* Select output value, accumulate into output code for this pixel */ - pixcode = GETJSAMPLE(colorindex_ci[cur]); - *output_ptr += (JSAMPLE) pixcode; - /* Compute actual representation error at this pixel */ - /* Note: we can do this even though we don't have the final */ - /* pixel code, because the colormap is orthogonal. */ - cur -= GETJSAMPLE(colormap_ci[pixcode]); - /* Compute error fractions to be propagated to adjacent pixels. - * Add these into the running sums, and simultaneously shift the - * next-line error sums left by 1 column. - */ - bnexterr = cur; - delta = cur * 2; - cur += delta; /* form error * 3 */ - errorptr[0] = (FSERROR) (bpreverr + cur); - cur += delta; /* form error * 5 */ - bpreverr = belowerr + cur; - belowerr = bnexterr; - cur += delta; /* form error * 7 */ - /* At this point cur contains the 7/16 error value to be propagated - * to the next pixel on the current line, and all the errors for the - * next line have been shifted over. We are therefore ready to move on. - */ - input_ptr += dirnc; /* advance input ptr to next column */ - output_ptr += dir; /* advance output ptr to next column */ - errorptr += dir; /* advance errorptr to current column */ + /* cur holds the error propagated from the previous pixel on the + * current line. Add the error propagated from the previous line + * to form the complete error correction term for this pixel, and + * round the error term (which is expressed * 16) to an integer. + * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct + * for either sign of the error value. + * Note: errorptr points to *previous* column's array entry. + */ + cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4); + /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE. + * The maximum error is +- MAXJSAMPLE; this sets the required size + * of the range_limit array. + */ + cur += GETJSAMPLE(*input_ptr); + cur = GETJSAMPLE(range_limit[cur]); + /* Select output value, accumulate into output code for this pixel */ + pixcode = GETJSAMPLE(colorindex_ci[cur]); + *output_ptr += (JSAMPLE) pixcode; + /* Compute actual representation error at this pixel */ + /* Note: we can do this even though we don't have the final */ + /* pixel code, because the colormap is orthogonal. */ + cur -= GETJSAMPLE(colormap_ci[pixcode]); + /* Compute error fractions to be propagated to adjacent pixels. + * Add these into the running sums, and simultaneously shift the + * next-line error sums left by 1 column. + */ + bnexterr = cur; + delta = cur * 2; + cur += delta; /* form error * 3 */ + errorptr[0] = (FSERROR) (bpreverr + cur); + cur += delta; /* form error * 5 */ + bpreverr = belowerr + cur; + belowerr = bnexterr; + cur += delta; /* form error * 7 */ + /* At this point cur contains the 7/16 error value to be propagated + * to the next pixel on the current line, and all the errors for the + * next line have been shifted over. We are therefore ready to move on. + */ + input_ptr += dirnc; /* advance input ptr to next column */ + output_ptr += dir; /* advance output ptr to next column */ + errorptr += dir; /* advance errorptr to current column */ } /* Post-loop cleanup: we must unload the final error value into the * final fserrors[] entry. Note we need not unload belowerr because @@ -729,7 +726,7 @@ size_t arraysize; int i; - arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR)); + arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) { cquantize->fserrors[i] = (FSERRPTR) (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); @@ -765,7 +762,7 @@ cquantize->pub.color_quantize = quantize3_ord_dither; else cquantize->pub.color_quantize = quantize_ord_dither; - cquantize->row_index = 0; /* initialize state for ordered dither */ + cquantize->row_index = 0; /* initialize state for ordered dither */ /* If user changed to ordered dither from another mode, * we must recreate the color index table with padding. * This will cost extra space, but probably isn't very likely. @@ -783,9 +780,9 @@ if (cquantize->fserrors[0] == NULL) alloc_fs_workspace(cinfo); /* Initialize the propagated errors to zero. */ - arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR)); + arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) - jzero_far((void FAR *) cquantize->fserrors[i], arraysize); + jzero_far((void *) cquantize->fserrors[i], arraysize); break; default: ERREXIT(cinfo, JERR_NOT_COMPILED); @@ -828,13 +825,13 @@ cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_cquantizer)); + sizeof(my_cquantizer)); cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; cquantize->pub.start_pass = start_pass_1_quant; cquantize->pub.finish_pass = finish_pass_1_quant; cquantize->pub.new_color_map = new_color_map_1_quant; cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */ - cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */ + cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */ /* Make sure my internal arrays won't overflow */ if (cinfo->out_color_components > MAX_Q_COMPS) @@ -848,10 +845,10 @@ create_colorindex(cinfo); /* Allocate Floyd-Steinberg workspace now if requested. - * We do this now since it is FAR storage and may affect the memory - * manager's space calculations. If the user changes to FS dither - * mode in a later pass, we will allocate the space then, and will - * possibly overrun the max_memory_to_use setting. + * We do this now since it may affect the memory manager's space + * calculations. If the user changes to FS dither mode in a later pass, we + * will allocate the space then, and will possibly overrun the + * max_memory_to_use setting. */ if (cinfo->dither_mode == JDITHER_FS) alloc_fs_workspace(cinfo); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jquant2.c glmark2-2021.02/src/libjpeg-turbo/jquant2.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jquant2.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jquant2.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jquant2.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * libjpeg-turbo Modifications: + * Copyright (C) 2009, 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains 2-pass color quantization (color mapping) routines. * These routines provide selection of a custom color map for an image, @@ -42,7 +44,7 @@ * color space, and repeatedly splits the "largest" remaining box until we * have as many boxes as desired colors. Then the mean color in each * remaining box becomes one of the possible output colors. - * + * * The second pass over the image maps each input pixel to the closest output * color (optionally after applying a Floyd-Steinberg dithering correction). * This mapping is logically trivial, but making it go fast enough requires @@ -71,9 +73,9 @@ * probably need to change these scale factors. */ -#define R_SCALE 2 /* scale R distances by this much */ -#define G_SCALE 3 /* scale G distances by this much */ -#define B_SCALE 1 /* and B by this much */ +#define R_SCALE 2 /* scale R distances by this much */ +#define G_SCALE 3 /* scale G distances by this much */ +#define B_SCALE 1 /* and B by this much */ static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; #define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]] @@ -101,9 +103,7 @@ * machines, we can't just allocate the histogram in one chunk. Instead * of a true 3-D array, we use a row of pointers to 2-D arrays. Each * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and - * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries. Note that - * on 80x86 machines, the pointer row is in near memory but the actual - * arrays are in far memory (same arrangement as we use for image arrays). + * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries. */ #define MAXNUMCOLORS (MAXJSAMPLE+1) /* maximum size of colormap */ @@ -111,9 +111,9 @@ /* These will do the right thing for either R,G,B or B,G,R color order, * but you may not like the results for other color orders. */ -#define HIST_C0_BITS 5 /* bits of precision in R/B histogram */ -#define HIST_C1_BITS 6 /* bits of precision in G histogram */ -#define HIST_C2_BITS 5 /* bits of precision in B/R histogram */ +#define HIST_C0_BITS 5 /* bits of precision in R/B histogram */ +#define HIST_C1_BITS 6 /* bits of precision in G histogram */ +#define HIST_C2_BITS 5 /* bits of precision in B/R histogram */ /* Number of elements along histogram axes. */ #define HIST_C0_ELEMS (1<cquantize; register JSAMPROW ptr; @@ -219,11 +216,11 @@ for (col = width; col > 0; col--) { /* get pixel value and index into the histogram */ histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] - [GETJSAMPLE(ptr[1]) >> C1_SHIFT] - [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; + [GETJSAMPLE(ptr[1]) >> C1_SHIFT] + [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; /* increment, check for overflow and undo increment if so. */ if (++(*histp) <= 0) - (*histp)--; + (*histp)--; ptr += 3; } } @@ -243,12 +240,12 @@ int c1min, c1max; int c2min, c2max; /* The volume (actually 2-norm) of the box */ - INT32 volume; + JLONG volume; /* The number of nonzero histogram cells within this box */ long colorcount; } box; -typedef box * boxptr; +typedef box *boxptr; LOCAL(boxptr) @@ -260,7 +257,7 @@ register int i; register long maxc = 0; boxptr which = NULL; - + for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) { if (boxp->colorcount > maxc && boxp->volume > 0) { which = boxp; @@ -278,9 +275,9 @@ { register boxptr boxp; register int i; - register INT32 maxv = 0; + register JLONG maxv = 0; boxptr which = NULL; - + for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) { if (boxp->volume > maxv) { which = boxp; @@ -301,77 +298,77 @@ histptr histp; int c0,c1,c2; int c0min,c0max,c1min,c1max,c2min,c2max; - INT32 dist0,dist1,dist2; + JLONG dist0,dist1,dist2; long ccount; - + c0min = boxp->c0min; c0max = boxp->c0max; c1min = boxp->c1min; c1max = boxp->c1max; c2min = boxp->c2min; c2max = boxp->c2max; - + if (c0max > c0min) for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c0min = c0min = c0; - goto have_c0min; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c0min = c0min = c0; + goto have_c0min; + } } have_c0min: if (c0max > c0min) for (c0 = c0max; c0 >= c0min; c0--) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c0max = c0max = c0; - goto have_c0max; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c0max = c0max = c0; + goto have_c0max; + } } have_c0max: if (c1max > c1min) for (c1 = c1min; c1 <= c1max; c1++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c1min = c1min = c1; - goto have_c1min; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c1min = c1min = c1; + goto have_c1min; + } } have_c1min: if (c1max > c1min) for (c1 = c1max; c1 >= c1min; c1--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; - for (c2 = c2min; c2 <= c2max; c2++) - if (*histp++ != 0) { - boxp->c1max = c1max = c1; - goto have_c1max; - } + histp = & histogram[c0][c1][c2min]; + for (c2 = c2min; c2 <= c2max; c2++) + if (*histp++ != 0) { + boxp->c1max = c1max = c1; + goto have_c1max; + } } have_c1max: if (c2max > c2min) for (c2 = c2min; c2 <= c2max; c2++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; - for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) - if (*histp != 0) { - boxp->c2min = c2min = c2; - goto have_c2min; - } + histp = & histogram[c0][c1min][c2]; + for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) + if (*histp != 0) { + boxp->c2min = c2min = c2; + goto have_c2min; + } } have_c2min: if (c2max > c2min) for (c2 = c2max; c2 >= c2min; c2--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; - for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) - if (*histp != 0) { - boxp->c2max = c2max = c2; - goto have_c2max; - } + histp = & histogram[c0][c1min][c2]; + for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) + if (*histp != 0) { + boxp->c2max = c2max = c2; + goto have_c2max; + } } have_c2max: @@ -387,16 +384,16 @@ dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE; dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE; boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2; - + /* Now scan remaining volume of box and compute population */ ccount = 0; for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { histp = & histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++, histp++) - if (*histp != 0) { - ccount++; - } + if (*histp != 0) { + ccount++; + } } boxp->colorcount = ccount; } @@ -404,7 +401,7 @@ LOCAL(int) median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, - int desired_colors) + int desired_colors) /* Repeatedly select and split the largest box until we have enough boxes */ { int n,lb; @@ -420,9 +417,9 @@ } else { b1 = find_biggest_volume(boxlist, numboxes); } - if (b1 == NULL) /* no splittable boxes left! */ + if (b1 == NULL) /* no splittable boxes left! */ break; - b2 = &boxlist[numboxes]; /* where new box will go */ + b2 = &boxlist[numboxes]; /* where new box will go */ /* Copy the color bounds to the new box. */ b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max; b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min; @@ -494,24 +491,24 @@ long c0total = 0; long c1total = 0; long c2total = 0; - + c0min = boxp->c0min; c0max = boxp->c0max; c1min = boxp->c1min; c1max = boxp->c1max; c2min = boxp->c2min; c2max = boxp->c2max; - + for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { histp = & histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) { - if ((count = *histp++) != 0) { - total += count; - c0total += ((c0 << C0_SHIFT) + ((1<>1)) * count; - c1total += ((c1 << C1_SHIFT) + ((1<>1)) * count; - c2total += ((c2 << C2_SHIFT) + ((1<>1)) * count; - } + if ((count = *histp++) != 0) { + total += count; + c0total += ((c0 << C0_SHIFT) + ((1<>1)) * count; + c1total += ((c1 << C1_SHIFT) + ((1<>1)) * count; + c2total += ((c2 << C2_SHIFT) + ((1<>1)) * count; + } } } - + cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total); cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total); cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total); @@ -528,7 +525,7 @@ /* Allocate workspace for box list */ boxlist = (boxptr) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * sizeof(box)); /* Initialize one box containing whole space */ numboxes = 1; boxlist[0].c0min = 0; @@ -575,7 +572,7 @@ * distance from every colormap entry to every histogram cell. Unfortunately, * it needs a work array to hold the best-distance-so-far for each histogram * cell (because the inner loop has to be over cells, not colormap entries). - * The work array elements have to be INT32s, so the work array would need + * The work array elements have to be JLONGs, so the work array would need * 256Kb at our recommended precision. This is not feasible in DOS machines. * * To get around these problems, we apply Thomas' method to compute the @@ -627,7 +624,7 @@ LOCAL(int) find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, - JSAMPLE colorlist[]) + JSAMPLE colorlist[]) /* Locate the colormap entries close enough to an update box to be candidates * for the nearest entry to some cell(s) in the update box. The update box * is specified by the center coordinates of its first cell. The number of @@ -641,8 +638,8 @@ int maxc0, maxc1, maxc2; int centerc0, centerc1, centerc2; int i, x, ncolors; - INT32 minmaxdist, min_dist, max_dist, tdist; - INT32 mindist[MAXNUMCOLORS]; /* min distance to colormap entry i */ + JLONG minmaxdist, min_dist, max_dist, tdist; + JLONG mindist[MAXNUMCOLORS]; /* min distance to colormap entry i */ /* Compute true coordinates of update box's upper corner and center. * Actually we compute the coordinates of the center of the upper-corner @@ -684,11 +681,11 @@ /* within cell range so no contribution to min_dist */ min_dist = 0; if (x <= centerc0) { - tdist = (x - maxc0) * C0_SCALE; - max_dist = tdist*tdist; + tdist = (x - maxc0) * C0_SCALE; + max_dist = tdist*tdist; } else { - tdist = (x - minc0) * C0_SCALE; - max_dist = tdist*tdist; + tdist = (x - minc0) * C0_SCALE; + max_dist = tdist*tdist; } } @@ -706,11 +703,11 @@ } else { /* within cell range so no contribution to min_dist */ if (x <= centerc1) { - tdist = (x - maxc1) * C1_SCALE; - max_dist += tdist*tdist; + tdist = (x - maxc1) * C1_SCALE; + max_dist += tdist*tdist; } else { - tdist = (x - minc1) * C1_SCALE; - max_dist += tdist*tdist; + tdist = (x - minc1) * C1_SCALE; + max_dist += tdist*tdist; } } @@ -728,15 +725,15 @@ } else { /* within cell range so no contribution to min_dist */ if (x <= centerc2) { - tdist = (x - maxc2) * C2_SCALE; - max_dist += tdist*tdist; + tdist = (x - maxc2) * C2_SCALE; + max_dist += tdist*tdist; } else { - tdist = (x - minc2) * C2_SCALE; - max_dist += tdist*tdist; + tdist = (x - minc2) * C2_SCALE; + max_dist += tdist*tdist; } } - mindist[i] = min_dist; /* save away the results */ + mindist[i] = min_dist; /* save away the results */ if (max_dist < minmaxdist) minmaxdist = max_dist; } @@ -756,7 +753,7 @@ LOCAL(void) find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, - int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) + int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) /* Find the closest colormap entry for each cell in the update box, * given the list of candidate colors prepared by find_nearby_colors. * Return the indexes of the closest entries in the bestcolor[] array. @@ -766,31 +763,31 @@ { int ic0, ic1, ic2; int i, icolor; - register INT32 * bptr; /* pointer into bestdist[] array */ - JSAMPLE * cptr; /* pointer into bestcolor[] array */ - INT32 dist0, dist1; /* initial distance values */ - register INT32 dist2; /* current distance in inner loop */ - INT32 xx0, xx1; /* distance increments */ - register INT32 xx2; - INT32 inc0, inc1, inc2; /* initial values for increments */ + register JLONG *bptr; /* pointer into bestdist[] array */ + JSAMPLE *cptr; /* pointer into bestcolor[] array */ + JLONG dist0, dist1; /* initial distance values */ + register JLONG dist2; /* current distance in inner loop */ + JLONG xx0, xx1; /* distance increments */ + register JLONG xx2; + JLONG inc0, inc1, inc2; /* initial values for increments */ /* This array holds the distance to the nearest-so-far color for each cell */ - INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; + JLONG bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; /* Initialize best-distance for each cell of the update box */ bptr = bestdist; for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--) *bptr++ = 0x7FFFFFFFL; - + /* For each color selected by find_nearby_colors, * compute its distance to the center of each cell in the box. * If that's less than best-so-far, update best distance and color number. */ - + /* Nominal steps between cell centers ("x" in Thomas article) */ #define STEP_C0 ((1 << C0_SHIFT) * C0_SCALE) #define STEP_C1 ((1 << C1_SHIFT) * C1_SCALE) #define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE) - + for (i = 0; i < numcolors; i++) { icolor = GETJSAMPLE(colorlist[i]); /* Compute (square of) distance from minc0/c1/c2 to this color */ @@ -812,20 +809,20 @@ dist1 = dist0; xx1 = inc1; for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) { - dist2 = dist1; - xx2 = inc2; - for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) { - if (dist2 < *bptr) { - *bptr = dist2; - *cptr = (JSAMPLE) icolor; - } - dist2 += xx2; - xx2 += 2 * STEP_C2 * STEP_C2; - bptr++; - cptr++; - } - dist1 += xx1; - xx1 += 2 * STEP_C1 * STEP_C1; + dist2 = dist1; + xx2 = inc2; + for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) { + if (dist2 < *bptr) { + *bptr = dist2; + *cptr = (JSAMPLE) icolor; + } + dist2 += xx2; + xx2 += 2 * STEP_C2 * STEP_C2; + bptr++; + cptr++; + } + dist1 += xx1; + xx1 += 2 * STEP_C1 * STEP_C1; } dist0 += xx0; xx0 += 2 * STEP_C0 * STEP_C0; @@ -842,13 +839,13 @@ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; hist3d histogram = cquantize->histogram; - int minc0, minc1, minc2; /* lower left corner of update box */ + int minc0, minc1, minc2; /* lower left corner of update box */ int ic0, ic1, ic2; - register JSAMPLE * cptr; /* pointer into bestcolor[] array */ - register histptr cachep; /* pointer into main cache array */ + register JSAMPLE *cptr; /* pointer into bestcolor[] array */ + register histptr cachep; /* pointer into main cache array */ /* This array lists the candidate colormap indexes. */ JSAMPLE colorlist[MAXNUMCOLORS]; - int numcolors; /* number of candidate colors */ + int numcolors; /* number of candidate colors */ /* This array holds the actually closest colormap index for each cell. */ JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS]; @@ -864,7 +861,7 @@ minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1); minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1); minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1); - + /* Determine which colormap entries are close enough to be candidates * for the nearest entry to some cell in the update box. */ @@ -872,10 +869,10 @@ /* Determine the actually nearest colors. */ find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist, - bestcolor); + bestcolor); /* Save the best color numbers (plus 1) in the main cache array */ - c0 <<= BOX_C0_LOG; /* convert ID back to base cell indexes */ + c0 <<= BOX_C0_LOG; /* convert ID back to base cell indexes */ c1 <<= BOX_C1_LOG; c2 <<= BOX_C2_LOG; cptr = bestcolor; @@ -883,7 +880,7 @@ for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) { cachep = & histogram[c0+ic0][c1+ic1][c2]; for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) { - *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1); + *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1); } } } @@ -896,7 +893,7 @@ METHODDEF(void) pass2_no_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) /* This version performs no dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; @@ -920,7 +917,7 @@ /* If we have not seen this color before, find nearest colormap entry */ /* and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, c0,c1,c2); + fill_inverse_cmap(cinfo, c0,c1,c2); /* Now emit the colormap index for this cell */ *outptr++ = (JSAMPLE) (*cachep - 1); } @@ -930,20 +927,20 @@ METHODDEF(void) pass2_fs_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) + JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) /* This version performs Floyd-Steinberg dithering */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; hist3d histogram = cquantize->histogram; - register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */ + register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */ LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */ LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */ - register FSERRPTR errorptr; /* => fserrors[] at column before current */ - JSAMPROW inptr; /* => current input pixel */ - JSAMPROW outptr; /* => current output pixel */ + register FSERRPTR errorptr; /* => fserrors[] at column before current */ + JSAMPROW inptr; /* => current input pixel */ + JSAMPROW outptr; /* => current output pixel */ histptr cachep; - int dir; /* +1 or -1 depending on direction */ - int dir3; /* 3*dir, for advancing inptr & errorptr */ + int dir; /* +1 or -1 depending on direction */ + int dir3; /* 3*dir, for advancing inptr & errorptr */ int row; JDIMENSION col; JDIMENSION width = cinfo->output_width; @@ -959,7 +956,7 @@ outptr = output_buf[row]; if (cquantize->on_odd_row) { /* work right to left in this row */ - inptr += (width-1) * 3; /* so point to rightmost pixel */ + inptr += (width-1) * 3; /* so point to rightmost pixel */ outptr += width-1; dir = -1; dir3 = -3; @@ -1011,53 +1008,44 @@ /* If we have not seen this color before, find nearest colormap */ /* entry and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT); + fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT); /* Now emit the colormap index for this cell */ { register int pixcode = *cachep - 1; - *outptr = (JSAMPLE) pixcode; - /* Compute representation error for this pixel */ - cur0 -= GETJSAMPLE(colormap0[pixcode]); - cur1 -= GETJSAMPLE(colormap1[pixcode]); - cur2 -= GETJSAMPLE(colormap2[pixcode]); + *outptr = (JSAMPLE) pixcode; + /* Compute representation error for this pixel */ + cur0 -= GETJSAMPLE(colormap0[pixcode]); + cur1 -= GETJSAMPLE(colormap1[pixcode]); + cur2 -= GETJSAMPLE(colormap2[pixcode]); } /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the * next-line error sums left by 1 column. */ - { register LOCFSERROR bnexterr, delta; + { register LOCFSERROR bnexterr; - bnexterr = cur0; /* Process component 0 */ - delta = cur0 * 2; - cur0 += delta; /* form error * 3 */ - errorptr[0] = (FSERROR) (bpreverr0 + cur0); - cur0 += delta; /* form error * 5 */ - bpreverr0 = belowerr0 + cur0; - belowerr0 = bnexterr; - cur0 += delta; /* form error * 7 */ - bnexterr = cur1; /* Process component 1 */ - delta = cur1 * 2; - cur1 += delta; /* form error * 3 */ - errorptr[1] = (FSERROR) (bpreverr1 + cur1); - cur1 += delta; /* form error * 5 */ - bpreverr1 = belowerr1 + cur1; - belowerr1 = bnexterr; - cur1 += delta; /* form error * 7 */ - bnexterr = cur2; /* Process component 2 */ - delta = cur2 * 2; - cur2 += delta; /* form error * 3 */ - errorptr[2] = (FSERROR) (bpreverr2 + cur2); - cur2 += delta; /* form error * 5 */ - bpreverr2 = belowerr2 + cur2; - belowerr2 = bnexterr; - cur2 += delta; /* form error * 7 */ + bnexterr = cur0; /* Process component 0 */ + errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3); + bpreverr0 = belowerr0 + cur0 * 5; + belowerr0 = bnexterr; + cur0 *= 7; + bnexterr = cur1; /* Process component 1 */ + errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3); + bpreverr1 = belowerr1 + cur1 * 5; + belowerr1 = bnexterr; + cur1 *= 7; + bnexterr = cur2; /* Process component 2 */ + errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3); + bpreverr2 = belowerr2 + cur2 * 5; + belowerr2 = bnexterr; + cur2 *= 7; } /* At this point curN contains the 7/16 error value to be propagated * to the next pixel on the current line, and all the errors for the * next line have been shifted over. We are therefore ready to move on. */ - inptr += dir3; /* Advance pixel pointers to next column */ + inptr += dir3; /* Advance pixel pointers to next column */ outptr += dir; - errorptr += dir3; /* advance errorptr to current column */ + errorptr += dir3; /* advance errorptr to current column */ } /* Post-loop cleanup: we must unload the final error values into the * final fserrors[] entry. Note we need not unload belowerrN because @@ -1092,12 +1080,12 @@ /* Allocate and fill in the error_limiter table */ { my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; - int * table; + int *table; int in, out; table = (int *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int)); - table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */ + ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * sizeof(int)); + table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */ cquantize->error_limiter = table; #define STEPSIZE ((MAXJSAMPLE+1)/16) @@ -1180,16 +1168,16 @@ if (cinfo->dither_mode == JDITHER_FS) { size_t arraysize = (size_t) ((cinfo->output_width + 2) * - (3 * SIZEOF(FSERROR))); + (3 * sizeof(FSERROR))); /* Allocate Floyd-Steinberg workspace if we didn't already. */ if (cquantize->fserrors == NULL) - cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) - ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); + cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) + ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); /* Initialize the propagated errors to zero. */ - jzero_far((void FAR *) cquantize->fserrors, arraysize); + jzero_far((void *) cquantize->fserrors, arraysize); /* Make the error-limit table if we didn't already. */ if (cquantize->error_limiter == NULL) - init_error_limit(cinfo); + init_error_limit(cinfo); cquantize->on_odd_row = FALSE; } @@ -1197,8 +1185,8 @@ /* Zero the histogram or inverse color map, if necessary */ if (cquantize->needs_zeroed) { for (i = 0; i < HIST_C0_ELEMS; i++) { - jzero_far((void FAR *) histogram[i], - HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell)); + jzero_far((void *) histogram[i], + HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = FALSE; } @@ -1231,11 +1219,11 @@ cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_cquantizer)); + sizeof(my_cquantizer)); cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; cquantize->pub.start_pass = start_pass_2_quant; cquantize->pub.new_color_map = new_color_map_2_quant; - cquantize->fserrors = NULL; /* flag optional arrays not allocated */ + cquantize->fserrors = NULL; /* flag optional arrays not allocated */ cquantize->error_limiter = NULL; /* Make sure jdmaster didn't give me a case I can't handle */ @@ -1244,17 +1232,17 @@ /* Allocate the histogram/inverse colormap storage */ cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d)); + ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d)); for (i = 0; i < HIST_C0_ELEMS; i++) { cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell)); + HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = TRUE; /* histogram is garbage now */ /* Allocate storage for the completed colormap, if required. - * We do this now since it is FAR storage and may affect - * the memory manager's space calculations. + * We do this now since it may affect the memory manager's space + * calculations. */ if (cinfo->enable_2pass_quant) { /* Make sure color count is acceptable */ @@ -1277,14 +1265,15 @@ cinfo->dither_mode = JDITHER_FS; /* Allocate Floyd-Steinberg workspace if necessary. - * This isn't really needed until pass 2, but again it is FAR storage. - * Although we will cope with a later change in dither_mode, - * we do not promise to honor max_memory_to_use if dither_mode changes. + * This isn't really needed until pass 2, but again it may affect the memory + * manager's space calculations. Although we will cope with a later change + * in dither_mode, we do not promise to honor max_memory_to_use if + * dither_mode changes. */ if (cinfo->dither_mode == JDITHER_FS) { cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR)))); + (size_t) ((cinfo->output_width + 2) * (3 * sizeof(FSERROR)))); /* Might as well create the error-limiting table too. */ init_error_limit(cinfo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimddct.h glmark2-2021.02/src/libjpeg-turbo/jsimddct.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimddct.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jsimddct.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,101 +2,73 @@ * jsimddct.h * * Copyright 2009 Pierre Ossman for Cendio AB - * + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * */ -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jsimd_can_convsamp jSCanConv -#define jsimd_can_convsamp_float jSCanConvF -#define jsimd_convsamp jSConv -#define jsimd_convsamp_float jSConvF -#define jsimd_can_fdct_islow jSCanFDCTIS -#define jsimd_can_fdct_ifast jSCanFDCTIF -#define jsimd_can_fdct_float jSCanFDCTFl -#define jsimd_fdct_islow jSFDCTIS -#define jsimd_fdct_ifast jSFDCTIF -#define jsimd_fdct_float jSFDCTFl -#define jsimd_can_quantize jSCanQuant -#define jsimd_can_quantize_float jSCanQuantF -#define jsimd_quantize jSQuant -#define jsimd_quantize_float jSQuantF -#define jsimd_can_idct_2x2 jSCanIDCT22 -#define jsimd_can_idct_4x4 jSCanIDCT44 -#define jsimd_idct_2x2 jSIDCT22 -#define jsimd_idct_4x4 jSIDCT44 -#define jsimd_can_idct_islow jSCanIDCTIS -#define jsimd_can_idct_ifast jSCanIDCTIF -#define jsimd_can_idct_float jSCanIDCTFl -#define jsimd_idct_islow jSIDCTIS -#define jsimd_idct_ifast jSIDCTIF -#define jsimd_idct_float jSIDCTFl -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -EXTERN(int) jsimd_can_convsamp JPP((void)); -EXTERN(int) jsimd_can_convsamp_float JPP((void)); - -EXTERN(void) jsimd_convsamp JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); -EXTERN(void) jsimd_convsamp_float JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(int) jsimd_can_fdct_islow JPP((void)); -EXTERN(int) jsimd_can_fdct_ifast JPP((void)); -EXTERN(int) jsimd_can_fdct_float JPP((void)); - -EXTERN(void) jsimd_fdct_islow JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_ifast JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_float JPP((FAST_FLOAT * data)); - -EXTERN(int) jsimd_can_quantize JPP((void)); -EXTERN(int) jsimd_can_quantize_float JPP((void)); - -EXTERN(void) jsimd_quantize JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); -EXTERN(void) jsimd_quantize_float JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(int) jsimd_can_idct_2x2 JPP((void)); -EXTERN(int) jsimd_can_idct_4x4 JPP((void)); - -EXTERN(void) jsimd_idct_2x2 JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4 JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(int) jsimd_can_idct_islow JPP((void)); -EXTERN(int) jsimd_can_idct_ifast JPP((void)); -EXTERN(int) jsimd_can_idct_float JPP((void)); - -EXTERN(void) jsimd_idct_islow JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_float JPP((j_decompress_ptr cinfo, - jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(int) jsimd_can_convsamp (void); +EXTERN(int) jsimd_can_convsamp_float (void); +EXTERN(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace); +EXTERN(void) jsimd_convsamp_float (JSAMPARRAY sample_data, + JDIMENSION start_col, + FAST_FLOAT *workspace); + +EXTERN(int) jsimd_can_fdct_islow (void); +EXTERN(int) jsimd_can_fdct_ifast (void); +EXTERN(int) jsimd_can_fdct_float (void); + +EXTERN(void) jsimd_fdct_islow (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast (DCTELEM *data); +EXTERN(void) jsimd_fdct_float (FAST_FLOAT *data); + +EXTERN(int) jsimd_can_quantize (void); +EXTERN(int) jsimd_can_quantize_float (void); + +EXTERN(void) jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace); +EXTERN(void) jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace); + +EXTERN(int) jsimd_can_idct_2x2 (void); +EXTERN(int) jsimd_can_idct_4x4 (void); +EXTERN(int) jsimd_can_idct_6x6 (void); +EXTERN(int) jsimd_can_idct_12x12 (void); + +EXTERN(void) jsimd_idct_2x2 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_6x6 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12 (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(int) jsimd_can_idct_islow (void); +EXTERN(int) jsimd_can_idct_ifast (void); +EXTERN(int) jsimd_can_idct_float (void); + +EXTERN(void) jsimd_idct_islow (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_ifast (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_float (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimd.h glmark2-2021.02/src/libjpeg-turbo/jsimd.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimd.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jsimd.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,97 +2,92 @@ * jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2011 D. R. Commander - * + * Copyright 2011, 2014 D. R. Commander + * Copyright 2015 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * */ -/* Short forms of external names for systems with brain-damaged linkers. */ +#include "jchuff.h" /* Declarations shared with jcphuff.c */ -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jsimd_can_rgb_ycc jSCanRgbYcc -#define jsimd_can_rgb_gray jSCanRgbGry -#define jsimd_can_ycc_rgb jSCanYccRgb -#define jsimd_rgb_ycc_convert jSRgbYccConv -#define jsimd_rgb_gray_convert jSRgbGryConv -#define jsimd_ycc_rgb_convert jSYccRgbConv -#define jsimd_can_h2v2_downsample jSCanH2V2Down -#define jsimd_can_h2v1_downsample jSCanH2V1Down -#define jsimd_h2v2_downsample jSH2V2Down -#define jsimd_h2v1_downsample jSH2V1Down -#define jsimd_can_h2v2_upsample jSCanH2V2Up -#define jsimd_can_h2v1_upsample jSCanH2V1Up -#define jsimd_h2v2_upsample jSH2V2Up -#define jsimd_h2v1_upsample jSH2V1Up -#define jsimd_can_h2v2_fancy_upsample jSCanH2V2FUp -#define jsimd_can_h2v1_fancy_upsample jSCanH2V1FUp -#define jsimd_h2v2_fancy_upsample jSH2V2FUp -#define jsimd_h2v1_fancy_upsample jSH2V1FUp -#define jsimd_can_h2v2_merged_upsample jSCanH2V2MUp -#define jsimd_can_h2v1_merged_upsample jSCanH2V1MUp -#define jsimd_h2v2_merged_upsample jSH2V2MUp -#define jsimd_h2v1_merged_upsample jSH2V1MUp -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -EXTERN(int) jsimd_can_rgb_ycc JPP((void)); -EXTERN(int) jsimd_can_rgb_gray JPP((void)); -EXTERN(int) jsimd_can_ycc_rgb JPP((void)); +EXTERN(int) jsimd_can_rgb_ycc (void); +EXTERN(int) jsimd_can_rgb_gray (void); +EXTERN(int) jsimd_can_ycc_rgb (void); +EXTERN(int) jsimd_can_ycc_rgb565 (void); +EXTERN(int) jsimd_c_can_null_convert (void); EXTERN(void) jsimd_rgb_ycc_convert - JPP((j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_rgb_gray_convert - JPP((j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_rgb565_convert + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_c_null_convert + (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); -EXTERN(int) jsimd_can_h2v2_downsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_downsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_downsample (void); +EXTERN(int) jsimd_can_h2v1_downsample (void); EXTERN(void) jsimd_h2v2_downsample - JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(int) jsimd_can_h2v2_smooth_downsample (void); + +EXTERN(void) jsimd_h2v2_smooth_downsample + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); + EXTERN(void) jsimd_h2v1_downsample - JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(int) jsimd_can_h2v2_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_upsample (void); +EXTERN(int) jsimd_can_h2v1_upsample (void); +EXTERN(int) jsimd_can_int_upsample (void); EXTERN(void) jsimd_h2v2_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_int_upsample + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(int) jsimd_can_h2v2_fancy_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_fancy_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_fancy_upsample (void); +EXTERN(int) jsimd_can_h2v1_fancy_upsample (void); EXTERN(void) jsimd_h2v2_fancy_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_fancy_upsample - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(int) jsimd_can_h2v2_merged_upsample JPP((void)); -EXTERN(int) jsimd_can_h2v1_merged_upsample JPP((void)); +EXTERN(int) jsimd_can_h2v2_merged_upsample (void); +EXTERN(int) jsimd_can_h2v1_merged_upsample (void); EXTERN(void) jsimd_h2v2_merged_upsample - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_merged_upsample - JPP((j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf)); + (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(int) jsimd_can_huff_encode_one_block (void); +EXTERN(JOCTET*) jsimd_huff_encode_one_block + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimd_none.c glmark2-2021.02/src/libjpeg-turbo/jsimd_none.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jsimd_none.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jsimd_none.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,404 @@ +/* + * jsimd_none.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains stubs for when there is no SIMD support available. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jsimd.h" +#include "jdct.h" +#include "jsimddct.h" + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jstdhuff.c glmark2-2021.02/src/libjpeg-turbo/jstdhuff.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jstdhuff.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jstdhuff.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,135 @@ +/* + * jstdhuff.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1998, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to set the default Huffman tables, if they are + * not already set. + */ + +/* + * Huffman table setup routines + */ + +LOCAL(void) +add_huff_table (j_common_ptr cinfo, + JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val) +/* Define a Huffman table */ +{ + int nsymbols, len; + + if (*htblptr == NULL) + *htblptr = jpeg_alloc_huff_table(cinfo); + else + return; + + /* Copy the number-of-symbols-of-each-code-length counts */ + MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits)); + + /* Validate the counts. We do this here mainly so we can copy the right + * number of symbols from the val[] array, without risking marching off + * the end of memory. jchuff.c will do a more thorough test later. + */ + nsymbols = 0; + for (len = 1; len <= 16; len++) + nsymbols += bits[len]; + if (nsymbols < 1 || nsymbols > 256) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + + MEMCOPY((*htblptr)->huffval, val, nsymbols * sizeof(UINT8)); + MEMZERO(&((*htblptr)->huffval[nsymbols]), (256 - nsymbols) * sizeof(UINT8)); + + /* Initialize sent_table FALSE so table will be written to JPEG file. */ + (*htblptr)->sent_table = FALSE; +} + + +LOCAL(void) +std_huff_tables (j_common_ptr cinfo) +/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ +/* IMPORTANT: these are only valid for 8-bit data precision! */ +{ + JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs; + + static const UINT8 bits_dc_luminance[17] = + { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; + static const UINT8 val_dc_luminance[] = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + + static const UINT8 bits_dc_chrominance[17] = + { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; + static const UINT8 val_dc_chrominance[] = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + + static const UINT8 bits_ac_luminance[17] = + { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; + static const UINT8 val_ac_luminance[] = + { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa }; + + static const UINT8 bits_ac_chrominance[17] = + { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; + static const UINT8 val_ac_chrominance[] = + { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa }; + + if (cinfo->is_decompressor) { + dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs; + ac_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->ac_huff_tbl_ptrs; + } else { + dc_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->dc_huff_tbl_ptrs; + ac_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->ac_huff_tbl_ptrs; + } + + add_huff_table(cinfo, &dc_huff_tbl_ptrs[0], bits_dc_luminance, + val_dc_luminance); + add_huff_table(cinfo, &ac_huff_tbl_ptrs[0], bits_ac_luminance, + val_ac_luminance); + add_huff_table(cinfo, &dc_huff_tbl_ptrs[1], bits_dc_chrominance, + val_dc_chrominance); + add_huff_table(cinfo, &ac_huff_tbl_ptrs[1], bits_ac_chrominance, + val_ac_chrominance); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jutils.c glmark2-2021.02/src/libjpeg-turbo/jutils.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jutils.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jutils.c 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,12 @@ /* * jutils.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * It was modified by The libjpeg-turbo Project to include only code + * relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains tables and miscellaneous utility routines needed * for both compression and decompression. @@ -21,7 +24,7 @@ * of a DCT block read in natural order (left to right, top to bottom). */ -#if 0 /* This table is not actually needed in v6a */ +#if 0 /* This table is not actually needed in v6a */ const int jpeg_zigzag_order[DCTSIZE2] = { 0, 1, 5, 6, 14, 15, 27, 28, @@ -87,30 +90,10 @@ } -/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays - * and coefficient-block arrays. This won't work on 80x86 because the arrays - * are FAR and we're assuming a small-pointer memory model. However, some - * DOS compilers provide far-pointer versions of memcpy() and memset() even - * in the small-model libraries. These will be used if USE_FMEM is defined. - * Otherwise, the routines below do it the hard way. (The performance cost - * is not all that great, because these routines aren't very heavily used.) - */ - -#ifndef NEED_FAR_POINTERS /* normal case, same as regular macros */ -#define FMEMCOPY(dest,src,size) MEMCOPY(dest,src,size) -#define FMEMZERO(target,size) MEMZERO(target,size) -#else /* 80x86 case, define if we can */ -#ifdef USE_FMEM -#define FMEMCOPY(dest,src,size) _fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size)) -#define FMEMZERO(target,size) _fmemset((void FAR *)(target), 0, (size_t)(size)) -#endif -#endif - - GLOBAL(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols) + JSAMPARRAY output_array, int dest_row, + int num_rows, JDIMENSION num_cols) /* Copy some rows of samples from one place to another. * num_rows rows are copied from input_array[source_row++] * to output_array[dest_row++]; these areas may overlap for duplication. @@ -118,11 +101,7 @@ */ { register JSAMPROW inptr, outptr; -#ifdef FMEMCOPY - register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE)); -#else - register JDIMENSION count; -#endif + register size_t count = (size_t) (num_cols * sizeof(JSAMPLE)); register int row; input_array += source_row; @@ -131,49 +110,24 @@ for (row = num_rows; row > 0; row--) { inptr = *input_array++; outptr = *output_array++; -#ifdef FMEMCOPY - FMEMCOPY(outptr, inptr, count); -#else - for (count = num_cols; count > 0; count--) - *outptr++ = *inptr++; /* needn't bother with GETJSAMPLE() here */ -#endif + MEMCOPY(outptr, inptr, count); } } GLOBAL(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks) + JDIMENSION num_blocks) /* Copy a row of coefficient blocks from one place to another. */ { -#ifdef FMEMCOPY - FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF))); -#else - register JCOEFPTR inptr, outptr; - register long count; - - inptr = (JCOEFPTR) input_row; - outptr = (JCOEFPTR) output_row; - for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) { - *outptr++ = *inptr++; - } -#endif + MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF))); } GLOBAL(void) -jzero_far (void FAR * target, size_t bytestozero) -/* Zero out a chunk of FAR memory. */ +jzero_far (void *target, size_t bytestozero) +/* Zero out a chunk of memory. */ /* This might be sample-array data, block-array data, or alloc_large data. */ { -#ifdef FMEMZERO - FMEMZERO(target, bytestozero); -#else - register char FAR * ptr = (char FAR *) target; - register size_t count; - - for (count = bytestozero; count > 0; count--) { - *ptr++ = 0; - } -#endif + MEMZERO(target, bytestozero); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jversion.h glmark2-2021.02/src/libjpeg-turbo/jversion.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/jversion.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/jversion.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,12 @@ /* * jversion.h * - * Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding. - * Copyright (C) 2010, 2012, D. R. Commander. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, 2012-2016, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. * * This file contains software version identification. */ @@ -12,7 +14,7 @@ #if JPEG_LIB_VERSION >= 80 -#define JVERSION "8b 16-May-2010" +#define JVERSION "8d 15-Jan-2012" #elif JPEG_LIB_VERSION >= 70 @@ -20,12 +22,28 @@ #else -#define JVERSION "6b 27-Mar-1998" +#define JVERSION "6b 27-Mar-1998" #endif -#define JCOPYRIGHT "Copyright (C) 1991-2010 Thomas G. Lane, Guido Vollbeding\n" \ - "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ - "Copyright (C) 2009-2012 D. R. Commander\n" \ - "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)" +/* + * NOTE: It is our convention to place the authors in the following order: + * - libjpeg-turbo authors (2009-) in descending order of the date of their + * most recent contribution to the project, then in ascending order of the + * date of their first contribution to the project + * - Upstream authors in descending order of the date of the first inclusion of + * their code + */ + +#define JCOPYRIGHT "Copyright (C) 2009-2016 D. R. Commander\n" \ + "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ + "Copyright (C) 2015-2016 Matthieu Darbois\n" \ + "Copyright (C) 2015 Google, Inc.\n" \ + "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \ + "Copyright (C) 2013 Linaro Limited\n" \ + "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ + "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ + "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ + "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding" \ + +#define JCOPYRIGHT_SHORT "Copyright (C) 1991-2016 The libjpeg-turbo Project and many others" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/libjpeg.map.in glmark2-2021.02/src/libjpeg-turbo/libjpeg.map.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/libjpeg.map.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/libjpeg.map.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +LIBJPEGTURBO_@JPEG_LIB_VERSION_DECIMAL@ { + @MEM_SRCDST_FUNCTIONS@ + local: + jsimd_*; + jconst_*; +}; + +LIBJPEG_@JPEG_LIB_VERSION_DECIMAL@ { + global: + *; +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/libjpeg.txt glmark2-2021.02/src/libjpeg-turbo/libjpeg.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/libjpeg.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/libjpeg.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3105 @@ +USING THE IJG JPEG LIBRARY + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding. +libjpeg-turbo Modifications: +Copyright (C) 2010, 2014-2016, D. R. Commander. +Copyright (C) 2015, Google, Inc. +For conditions of distribution and use, see the accompanying README.ijg file. + + +This file describes how to use the IJG JPEG library within an application +program. Read it if you want to write a program that uses the library. + +The file example.c provides heavily commented skeleton code for calling the +JPEG library. Also see jpeglib.h (the include file to be used by application +programs) for full details about data structures and function parameter lists. +The library source code, of course, is the ultimate reference. + +Note that there have been *major* changes from the application interface +presented by IJG version 4 and earlier versions. The old design had several +inherent limitations, and it had accumulated a lot of cruft as we added +features while trying to minimize application-interface changes. We have +sacrificed backward compatibility in the version 5 rewrite, but we think the +improvements justify this. + + +TABLE OF CONTENTS +----------------- + +Overview: + Functions provided by the library + Outline of typical usage +Basic library usage: + Data formats + Compression details + Decompression details + Mechanics of usage: include files, linking, etc +Advanced features: + Compression parameter selection + Decompression parameter selection + Special color spaces + Error handling + Compressed data handling (source and destination managers) + I/O suspension + Progressive JPEG support + Buffered-image mode + Abbreviated datastreams and multiple images + Special markers + Raw (downsampled) image data + Really raw data: DCT coefficients + Progress monitoring + Memory management + Memory usage + Library compile-time options + Portability considerations + +You should read at least the overview and basic usage sections before trying +to program with the library. The sections on advanced features can be read +if and when you need them. + + +OVERVIEW +======== + +Functions provided by the library +--------------------------------- + +The IJG JPEG library provides C code to read and write JPEG-compressed image +files. The surrounding application program receives or supplies image data a +scanline at a time, using a straightforward uncompressed image format. All +details of color conversion and other preprocessing/postprocessing can be +handled by the library. + +The library includes a substantial amount of code that is not covered by the +JPEG standard but is necessary for typical applications of JPEG. These +functions preprocess the image before JPEG compression or postprocess it after +decompression. They include colorspace conversion, downsampling/upsampling, +and color quantization. The application indirectly selects use of this code +by specifying the format in which it wishes to supply or receive image data. +For example, if colormapped output is requested, then the decompression +library automatically invokes color quantization. + +A wide range of quality vs. speed tradeoffs are possible in JPEG processing, +and even more so in decompression postprocessing. The decompression library +provides multiple implementations that cover most of the useful tradeoffs, +ranging from very-high-quality down to fast-preview operation. On the +compression side we have generally not provided low-quality choices, since +compression is normally less time-critical. It should be understood that the +low-quality modes may not meet the JPEG standard's accuracy requirements; +nonetheless, they are useful for viewers. + +A word about functions *not* provided by the library. We handle a subset of +the ISO JPEG standard; most baseline, extended-sequential, and progressive +JPEG processes are supported. (Our subset includes all features now in common +use.) Unsupported ISO options include: + * Hierarchical storage + * Lossless JPEG + * DNL marker + * Nonintegral subsampling ratios +We support both 8- and 12-bit data precision, but this is a compile-time +choice rather than a run-time choice; hence it is difficult to use both +precisions in a single application. + +By itself, the library handles only interchange JPEG datastreams --- in +particular the widely used JFIF file format. The library can be used by +surrounding code to process interchange or abbreviated JPEG datastreams that +are embedded in more complex file formats. (For example, this library is +used by the free LIBTIFF library to support JPEG compression in TIFF.) + + +Outline of typical usage +------------------------ + +The rough outline of a JPEG compression operation is: + + Allocate and initialize a JPEG compression object + Specify the destination for the compressed data (eg, a file) + Set parameters for compression, including image size & colorspace + jpeg_start_compress(...); + while (scan lines remain to be written) + jpeg_write_scanlines(...); + jpeg_finish_compress(...); + Release the JPEG compression object + +A JPEG compression object holds parameters and working state for the JPEG +library. We make creation/destruction of the object separate from starting +or finishing compression of an image; the same object can be re-used for a +series of image compression operations. This makes it easy to re-use the +same parameter settings for a sequence of images. Re-use of a JPEG object +also has important implications for processing abbreviated JPEG datastreams, +as discussed later. + +The image data to be compressed is supplied to jpeg_write_scanlines() from +in-memory buffers. If the application is doing file-to-file compression, +reading image data from the source file is the application's responsibility. +The library emits compressed data by calling a "data destination manager", +which typically will write the data into a file; but the application can +provide its own destination manager to do something else. + +Similarly, the rough outline of a JPEG decompression operation is: + + Allocate and initialize a JPEG decompression object + Specify the source of the compressed data (eg, a file) + Call jpeg_read_header() to obtain image info + Set parameters for decompression + jpeg_start_decompress(...); + while (scan lines remain to be read) + jpeg_read_scanlines(...); + jpeg_finish_decompress(...); + Release the JPEG decompression object + +This is comparable to the compression outline except that reading the +datastream header is a separate step. This is helpful because information +about the image's size, colorspace, etc is available when the application +selects decompression parameters. For example, the application can choose an +output scaling ratio that will fit the image into the available screen size. + +The decompression library obtains compressed data by calling a data source +manager, which typically will read the data from a file; but other behaviors +can be obtained with a custom source manager. Decompressed data is delivered +into in-memory buffers passed to jpeg_read_scanlines(). + +It is possible to abort an incomplete compression or decompression operation +by calling jpeg_abort(); or, if you do not need to retain the JPEG object, +simply release it by calling jpeg_destroy(). + +JPEG compression and decompression objects are two separate struct types. +However, they share some common fields, and certain routines such as +jpeg_destroy() can work on either type of object. + +The JPEG library has no static variables: all state is in the compression +or decompression object. Therefore it is possible to process multiple +compression and decompression operations concurrently, using multiple JPEG +objects. + +Both compression and decompression can be done in an incremental memory-to- +memory fashion, if suitable source/destination managers are used. See the +section on "I/O suspension" for more details. + + +BASIC LIBRARY USAGE +=================== + +Data formats +------------ + +Before diving into procedural details, it is helpful to understand the +image data format that the JPEG library expects or returns. + +The standard input image format is a rectangular array of pixels, with each +pixel having the same number of "component" or "sample" values (color +channels). You must specify how many components there are and the colorspace +interpretation of the components. Most applications will use RGB data +(three components per pixel) or grayscale data (one component per pixel). +PLEASE NOTE THAT RGB DATA IS THREE SAMPLES PER PIXEL, GRAYSCALE ONLY ONE. +A remarkable number of people manage to miss this, only to find that their +programs don't work with grayscale JPEG files. + +There is no provision for colormapped input. JPEG files are always full-color +or full grayscale (or sometimes another colorspace such as CMYK). You can +feed in a colormapped image by expanding it to full-color format. However +JPEG often doesn't work very well with source data that has been colormapped, +because of dithering noise. This is discussed in more detail in the JPEG FAQ +and the other references mentioned in the README.ijg file. + +Pixels are stored by scanlines, with each scanline running from left to +right. The component values for each pixel are adjacent in the row; for +example, R,G,B,R,G,B,R,G,B,... for 24-bit RGB color. Each scanline is an +array of data type JSAMPLE --- which is typically "unsigned char", unless +you've changed jmorecfg.h. (You can also change the RGB pixel layout, say +to B,G,R order, by modifying jmorecfg.h. But see the restrictions listed in +that file before doing so.) + +A 2-D array of pixels is formed by making a list of pointers to the starts of +scanlines; so the scanlines need not be physically adjacent in memory. Even +if you process just one scanline at a time, you must make a one-element +pointer array to conform to this structure. Pointers to JSAMPLE rows are of +type JSAMPROW, and the pointer to the pointer array is of type JSAMPARRAY. + +The library accepts or supplies one or more complete scanlines per call. +It is not possible to process part of a row at a time. Scanlines are always +processed top-to-bottom. You can process an entire image in one call if you +have it all in memory, but usually it's simplest to process one scanline at +a time. + +For best results, source data values should have the precision specified by +BITS_IN_JSAMPLE (normally 8 bits). For instance, if you choose to compress +data that's only 6 bits/channel, you should left-justify each value in a +byte before passing it to the compressor. If you need to compress data +that has more than 8 bits/channel, compile with BITS_IN_JSAMPLE = 12. +(See "Library compile-time options", later.) + + +The data format returned by the decompressor is the same in all details, +except that colormapped output is supported. (Again, a JPEG file is never +colormapped. But you can ask the decompressor to perform on-the-fly color +quantization to deliver colormapped output.) If you request colormapped +output then the returned data array contains a single JSAMPLE per pixel; +its value is an index into a color map. The color map is represented as +a 2-D JSAMPARRAY in which each row holds the values of one color component, +that is, colormap[i][j] is the value of the i'th color component for pixel +value (map index) j. Note that since the colormap indexes are stored in +JSAMPLEs, the maximum number of colors is limited by the size of JSAMPLE +(ie, at most 256 colors for an 8-bit JPEG library). + + +Compression details +------------------- + +Here we revisit the JPEG compression outline given in the overview. + +1. Allocate and initialize a JPEG compression object. + +A JPEG compression object is a "struct jpeg_compress_struct". (It also has +a bunch of subsidiary structures which are allocated via malloc(), but the +application doesn't control those directly.) This struct can be just a local +variable in the calling routine, if a single routine is going to execute the +whole JPEG compression sequence. Otherwise it can be static or allocated +from malloc(). + +You will also need a structure representing a JPEG error handler. The part +of this that the library cares about is a "struct jpeg_error_mgr". If you +are providing your own error handler, you'll typically want to embed the +jpeg_error_mgr struct in a larger structure; this is discussed later under +"Error handling". For now we'll assume you are just using the default error +handler. The default error handler will print JPEG error/warning messages +on stderr, and it will call exit() if a fatal error occurs. + +You must initialize the error handler structure, store a pointer to it into +the JPEG object's "err" field, and then call jpeg_create_compress() to +initialize the rest of the JPEG object. + +Typical code for this step, if you are using the default error handler, is + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + ... + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + +jpeg_create_compress allocates a small amount of memory, so it could fail +if you are out of memory. In that case it will exit via the error handler; +that's why the error handler must be initialized first. + + +2. Specify the destination for the compressed data (eg, a file). + +As previously mentioned, the JPEG library delivers compressed data to a +"data destination" module. The library includes one data destination +module which knows how to write to a stdio stream. You can use your own +destination module if you want to do something else, as discussed later. + +If you use the standard destination module, you must open the target stdio +stream beforehand. Typical code for this step looks like: + + FILE *outfile; + ... + if ((outfile = fopen(filename, "wb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_dest(&cinfo, outfile); + +where the last line invokes the standard destination module. + +WARNING: it is critical that the binary compressed data be delivered to the +output file unchanged. On non-Unix systems the stdio library may perform +newline translation or otherwise corrupt binary data. To suppress this +behavior, you may need to use a "b" option to fopen (as shown above), or use +setmode() or another routine to put the stdio stream in binary mode. See +cjpeg.c and djpeg.c for code that has been found to work on many systems. + +You can select the data destination after setting other parameters (step 3), +if that's more convenient. You may not change the destination between +calling jpeg_start_compress() and jpeg_finish_compress(). + + +3. Set parameters for compression, including image size & colorspace. + +You must supply information about the source image by setting the following +fields in the JPEG object (cinfo structure): + + image_width Width of image, in pixels + image_height Height of image, in pixels + input_components Number of color channels (samples per pixel) + in_color_space Color space of source image + +The image dimensions are, hopefully, obvious. JPEG supports image dimensions +of 1 to 64K pixels in either direction. The input color space is typically +RGB or grayscale, and input_components is 3 or 1 accordingly. (See "Special +color spaces", later, for more info.) The in_color_space field must be +assigned one of the J_COLOR_SPACE enum constants, typically JCS_RGB or +JCS_GRAYSCALE. + +JPEG has a large number of compression parameters that determine how the +image is encoded. Most applications don't need or want to know about all +these parameters. You can set all the parameters to reasonable defaults by +calling jpeg_set_defaults(); then, if there are particular values you want +to change, you can do so after that. The "Compression parameter selection" +section tells about all the parameters. + +You must set in_color_space correctly before calling jpeg_set_defaults(), +because the defaults depend on the source image colorspace. However the +other three source image parameters need not be valid until you call +jpeg_start_compress(). There's no harm in calling jpeg_set_defaults() more +than once, if that happens to be convenient. + +Typical code for a 24-bit RGB source image is + + cinfo.image_width = Width; /* image width and height, in pixels */ + cinfo.image_height = Height; + cinfo.input_components = 3; /* # of color components per pixel */ + cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ + + jpeg_set_defaults(&cinfo); + /* Make optional parameter settings here */ + + +4. jpeg_start_compress(...); + +After you have established the data destination and set all the necessary +source image info and other parameters, call jpeg_start_compress() to begin +a compression cycle. This will initialize internal state, allocate working +storage, and emit the first few bytes of the JPEG datastream header. + +Typical code: + + jpeg_start_compress(&cinfo, TRUE); + +The "TRUE" parameter ensures that a complete JPEG interchange datastream +will be written. This is appropriate in most cases. If you think you might +want to use an abbreviated datastream, read the section on abbreviated +datastreams, below. + +Once you have called jpeg_start_compress(), you may not alter any JPEG +parameters or other fields of the JPEG object until you have completed +the compression cycle. + + +5. while (scan lines remain to be written) + jpeg_write_scanlines(...); + +Now write all the required image data by calling jpeg_write_scanlines() +one or more times. You can pass one or more scanlines in each call, up +to the total image height. In most applications it is convenient to pass +just one or a few scanlines at a time. The expected format for the passed +data is discussed under "Data formats", above. + +Image data should be written in top-to-bottom scanline order. The JPEG spec +contains some weasel wording about how top and bottom are application-defined +terms (a curious interpretation of the English language...) but if you want +your files to be compatible with everyone else's, you WILL use top-to-bottom +order. If the source data must be read in bottom-to-top order, you can use +the JPEG library's virtual array mechanism to invert the data efficiently. +Examples of this can be found in the sample application cjpeg. + +The library maintains a count of the number of scanlines written so far +in the next_scanline field of the JPEG object. Usually you can just use +this variable as the loop counter, so that the loop test looks like +"while (cinfo.next_scanline < cinfo.image_height)". + +Code for this step depends heavily on the way that you store the source data. +example.c shows the following code for the case of a full-size 2-D source +array containing 3-byte RGB pixels: + + JSAMPROW row_pointer[1]; /* pointer to a single row */ + int row_stride; /* physical row width in buffer */ + + row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */ + + while (cinfo.next_scanline < cinfo.image_height) { + row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; + jpeg_write_scanlines(&cinfo, row_pointer, 1); + } + +jpeg_write_scanlines() returns the number of scanlines actually written. +This will normally be equal to the number passed in, so you can usually +ignore the return value. It is different in just two cases: + * If you try to write more scanlines than the declared image height, + the additional scanlines are ignored. + * If you use a suspending data destination manager, output buffer overrun + will cause the compressor to return before accepting all the passed lines. + This feature is discussed under "I/O suspension", below. The normal + stdio destination manager will NOT cause this to happen. +In any case, the return value is the same as the change in the value of +next_scanline. + + +6. jpeg_finish_compress(...); + +After all the image data has been written, call jpeg_finish_compress() to +complete the compression cycle. This step is ESSENTIAL to ensure that the +last bufferload of data is written to the data destination. +jpeg_finish_compress() also releases working memory associated with the JPEG +object. + +Typical code: + + jpeg_finish_compress(&cinfo); + +If using the stdio destination manager, don't forget to close the output +stdio stream (if necessary) afterwards. + +If you have requested a multi-pass operating mode, such as Huffman code +optimization, jpeg_finish_compress() will perform the additional passes using +data buffered by the first pass. In this case jpeg_finish_compress() may take +quite a while to complete. With the default compression parameters, this will +not happen. + +It is an error to call jpeg_finish_compress() before writing the necessary +total number of scanlines. If you wish to abort compression, call +jpeg_abort() as discussed below. + +After completing a compression cycle, you may dispose of the JPEG object +as discussed next, or you may use it to compress another image. In that case +return to step 2, 3, or 4 as appropriate. If you do not change the +destination manager, the new datastream will be written to the same target. +If you do not change any JPEG parameters, the new datastream will be written +with the same parameters as before. Note that you can change the input image +dimensions freely between cycles, but if you change the input colorspace, you +should call jpeg_set_defaults() to adjust for the new colorspace; and then +you'll need to repeat all of step 3. + + +7. Release the JPEG compression object. + +When you are done with a JPEG compression object, destroy it by calling +jpeg_destroy_compress(). This will free all subsidiary memory (regardless of +the previous state of the object). Or you can call jpeg_destroy(), which +works for either compression or decompression objects --- this may be more +convenient if you are sharing code between compression and decompression +cases. (Actually, these routines are equivalent except for the declared type +of the passed pointer. To avoid gripes from ANSI C compilers, jpeg_destroy() +should be passed a j_common_ptr.) + +If you allocated the jpeg_compress_struct structure from malloc(), freeing +it is your responsibility --- jpeg_destroy() won't. Ditto for the error +handler structure. + +Typical code: + + jpeg_destroy_compress(&cinfo); + + +8. Aborting. + +If you decide to abort a compression cycle before finishing, you can clean up +in either of two ways: + +* If you don't need the JPEG object any more, just call + jpeg_destroy_compress() or jpeg_destroy() to release memory. This is + legitimate at any point after calling jpeg_create_compress() --- in fact, + it's safe even if jpeg_create_compress() fails. + +* If you want to re-use the JPEG object, call jpeg_abort_compress(), or call + jpeg_abort() which works on both compression and decompression objects. + This will return the object to an idle state, releasing any working memory. + jpeg_abort() is allowed at any time after successful object creation. + +Note that cleaning up the data destination, if required, is your +responsibility; neither of these routines will call term_destination(). +(See "Compressed data handling", below, for more about that.) + +jpeg_destroy() and jpeg_abort() are the only safe calls to make on a JPEG +object that has reported an error by calling error_exit (see "Error handling" +for more info). The internal state of such an object is likely to be out of +whack. Either of these two routines will return the object to a known state. + + +Decompression details +--------------------- + +Here we revisit the JPEG decompression outline given in the overview. + +1. Allocate and initialize a JPEG decompression object. + +This is just like initialization for compression, as discussed above, +except that the object is a "struct jpeg_decompress_struct" and you +call jpeg_create_decompress(). Error handling is exactly the same. + +Typical code: + + struct jpeg_decompress_struct cinfo; + struct jpeg_error_mgr jerr; + ... + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + +(Both here and in the IJG code, we usually use variable name "cinfo" for +both compression and decompression objects.) + + +2. Specify the source of the compressed data (eg, a file). + +As previously mentioned, the JPEG library reads compressed data from a "data +source" module. The library includes one data source module which knows how +to read from a stdio stream. You can use your own source module if you want +to do something else, as discussed later. + +If you use the standard source module, you must open the source stdio stream +beforehand. Typical code for this step looks like: + + FILE *infile; + ... + if ((infile = fopen(filename, "rb")) == NULL) { + fprintf(stderr, "can't open %s\n", filename); + exit(1); + } + jpeg_stdio_src(&cinfo, infile); + +where the last line invokes the standard source module. + +WARNING: it is critical that the binary compressed data be read unchanged. +On non-Unix systems the stdio library may perform newline translation or +otherwise corrupt binary data. To suppress this behavior, you may need to use +a "b" option to fopen (as shown above), or use setmode() or another routine to +put the stdio stream in binary mode. See cjpeg.c and djpeg.c for code that +has been found to work on many systems. + +You may not change the data source between calling jpeg_read_header() and +jpeg_finish_decompress(). If you wish to read a series of JPEG images from +a single source file, you should repeat the jpeg_read_header() to +jpeg_finish_decompress() sequence without reinitializing either the JPEG +object or the data source module; this prevents buffered input data from +being discarded. + + +3. Call jpeg_read_header() to obtain image info. + +Typical code for this step is just + + jpeg_read_header(&cinfo, TRUE); + +This will read the source datastream header markers, up to the beginning +of the compressed data proper. On return, the image dimensions and other +info have been stored in the JPEG object. The application may wish to +consult this information before selecting decompression parameters. + +More complex code is necessary if + * A suspending data source is used --- in that case jpeg_read_header() + may return before it has read all the header data. See "I/O suspension", + below. The normal stdio source manager will NOT cause this to happen. + * Abbreviated JPEG files are to be processed --- see the section on + abbreviated datastreams. Standard applications that deal only in + interchange JPEG files need not be concerned with this case either. + +It is permissible to stop at this point if you just wanted to find out the +image dimensions and other header info for a JPEG file. In that case, +call jpeg_destroy() when you are done with the JPEG object, or call +jpeg_abort() to return it to an idle state before selecting a new data +source and reading another header. + + +4. Set parameters for decompression. + +jpeg_read_header() sets appropriate default decompression parameters based on +the properties of the image (in particular, its colorspace). However, you +may well want to alter these defaults before beginning the decompression. +For example, the default is to produce full color output from a color file. +If you want colormapped output you must ask for it. Other options allow the +returned image to be scaled and allow various speed/quality tradeoffs to be +selected. "Decompression parameter selection", below, gives details. + +If the defaults are appropriate, nothing need be done at this step. + +Note that all default values are set by each call to jpeg_read_header(). +If you reuse a decompression object, you cannot expect your parameter +settings to be preserved across cycles, as you can for compression. +You must set desired parameter values each time. + + +5. jpeg_start_decompress(...); + +Once the parameter values are satisfactory, call jpeg_start_decompress() to +begin decompression. This will initialize internal state, allocate working +memory, and prepare for returning data. + +Typical code is just + + jpeg_start_decompress(&cinfo); + +If you have requested a multi-pass operating mode, such as 2-pass color +quantization, jpeg_start_decompress() will do everything needed before data +output can begin. In this case jpeg_start_decompress() may take quite a while +to complete. With a single-scan (non progressive) JPEG file and default +decompression parameters, this will not happen; jpeg_start_decompress() will +return quickly. + +After this call, the final output image dimensions, including any requested +scaling, are available in the JPEG object; so is the selected colormap, if +colormapped output has been requested. Useful fields include + + output_width image width and height, as scaled + output_height + out_color_components # of color components in out_color_space + output_components # of color components returned per pixel + colormap the selected colormap, if any + actual_number_of_colors number of entries in colormap + +output_components is 1 (a colormap index) when quantizing colors; otherwise it +equals out_color_components. It is the number of JSAMPLE values that will be +emitted per pixel in the output arrays. + +Typically you will need to allocate data buffers to hold the incoming image. +You will need output_width * output_components JSAMPLEs per scanline in your +output buffer, and a total of output_height scanlines will be returned. + +Note: if you are using the JPEG library's internal memory manager to allocate +data buffers (as djpeg does), then the manager's protocol requires that you +request large buffers *before* calling jpeg_start_decompress(). This is a +little tricky since the output_XXX fields are not normally valid then. You +can make them valid by calling jpeg_calc_output_dimensions() after setting the +relevant parameters (scaling, output color space, and quantization flag). + + +6. while (scan lines remain to be read) + jpeg_read_scanlines(...); + +Now you can read the decompressed image data by calling jpeg_read_scanlines() +one or more times. At each call, you pass in the maximum number of scanlines +to be read (ie, the height of your working buffer); jpeg_read_scanlines() +will return up to that many lines. The return value is the number of lines +actually read. The format of the returned data is discussed under "Data +formats", above. Don't forget that grayscale and color JPEGs will return +different data formats! + +Image data is returned in top-to-bottom scanline order. If you must write +out the image in bottom-to-top order, you can use the JPEG library's virtual +array mechanism to invert the data efficiently. Examples of this can be +found in the sample application djpeg. + +The library maintains a count of the number of scanlines returned so far +in the output_scanline field of the JPEG object. Usually you can just use +this variable as the loop counter, so that the loop test looks like +"while (cinfo.output_scanline < cinfo.output_height)". (Note that the test +should NOT be against image_height, unless you never use scaling. The +image_height field is the height of the original unscaled image.) +The return value always equals the change in the value of output_scanline. + +If you don't use a suspending data source, it is safe to assume that +jpeg_read_scanlines() reads at least one scanline per call, until the +bottom of the image has been reached. + +If you use a buffer larger than one scanline, it is NOT safe to assume that +jpeg_read_scanlines() fills it. (The current implementation returns only a +few scanlines per call, no matter how large a buffer you pass.) So you must +always provide a loop that calls jpeg_read_scanlines() repeatedly until the +whole image has been read. + + +7. jpeg_finish_decompress(...); + +After all the image data has been read, call jpeg_finish_decompress() to +complete the decompression cycle. This causes working memory associated +with the JPEG object to be released. + +Typical code: + + jpeg_finish_decompress(&cinfo); + +If using the stdio source manager, don't forget to close the source stdio +stream if necessary. + +It is an error to call jpeg_finish_decompress() before reading the correct +total number of scanlines. If you wish to abort decompression, call +jpeg_abort() as discussed below. + +After completing a decompression cycle, you may dispose of the JPEG object as +discussed next, or you may use it to decompress another image. In that case +return to step 2 or 3 as appropriate. If you do not change the source +manager, the next image will be read from the same source. + + +8. Release the JPEG decompression object. + +When you are done with a JPEG decompression object, destroy it by calling +jpeg_destroy_decompress() or jpeg_destroy(). The previous discussion of +destroying compression objects applies here too. + +Typical code: + + jpeg_destroy_decompress(&cinfo); + + +9. Aborting. + +You can abort a decompression cycle by calling jpeg_destroy_decompress() or +jpeg_destroy() if you don't need the JPEG object any more, or +jpeg_abort_decompress() or jpeg_abort() if you want to reuse the object. +The previous discussion of aborting compression cycles applies here too. + + +Partial image decompression +--------------------------- + +Partial image decompression is convenient for performance-critical applications +that wish to view only a portion of a large JPEG image without decompressing +the whole thing. It it also useful in memory-constrained environments (such as +on mobile devices.) This library provides the following functions to support +partial image decompression: + +1. Skipping rows when decompressing + + jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines); + +This function provides application programmers with the ability to skip over +multiple rows in the JPEG image. + +Suspending data sources are not supported by this function. Calling +jpeg_skip_scanlines() with a suspending data source will result in undefined +behavior. + +jpeg_skip_scanlines() will not allow skipping past the bottom of the image. If +the value of num_lines is large enough to skip past the bottom of the image, +then the function will skip to the end of the image instead. + +If the value of num_lines is valid, then jpeg_skip_scanlines() will always +skip all of the input rows requested. There is no need to inspect the return +value of the function in that case. + +Best results will be achieved by calling jpeg_skip_scanlines() for large chunks +of rows. The function should be viewed as a way to quickly jump to a +particular vertical offset in the JPEG image in order to decode a subset of the +image. Used in this manner, it will provide significant performance +improvements. + +Calling jpeg_skip_scanlines() for small values of num_lines has several +potential drawbacks: + 1) JPEG decompression occurs in blocks, so if jpeg_skip_scanlines() is + called from the middle of a decompression block, then it is likely that + much of the decompression work has already been done for the first + couple of rows that need to be skipped. + 2) When this function returns, it must leave the decompressor in a state + such that it is ready to read the next line. This may involve + decompressing a block that must be partially skipped. +These issues are especially tricky for cases in which upsampling requires +context rows. In the worst case, jpeg_skip_scanlines() will perform similarly +to jpeg_read_scanlines() (since it will actually call jpeg_read_scanlines().) + +2. Decompressing partial scanlines + + jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) + +This function provides application programmers with the ability to decompress +only a portion of each row in the JPEG image. It must be called after +jpeg_start_decompress() and before any calls to jpeg_read_scanlines() or +jpeg_skip_scanlines(). + +If xoffset and width do not form a valid subset of the image row, then this +function will generate an error. Note that if the output image is scaled, then +xoffset and width are relative to the scaled image dimensions. + +xoffset and width are passed by reference because xoffset must fall on an iMCU +boundary. If it doesn't, then it will be moved left to the nearest iMCU +boundary, and width will be increased accordingly. If the calling program does +not like the adjusted values of xoffset and width, then it can call +jpeg_crop_scanline() again with new values (for instance, if it wants to move +xoffset to the nearest iMCU boundary to the right instead of to the left.) + +After calling this function, cinfo->output_width will be set to the adjusted +width. This value should be used when allocating an output buffer to pass to +jpeg_read_scanlines(). + +The output image from a partial-width decompression will be identical to the +corresponding image region from a full decode, with one exception: The "fancy" +(smooth) h2v2 (4:2:0) and h2v1 (4:2:2) upsampling algorithms fill in the +missing chroma components by averaging the chroma components from neighboring +pixels, except on the right and left edges of the image (where there are no +neighboring pixels.) When performing a partial-width decompression, these +"fancy" upsampling algorithms may treat the left and right edges of the partial +image region as if they are the left and right edges of the image, meaning that +the upsampling algorithm may be simplified. The result is that the pixels on +the left or right edge of the partial image may not be exactly identical to the +corresponding pixels in the original image. + + +Mechanics of usage: include files, linking, etc +----------------------------------------------- + +Applications using the JPEG library should include the header file jpeglib.h +to obtain declarations of data types and routines. Before including +jpeglib.h, include system headers that define at least the typedefs FILE and +size_t. On ANSI-conforming systems, including is sufficient; on +older Unix systems, you may need to define size_t. + +If the application needs to refer to individual JPEG library error codes, also +include jerror.h to define those symbols. + +jpeglib.h indirectly includes the files jconfig.h and jmorecfg.h. If you are +installing the JPEG header files in a system directory, you will want to +install all four files: jpeglib.h, jerror.h, jconfig.h, jmorecfg.h. + +The most convenient way to include the JPEG code into your executable program +is to prepare a library file ("libjpeg.a", or a corresponding name on non-Unix +machines) and reference it at your link step. If you use only half of the +library (only compression or only decompression), only that much code will be +included from the library, unless your linker is hopelessly brain-damaged. +The supplied makefiles build libjpeg.a automatically (see install.txt). + +While you can build the JPEG library as a shared library if the whim strikes +you, we don't really recommend it. The trouble with shared libraries is that +at some point you'll probably try to substitute a new version of the library +without recompiling the calling applications. That generally doesn't work +because the parameter struct declarations usually change with each new +version. In other words, the library's API is *not* guaranteed binary +compatible across versions; we only try to ensure source-code compatibility. +(In hindsight, it might have been smarter to hide the parameter structs from +applications and introduce a ton of access functions instead. Too late now, +however.) + +It may be worth pointing out that the core JPEG library does not actually +require the stdio library: only the default source/destination managers and +error handler need it. You can use the library in a stdio-less environment +if you replace those modules and use jmemnobs.c (or another memory manager of +your own devising). More info about the minimum system library requirements +may be found in jinclude.h. + + +ADVANCED FEATURES +================= + +Compression parameter selection +------------------------------- + +This section describes all the optional parameters you can set for JPEG +compression, as well as the "helper" routines provided to assist in this +task. Proper setting of some parameters requires detailed understanding +of the JPEG standard; if you don't know what a parameter is for, it's best +not to mess with it! See REFERENCES in the README.ijg file for pointers to +more info about JPEG. + +It's a good idea to call jpeg_set_defaults() first, even if you plan to set +all the parameters; that way your code is more likely to work with future JPEG +libraries that have additional parameters. For the same reason, we recommend +you use a helper routine where one is provided, in preference to twiddling +cinfo fields directly. + +The helper routines are: + +jpeg_set_defaults (j_compress_ptr cinfo) + This routine sets all JPEG parameters to reasonable defaults, using + only the input image's color space (field in_color_space, which must + already be set in cinfo). Many applications will only need to use + this routine and perhaps jpeg_set_quality(). + +jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) + Sets the JPEG file's colorspace (field jpeg_color_space) as specified, + and sets other color-space-dependent parameters appropriately. See + "Special color spaces", below, before using this. A large number of + parameters, including all per-component parameters, are set by this + routine; if you want to twiddle individual parameters you should call + jpeg_set_colorspace() before rather than after. + +jpeg_default_colorspace (j_compress_ptr cinfo) + Selects an appropriate JPEG colorspace based on cinfo->in_color_space, + and calls jpeg_set_colorspace(). This is actually a subroutine of + jpeg_set_defaults(). It's broken out in case you want to change + just the colorspace-dependent JPEG parameters. + +jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline) + Constructs JPEG quantization tables appropriate for the indicated + quality setting. The quality value is expressed on the 0..100 scale + recommended by IJG (cjpeg's "-quality" switch uses this routine). + Note that the exact mapping from quality values to tables may change + in future IJG releases as more is learned about DCT quantization. + If the force_baseline parameter is TRUE, then the quantization table + entries are constrained to the range 1..255 for full JPEG baseline + compatibility. In the current implementation, this only makes a + difference for quality settings below 25, and it effectively prevents + very small/low quality files from being generated. The IJG decoder + is capable of reading the non-baseline files generated at low quality + settings when force_baseline is FALSE, but other decoders may not be. + +jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) + Same as jpeg_set_quality() except that the generated tables are the + sample tables given in the JPEC spec section K.1, multiplied by the + specified scale factor (which is expressed as a percentage; thus + scale_factor = 100 reproduces the spec's tables). Note that larger + scale factors give lower quality. This entry point is useful for + conforming to the Adobe PostScript DCT conventions, but we do not + recommend linear scaling as a user-visible quality scale otherwise. + force_baseline again constrains the computed table entries to 1..255. + +int jpeg_quality_scaling (int quality) + Converts a value on the IJG-recommended quality scale to a linear + scaling percentage. Note that this routine may change or go away + in future releases --- IJG may choose to adopt a scaling method that + can't be expressed as a simple scalar multiplier, in which case the + premise of this routine collapses. Caveat user. + +jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) + [libjpeg v7+ API/ABI emulation only] + Set default quantization tables with linear q_scale_factor[] values + (see below). + +jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, + int scale_factor, boolean force_baseline) + Allows an arbitrary quantization table to be created. which_tbl + indicates which table slot to fill. basic_table points to an array + of 64 unsigned ints given in normal array order. These values are + multiplied by scale_factor/100 and then clamped to the range 1..65535 + (or to 1..255 if force_baseline is TRUE). + CAUTION: prior to library version 6a, jpeg_add_quant_table expected + the basic table to be given in JPEG zigzag order. If you need to + write code that works with either older or newer versions of this + routine, you must check the library version number. Something like + "#if JPEG_LIB_VERSION >= 61" is the right test. + +jpeg_simple_progression (j_compress_ptr cinfo) + Generates a default scan script for writing a progressive-JPEG file. + This is the recommended method of creating a progressive file, + unless you want to make a custom scan sequence. You must ensure that + the JPEG color space is set correctly before calling this routine. + + +Compression parameters (cinfo fields) include: + +boolean arith_code + If TRUE, use arithmetic coding. + If FALSE, use Huffman coding. + +J_DCT_METHOD dct_method + Selects the algorithm used for the DCT step. Choices are: + JDCT_ISLOW: slow but accurate integer algorithm + JDCT_IFAST: faster, less accurate integer method + JDCT_FLOAT: floating-point method + JDCT_DEFAULT: default method (normally JDCT_ISLOW) + JDCT_FASTEST: fastest method (normally JDCT_IFAST) + In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than + JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary + with other SIMD implementations, or when using libjpeg-turbo without + SIMD extensions.) For quality levels of 90 and below, there should be + little or no perceptible difference between the two algorithms. For + quality levels above 90, however, the difference between JDCT_IFAST and + JDCT_ISLOW becomes more pronounced. With quality=97, for instance, + JDCT_IFAST incurs generally about a 1-3 dB loss (in PSNR) relative to + JDCT_ISLOW, but this can be larger for some images. Do not use + JDCT_IFAST with quality levels above 97. The algorithm often + degenerates at quality=98 and above and can actually produce a more + lossy image than if lower quality levels had been used. Also, in + libjpeg-turbo, JDCT_IFAST is not fully accelerated for quality levels + above 97, so it will be slower than JDCT_ISLOW. JDCT_FLOAT is mainly a + legacy feature. It does not produce significantly more accurate + results than the ISLOW method, and it is much slower. The FLOAT method + may also give different results on different machines due to varying + roundoff behavior, whereas the integer methods should give the same + results on all machines. + +J_COLOR_SPACE jpeg_color_space +int num_components + The JPEG color space and corresponding number of components; see + "Special color spaces", below, for more info. We recommend using + jpeg_set_color_space() if you want to change these. + +boolean optimize_coding + TRUE causes the compressor to compute optimal Huffman coding tables + for the image. This requires an extra pass over the data and + therefore costs a good deal of space and time. The default is + FALSE, which tells the compressor to use the supplied or default + Huffman tables. In most cases optimal tables save only a few percent + of file size compared to the default tables. Note that when this is + TRUE, you need not supply Huffman tables at all, and any you do + supply will be overwritten. + +unsigned int restart_interval +int restart_in_rows + To emit restart markers in the JPEG file, set one of these nonzero. + Set restart_interval to specify the exact interval in MCU blocks. + Set restart_in_rows to specify the interval in MCU rows. (If + restart_in_rows is not 0, then restart_interval is set after the + image width in MCUs is computed.) Defaults are zero (no restarts). + One restart marker per MCU row is often a good choice. + NOTE: the overhead of restart markers is higher in grayscale JPEG + files than in color files, and MUCH higher in progressive JPEGs. + If you use restarts, you may want to use larger intervals in those + cases. + +const jpeg_scan_info *scan_info +int num_scans + By default, scan_info is NULL; this causes the compressor to write a + single-scan sequential JPEG file. If not NULL, scan_info points to + an array of scan definition records of length num_scans. The + compressor will then write a JPEG file having one scan for each scan + definition record. This is used to generate noninterleaved or + progressive JPEG files. The library checks that the scan array + defines a valid JPEG scan sequence. (jpeg_simple_progression creates + a suitable scan definition array for progressive JPEG.) This is + discussed further under "Progressive JPEG support". + +int smoothing_factor + If non-zero, the input image is smoothed; the value should be 1 for + minimal smoothing to 100 for maximum smoothing. Consult jcsample.c + for details of the smoothing algorithm. The default is zero. + +boolean write_JFIF_header + If TRUE, a JFIF APP0 marker is emitted. jpeg_set_defaults() and + jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space + (ie, YCbCr or grayscale) is selected, otherwise FALSE. + +UINT8 JFIF_major_version +UINT8 JFIF_minor_version + The version number to be written into the JFIF marker. + jpeg_set_defaults() initializes the version to 1.01 (major=minor=1). + You should set it to 1.02 (major=1, minor=2) if you plan to write + any JFIF 1.02 extension markers. + +UINT8 density_unit +UINT16 X_density +UINT16 Y_density + The resolution information to be written into the JFIF marker; + not used otherwise. density_unit may be 0 for unknown, + 1 for dots/inch, or 2 for dots/cm. The default values are 0,1,1 + indicating square pixels of unknown size. + +boolean write_Adobe_marker + If TRUE, an Adobe APP14 marker is emitted. jpeg_set_defaults() and + jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK, + or YCCK is selected, otherwise FALSE. It is generally a bad idea + to set both write_JFIF_header and write_Adobe_marker. In fact, + you probably shouldn't change the default settings at all --- the + default behavior ensures that the JPEG file's color space can be + recognized by the decoder. + +JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS] + Pointers to coefficient quantization tables, one per table slot, + or NULL if no table is defined for a slot. Usually these should + be set via one of the above helper routines; jpeg_add_quant_table() + is general enough to define any quantization table. The other + routines will set up table slot 0 for luminance quality and table + slot 1 for chrominance. + +int q_scale_factor[NUM_QUANT_TBLS] + [libjpeg v7+ API/ABI emulation only] + Linear quantization scaling factors (0-100, default 100) + for use with jpeg_default_qtables(). + See rdswitch.c and cjpeg.c for an example of usage. + Note that the q_scale_factor[] values use "linear" scales, so JPEG + quality levels chosen by the user must be converted to these scales + using jpeg_quality_scaling(). Here is an example that corresponds to + cjpeg -quality 90,70: + + jpeg_set_defaults(cinfo); + + /* Set luminance quality 90. */ + cinfo->q_scale_factor[0] = jpeg_quality_scaling(90); + /* Set chrominance quality 70. */ + cinfo->q_scale_factor[1] = jpeg_quality_scaling(70); + + jpeg_default_qtables(cinfo, force_baseline); + + CAUTION: Setting separate quality levels for chrominance and luminance + is mainly only useful if chrominance subsampling is disabled. 2x2 + chrominance subsampling (AKA "4:2:0") is the default, but you can + explicitly disable subsampling as follows: + + cinfo->comp_info[0].v_samp_factor = 1; + cinfo->comp_info[0].h_samp_factor = 1; + +JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS] +JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS] + Pointers to Huffman coding tables, one per table slot, or NULL if + no table is defined for a slot. Slots 0 and 1 are filled with the + JPEG sample tables by jpeg_set_defaults(). If you need to allocate + more table structures, jpeg_alloc_huff_table() may be used. + Note that optimal Huffman tables can be computed for an image + by setting optimize_coding, as discussed above; there's seldom + any need to mess with providing your own Huffman tables. + + +[libjpeg v7+ API/ABI emulation only] +The actual dimensions of the JPEG image that will be written to the file are +given by the following fields. These are computed from the input image +dimensions and the compression parameters by jpeg_start_compress(). You can +also call jpeg_calc_jpeg_dimensions() to obtain the values that will result +from the current parameter settings. This can be useful if you are trying +to pick a scaling ratio that will get close to a desired target size. + +JDIMENSION jpeg_width Actual dimensions of output image. +JDIMENSION jpeg_height + + +Per-component parameters are stored in the struct cinfo.comp_info[i] for +component number i. Note that components here refer to components of the +JPEG color space, *not* the source image color space. A suitably large +comp_info[] array is allocated by jpeg_set_defaults(); if you choose not +to use that routine, it's up to you to allocate the array. + +int component_id + The one-byte identifier code to be recorded in the JPEG file for + this component. For the standard color spaces, we recommend you + leave the default values alone. + +int h_samp_factor +int v_samp_factor + Horizontal and vertical sampling factors for the component; must + be 1..4 according to the JPEG standard. Note that larger sampling + factors indicate a higher-resolution component; many people find + this behavior quite unintuitive. The default values are 2,2 for + luminance components and 1,1 for chrominance components, except + for grayscale where 1,1 is used. + +int quant_tbl_no + Quantization table number for component. The default value is + 0 for luminance components and 1 for chrominance components. + +int dc_tbl_no +int ac_tbl_no + DC and AC entropy coding table numbers. The default values are + 0 for luminance components and 1 for chrominance components. + +int component_index + Must equal the component's index in comp_info[]. (Beginning in + release v6, the compressor library will fill this in automatically; + you don't have to.) + + +Decompression parameter selection +--------------------------------- + +Decompression parameter selection is somewhat simpler than compression +parameter selection, since all of the JPEG internal parameters are +recorded in the source file and need not be supplied by the application. +(Unless you are working with abbreviated files, in which case see +"Abbreviated datastreams", below.) Decompression parameters control +the postprocessing done on the image to deliver it in a format suitable +for the application's use. Many of the parameters control speed/quality +tradeoffs, in which faster decompression may be obtained at the price of +a poorer-quality image. The defaults select the highest quality (slowest) +processing. + +The following fields in the JPEG object are set by jpeg_read_header() and +may be useful to the application in choosing decompression parameters: + +JDIMENSION image_width Width and height of image +JDIMENSION image_height +int num_components Number of color components +J_COLOR_SPACE jpeg_color_space Colorspace of image +boolean saw_JFIF_marker TRUE if a JFIF APP0 marker was seen + UINT8 JFIF_major_version Version information from JFIF marker + UINT8 JFIF_minor_version + UINT8 density_unit Resolution data from JFIF marker + UINT16 X_density + UINT16 Y_density +boolean saw_Adobe_marker TRUE if an Adobe APP14 marker was seen + UINT8 Adobe_transform Color transform code from Adobe marker + +The JPEG color space, unfortunately, is something of a guess since the JPEG +standard proper does not provide a way to record it. In practice most files +adhere to the JFIF or Adobe conventions, and the decoder will recognize these +correctly. See "Special color spaces", below, for more info. + + +The decompression parameters that determine the basic properties of the +returned image are: + +J_COLOR_SPACE out_color_space + Output color space. jpeg_read_header() sets an appropriate default + based on jpeg_color_space; typically it will be RGB or grayscale. + The application can change this field to request output in a different + colorspace. For example, set it to JCS_GRAYSCALE to get grayscale + output from a color file. (This is useful for previewing: grayscale + output is faster than full color since the color components need not + be processed.) Note that not all possible color space transforms are + currently implemented; you may need to extend jdcolor.c if you want an + unusual conversion. + +unsigned int scale_num, scale_denom + Scale the image by the fraction scale_num/scale_denom. Default is + 1/1, or no scaling. Currently, the only supported scaling ratios + are M/8 with all M from 1 to 16, or any reduced fraction thereof (such + as 1/2, 3/4, etc.) (The library design allows for arbitrary + scaling ratios but this is not likely to be implemented any time soon.) + Smaller scaling ratios permit significantly faster decoding since + fewer pixels need be processed and a simpler IDCT method can be used. + +boolean quantize_colors + If set TRUE, colormapped output will be delivered. Default is FALSE, + meaning that full-color output will be delivered. + +The next three parameters are relevant only if quantize_colors is TRUE. + +int desired_number_of_colors + Maximum number of colors to use in generating a library-supplied color + map (the actual number of colors is returned in a different field). + Default 256. Ignored when the application supplies its own color map. + +boolean two_pass_quantize + If TRUE, an extra pass over the image is made to select a custom color + map for the image. This usually looks a lot better than the one-size- + fits-all colormap that is used otherwise. Default is TRUE. Ignored + when the application supplies its own color map. + +J_DITHER_MODE dither_mode + Selects color dithering method. Supported values are: + JDITHER_NONE no dithering: fast, very low quality + JDITHER_ORDERED ordered dither: moderate speed and quality + JDITHER_FS Floyd-Steinberg dither: slow, high quality + Default is JDITHER_FS. (At present, ordered dither is implemented + only in the single-pass, standard-colormap case. If you ask for + ordered dither when two_pass_quantize is TRUE or when you supply + an external color map, you'll get F-S dithering.) + +When quantize_colors is TRUE, the target color map is described by the next +two fields. colormap is set to NULL by jpeg_read_header(). The application +can supply a color map by setting colormap non-NULL and setting +actual_number_of_colors to the map size. Otherwise, jpeg_start_decompress() +selects a suitable color map and sets these two fields itself. +[Implementation restriction: at present, an externally supplied colormap is +only accepted for 3-component output color spaces.] + +JSAMPARRAY colormap + The color map, represented as a 2-D pixel array of out_color_components + rows and actual_number_of_colors columns. Ignored if not quantizing. + CAUTION: if the JPEG library creates its own colormap, the storage + pointed to by this field is released by jpeg_finish_decompress(). + Copy the colormap somewhere else first, if you want to save it. + +int actual_number_of_colors + The number of colors in the color map. + +Additional decompression parameters that the application may set include: + +J_DCT_METHOD dct_method + Selects the algorithm used for the DCT step. Choices are: + JDCT_ISLOW: slow but accurate integer algorithm + JDCT_IFAST: faster, less accurate integer method + JDCT_FLOAT: floating-point method + JDCT_DEFAULT: default method (normally JDCT_ISLOW) + JDCT_FASTEST: fastest method (normally JDCT_IFAST) + In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than + JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary + with other SIMD implementations, or when using libjpeg-turbo without + SIMD extensions.) If the JPEG image was compressed using a quality + level of 85 or below, then there should be little or no perceptible + difference between the two algorithms. When decompressing images that + were compressed using quality levels above 85, however, the difference + between JDCT_IFAST and JDCT_ISLOW becomes more pronounced. With images + compressed using quality=97, for instance, JDCT_IFAST incurs generally + about a 4-6 dB loss (in PSNR) relative to JDCT_ISLOW, but this can be + larger for some images. If you can avoid it, do not use JDCT_IFAST + when decompressing images that were compressed using quality levels + above 97. The algorithm often degenerates for such images and can + actually produce a more lossy output image than if the JPEG image had + been compressed using lower quality levels. JDCT_FLOAT is mainly a + legacy feature. It does not produce significantly more accurate + results than the ISLOW method, and it is much slower. The FLOAT method + may also give different results on different machines due to varying + roundoff behavior, whereas the integer methods should give the same + results on all machines. + +boolean do_fancy_upsampling + If TRUE, do careful upsampling of chroma components. If FALSE, + a faster but sloppier method is used. Default is TRUE. The visual + impact of the sloppier method is often very small. + +boolean do_block_smoothing + If TRUE, interblock smoothing is applied in early stages of decoding + progressive JPEG files; if FALSE, not. Default is TRUE. Early + progression stages look "fuzzy" with smoothing, "blocky" without. + In any case, block smoothing ceases to be applied after the first few + AC coefficients are known to full accuracy, so it is relevant only + when using buffered-image mode for progressive images. + +boolean enable_1pass_quant +boolean enable_external_quant +boolean enable_2pass_quant + These are significant only in buffered-image mode, which is + described in its own section below. + + +The output image dimensions are given by the following fields. These are +computed from the source image dimensions and the decompression parameters +by jpeg_start_decompress(). You can also call jpeg_calc_output_dimensions() +to obtain the values that will result from the current parameter settings. +This can be useful if you are trying to pick a scaling ratio that will get +close to a desired target size. It's also important if you are using the +JPEG library's memory manager to allocate output buffer space, because you +are supposed to request such buffers *before* jpeg_start_decompress(). + +JDIMENSION output_width Actual dimensions of output image. +JDIMENSION output_height +int out_color_components Number of color components in out_color_space. +int output_components Number of color components returned. +int rec_outbuf_height Recommended height of scanline buffer. + +When quantizing colors, output_components is 1, indicating a single color map +index per pixel. Otherwise it equals out_color_components. The output arrays +are required to be output_width * output_components JSAMPLEs wide. + +rec_outbuf_height is the recommended minimum height (in scanlines) of the +buffer passed to jpeg_read_scanlines(). If the buffer is smaller, the +library will still work, but time will be wasted due to unnecessary data +copying. In high-quality modes, rec_outbuf_height is always 1, but some +faster, lower-quality modes set it to larger values (typically 2 to 4). +If you are going to ask for a high-speed processing mode, you may as well +go to the trouble of honoring rec_outbuf_height so as to avoid data copying. +(An output buffer larger than rec_outbuf_height lines is OK, but won't +provide any material speed improvement over that height.) + + +Special color spaces +-------------------- + +The JPEG standard itself is "color blind" and doesn't specify any particular +color space. It is customary to convert color data to a luminance/chrominance +color space before compressing, since this permits greater compression. The +existing de-facto JPEG file format standards specify YCbCr or grayscale data +(JFIF), or grayscale, RGB, YCbCr, CMYK, or YCCK (Adobe). For special +applications such as multispectral images, other color spaces can be used, +but it must be understood that such files will be unportable. + +The JPEG library can handle the most common colorspace conversions (namely +RGB <=> YCbCr and CMYK <=> YCCK). It can also deal with data of an unknown +color space, passing it through without conversion. If you deal extensively +with an unusual color space, you can easily extend the library to understand +additional color spaces and perform appropriate conversions. + +For compression, the source data's color space is specified by field +in_color_space. This is transformed to the JPEG file's color space given +by jpeg_color_space. jpeg_set_defaults() chooses a reasonable JPEG color +space depending on in_color_space, but you can override this by calling +jpeg_set_colorspace(). Of course you must select a supported transformation. +jccolor.c currently supports the following transformations: + RGB => YCbCr + RGB => GRAYSCALE + YCbCr => GRAYSCALE + CMYK => YCCK +plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB, +YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN. + +The de-facto file format standards (JFIF and Adobe) specify APPn markers that +indicate the color space of the JPEG file. It is important to ensure that +these are written correctly, or omitted if the JPEG file's color space is not +one of the ones supported by the de-facto standards. jpeg_set_colorspace() +will set the compression parameters to include or omit the APPn markers +properly, so long as it is told the truth about the JPEG color space. +For example, if you are writing some random 3-component color space without +conversion, don't try to fake out the library by setting in_color_space and +jpeg_color_space to JCS_YCbCr; use JCS_UNKNOWN. You may want to write an +APPn marker of your own devising to identify the colorspace --- see "Special +markers", below. + +When told that the color space is UNKNOWN, the library will default to using +luminance-quality compression parameters for all color components. You may +well want to change these parameters. See the source code for +jpeg_set_colorspace(), in jcparam.c, for details. + +For decompression, the JPEG file's color space is given in jpeg_color_space, +and this is transformed to the output color space out_color_space. +jpeg_read_header's setting of jpeg_color_space can be relied on if the file +conforms to JFIF or Adobe conventions, but otherwise it is no better than a +guess. If you know the JPEG file's color space for certain, you can override +jpeg_read_header's guess by setting jpeg_color_space. jpeg_read_header also +selects a default output color space based on (its guess of) jpeg_color_space; +set out_color_space to override this. Again, you must select a supported +transformation. jdcolor.c currently supports + YCbCr => RGB + YCbCr => GRAYSCALE + RGB => GRAYSCALE + GRAYSCALE => RGB + YCCK => CMYK +as well as the null transforms. (Since GRAYSCALE=>RGB is provided, an +application can force grayscale JPEGs to look like color JPEGs if it only +wants to handle one case.) + +The two-pass color quantizer, jquant2.c, is specialized to handle RGB data +(it weights distances appropriately for RGB colors). You'll need to modify +the code if you want to use it for non-RGB output color spaces. Note that +jquant2.c is used to map to an application-supplied colormap as well as for +the normal two-pass colormap selection process. + +CAUTION: it appears that Adobe Photoshop writes inverted data in CMYK JPEG +files: 0 represents 100% ink coverage, rather than 0% ink as you'd expect. +This is arguably a bug in Photoshop, but if you need to work with Photoshop +CMYK files, you will have to deal with it in your application. We cannot +"fix" this in the library by inverting the data during the CMYK<=>YCCK +transform, because that would break other applications, notably Ghostscript. +Photoshop versions prior to 3.0 write EPS files containing JPEG-encoded CMYK +data in the same inverted-YCCK representation used in bare JPEG files, but +the surrounding PostScript code performs an inversion using the PS image +operator. I am told that Photoshop 3.0 will write uninverted YCCK in +EPS/JPEG files, and will omit the PS-level inversion. (But the data +polarity used in bare JPEG files will not change in 3.0.) In either case, +the JPEG library must not invert the data itself, or else Ghostscript would +read these EPS files incorrectly. + + +Error handling +-------------- + +When the default error handler is used, any error detected inside the JPEG +routines will cause a message to be printed on stderr, followed by exit(). +You can supply your own error handling routines to override this behavior +and to control the treatment of nonfatal warnings and trace/debug messages. +The file example.c illustrates the most common case, which is to have the +application regain control after an error rather than exiting. + +The JPEG library never writes any message directly; it always goes through +the error handling routines. Three classes of messages are recognized: + * Fatal errors: the library cannot continue. + * Warnings: the library can continue, but the data is corrupt, and a + damaged output image is likely to result. + * Trace/informational messages. These come with a trace level indicating + the importance of the message; you can control the verbosity of the + program by adjusting the maximum trace level that will be displayed. + +You may, if you wish, simply replace the entire JPEG error handling module +(jerror.c) with your own code. However, you can avoid code duplication by +only replacing some of the routines depending on the behavior you need. +This is accomplished by calling jpeg_std_error() as usual, but then overriding +some of the method pointers in the jpeg_error_mgr struct, as illustrated by +example.c. + +All of the error handling routines will receive a pointer to the JPEG object +(a j_common_ptr which points to either a jpeg_compress_struct or a +jpeg_decompress_struct; if you need to tell which, test the is_decompressor +field). This struct includes a pointer to the error manager struct in its +"err" field. Frequently, custom error handler routines will need to access +additional data which is not known to the JPEG library or the standard error +handler. The most convenient way to do this is to embed either the JPEG +object or the jpeg_error_mgr struct in a larger structure that contains +additional fields; then casting the passed pointer provides access to the +additional fields. Again, see example.c for one way to do it. (Beginning +with IJG version 6b, there is also a void pointer "client_data" in each +JPEG object, which the application can also use to find related data. +The library does not touch client_data at all.) + +The individual methods that you might wish to override are: + +error_exit (j_common_ptr cinfo) + Receives control for a fatal error. Information sufficient to + generate the error message has been stored in cinfo->err; call + output_message to display it. Control must NOT return to the caller; + generally this routine will exit() or longjmp() somewhere. + Typically you would override this routine to get rid of the exit() + default behavior. Note that if you continue processing, you should + clean up the JPEG object with jpeg_abort() or jpeg_destroy(). + +output_message (j_common_ptr cinfo) + Actual output of any JPEG message. Override this to send messages + somewhere other than stderr. Note that this method does not know + how to generate a message, only where to send it. + +format_message (j_common_ptr cinfo, char *buffer) + Constructs a readable error message string based on the error info + stored in cinfo->err. This method is called by output_message. Few + applications should need to override this method. One possible + reason for doing so is to implement dynamic switching of error message + language. + +emit_message (j_common_ptr cinfo, int msg_level) + Decide whether or not to emit a warning or trace message; if so, + calls output_message. The main reason for overriding this method + would be to abort on warnings. msg_level is -1 for warnings, + 0 and up for trace messages. + +Only error_exit() and emit_message() are called from the rest of the JPEG +library; the other two are internal to the error handler. + +The actual message texts are stored in an array of strings which is pointed to +by the field err->jpeg_message_table. The messages are numbered from 0 to +err->last_jpeg_message, and it is these code numbers that are used in the +JPEG library code. You could replace the message texts (for instance, with +messages in French or German) by changing the message table pointer. See +jerror.h for the default texts. CAUTION: this table will almost certainly +change or grow from one library version to the next. + +It may be useful for an application to add its own message texts that are +handled by the same mechanism. The error handler supports a second "add-on" +message table for this purpose. To define an addon table, set the pointer +err->addon_message_table and the message numbers err->first_addon_message and +err->last_addon_message. If you number the addon messages beginning at 1000 +or so, you won't have to worry about conflicts with the library's built-in +messages. See the sample applications cjpeg/djpeg for an example of using +addon messages (the addon messages are defined in cderror.h). + +Actual invocation of the error handler is done via macros defined in jerror.h: + ERREXITn(...) for fatal errors + WARNMSn(...) for corrupt-data warnings + TRACEMSn(...) for trace and informational messages. +These macros store the message code and any additional parameters into the +error handler struct, then invoke the error_exit() or emit_message() method. +The variants of each macro are for varying numbers of additional parameters. +The additional parameters are inserted into the generated message using +standard printf() format codes. + +See jerror.h and jerror.c for further details. + + +Compressed data handling (source and destination managers) +---------------------------------------------------------- + +The JPEG compression library sends its compressed data to a "destination +manager" module. The default destination manager just writes the data to a +memory buffer or to a stdio stream, but you can provide your own manager to +do something else. Similarly, the decompression library calls a "source +manager" to obtain the compressed data; you can provide your own source +manager if you want the data to come from somewhere other than a memory +buffer or a stdio stream. + +In both cases, compressed data is processed a bufferload at a time: the +destination or source manager provides a work buffer, and the library invokes +the manager only when the buffer is filled or emptied. (You could define a +one-character buffer to force the manager to be invoked for each byte, but +that would be rather inefficient.) The buffer's size and location are +controlled by the manager, not by the library. For example, the memory +source manager just makes the buffer pointer and length point to the original +data in memory. In this case the buffer-reload procedure will be invoked +only if the decompressor ran off the end of the datastream, which would +indicate an erroneous datastream. + +The work buffer is defined as an array of datatype JOCTET, which is generally +"char" or "unsigned char". On a machine where char is not exactly 8 bits +wide, you must define JOCTET as a wider data type and then modify the data +source and destination modules to transcribe the work arrays into 8-bit units +on external storage. + +A data destination manager struct contains a pointer and count defining the +next byte to write in the work buffer and the remaining free space: + + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + +The library increments the pointer and decrements the count until the buffer +is filled. The manager's empty_output_buffer method must reset the pointer +and count. The manager is expected to remember the buffer's starting address +and total size in private fields not visible to the library. + +A data destination manager provides three methods: + +init_destination (j_compress_ptr cinfo) + Initialize destination. This is called by jpeg_start_compress() + before any data is actually written. It must initialize + next_output_byte and free_in_buffer. free_in_buffer must be + initialized to a positive value. + +empty_output_buffer (j_compress_ptr cinfo) + This is called whenever the buffer has filled (free_in_buffer + reaches zero). In typical applications, it should write out the + *entire* buffer (use the saved start address and buffer length; + ignore the current state of next_output_byte and free_in_buffer). + Then reset the pointer & count to the start of the buffer, and + return TRUE indicating that the buffer has been dumped. + free_in_buffer must be set to a positive value when TRUE is + returned. A FALSE return should only be used when I/O suspension is + desired (this operating mode is discussed in the next section). + +term_destination (j_compress_ptr cinfo) + Terminate destination --- called by jpeg_finish_compress() after all + data has been written. In most applications, this must flush any + data remaining in the buffer. Use either next_output_byte or + free_in_buffer to determine how much data is in the buffer. + +term_destination() is NOT called by jpeg_abort() or jpeg_destroy(). If you +want the destination manager to be cleaned up during an abort, you must do it +yourself. + +You will also need code to create a jpeg_destination_mgr struct, fill in its +method pointers, and insert a pointer to the struct into the "dest" field of +the JPEG compression object. This can be done in-line in your setup code if +you like, but it's probably cleaner to provide a separate routine similar to +the jpeg_stdio_dest() or jpeg_mem_dest() routines of the supplied destination +managers. + +Decompression source managers follow a parallel design, but with some +additional frammishes. The source manager struct contains a pointer and count +defining the next byte to read from the work buffer and the number of bytes +remaining: + + const JOCTET *next_input_byte; /* => next byte to read from buffer */ + size_t bytes_in_buffer; /* # of bytes remaining in buffer */ + +The library increments the pointer and decrements the count until the buffer +is emptied. The manager's fill_input_buffer method must reset the pointer and +count. In most applications, the manager must remember the buffer's starting +address and total size in private fields not visible to the library. + +A data source manager provides five methods: + +init_source (j_decompress_ptr cinfo) + Initialize source. This is called by jpeg_read_header() before any + data is actually read. Unlike init_destination(), it may leave + bytes_in_buffer set to 0 (in which case a fill_input_buffer() call + will occur immediately). + +fill_input_buffer (j_decompress_ptr cinfo) + This is called whenever bytes_in_buffer has reached zero and more + data is wanted. In typical applications, it should read fresh data + into the buffer (ignoring the current state of next_input_byte and + bytes_in_buffer), reset the pointer & count to the start of the + buffer, and return TRUE indicating that the buffer has been reloaded. + It is not necessary to fill the buffer entirely, only to obtain at + least one more byte. bytes_in_buffer MUST be set to a positive value + if TRUE is returned. A FALSE return should only be used when I/O + suspension is desired (this mode is discussed in the next section). + +skip_input_data (j_decompress_ptr cinfo, long num_bytes) + Skip num_bytes worth of data. The buffer pointer and count should + be advanced over num_bytes input bytes, refilling the buffer as + needed. This is used to skip over a potentially large amount of + uninteresting data (such as an APPn marker). In some applications + it may be possible to optimize away the reading of the skipped data, + but it's not clear that being smart is worth much trouble; large + skips are uncommon. bytes_in_buffer may be zero on return. + A zero or negative skip count should be treated as a no-op. + +resync_to_restart (j_decompress_ptr cinfo, int desired) + This routine is called only when the decompressor has failed to find + a restart (RSTn) marker where one is expected. Its mission is to + find a suitable point for resuming decompression. For most + applications, we recommend that you just use the default resync + procedure, jpeg_resync_to_restart(). However, if you are able to back + up in the input data stream, or if you have a-priori knowledge about + the likely location of restart markers, you may be able to do better. + Read the read_restart_marker() and jpeg_resync_to_restart() routines + in jdmarker.c if you think you'd like to implement your own resync + procedure. + +term_source (j_decompress_ptr cinfo) + Terminate source --- called by jpeg_finish_decompress() after all + data has been read. Often a no-op. + +For both fill_input_buffer() and skip_input_data(), there is no such thing +as an EOF return. If the end of the file has been reached, the routine has +a choice of exiting via ERREXIT() or inserting fake data into the buffer. +In most cases, generating a warning message and inserting a fake EOI marker +is the best course of action --- this will allow the decompressor to output +however much of the image is there. In pathological cases, the decompressor +may swallow the EOI and again demand data ... just keep feeding it fake EOIs. +jdatasrc.c illustrates the recommended error recovery behavior. + +term_source() is NOT called by jpeg_abort() or jpeg_destroy(). If you want +the source manager to be cleaned up during an abort, you must do it yourself. + +You will also need code to create a jpeg_source_mgr struct, fill in its method +pointers, and insert a pointer to the struct into the "src" field of the JPEG +decompression object. This can be done in-line in your setup code if you +like, but it's probably cleaner to provide a separate routine similar to the +jpeg_stdio_src() or jpeg_mem_src() routines of the supplied source managers. + +For more information, consult the memory and stdio source and destination +managers in jdatasrc.c and jdatadst.c. + + +I/O suspension +-------------- + +Some applications need to use the JPEG library as an incremental memory-to- +memory filter: when the compressed data buffer is filled or emptied, they want +control to return to the outer loop, rather than expecting that the buffer can +be emptied or reloaded within the data source/destination manager subroutine. +The library supports this need by providing an "I/O suspension" mode, which we +describe in this section. + +The I/O suspension mode is not a panacea: nothing is guaranteed about the +maximum amount of time spent in any one call to the library, so it will not +eliminate response-time problems in single-threaded applications. If you +need guaranteed response time, we suggest you "bite the bullet" and implement +a real multi-tasking capability. + +To use I/O suspension, cooperation is needed between the calling application +and the data source or destination manager; you will always need a custom +source/destination manager. (Please read the previous section if you haven't +already.) The basic idea is that the empty_output_buffer() or +fill_input_buffer() routine is a no-op, merely returning FALSE to indicate +that it has done nothing. Upon seeing this, the JPEG library suspends +operation and returns to its caller. The surrounding application is +responsible for emptying or refilling the work buffer before calling the +JPEG library again. + +Compression suspension: + +For compression suspension, use an empty_output_buffer() routine that returns +FALSE; typically it will not do anything else. This will cause the +compressor to return to the caller of jpeg_write_scanlines(), with the return +value indicating that not all the supplied scanlines have been accepted. +The application must make more room in the output buffer, adjust the output +buffer pointer/count appropriately, and then call jpeg_write_scanlines() +again, pointing to the first unconsumed scanline. + +When forced to suspend, the compressor will backtrack to a convenient stopping +point (usually the start of the current MCU); it will regenerate some output +data when restarted. Therefore, although empty_output_buffer() is only +called when the buffer is filled, you should NOT write out the entire buffer +after a suspension. Write only the data up to the current position of +next_output_byte/free_in_buffer. The data beyond that point will be +regenerated after resumption. + +Because of the backtracking behavior, a good-size output buffer is essential +for efficiency; you don't want the compressor to suspend often. (In fact, an +overly small buffer could lead to infinite looping, if a single MCU required +more data than would fit in the buffer.) We recommend a buffer of at least +several Kbytes. You may want to insert explicit code to ensure that you don't +call jpeg_write_scanlines() unless there is a reasonable amount of space in +the output buffer; in other words, flush the buffer before trying to compress +more data. + +The compressor does not allow suspension while it is trying to write JPEG +markers at the beginning and end of the file. This means that: + * At the beginning of a compression operation, there must be enough free + space in the output buffer to hold the header markers (typically 600 or + so bytes). The recommended buffer size is bigger than this anyway, so + this is not a problem as long as you start with an empty buffer. However, + this restriction might catch you if you insert large special markers, such + as a JFIF thumbnail image, without flushing the buffer afterwards. + * When you call jpeg_finish_compress(), there must be enough space in the + output buffer to emit any buffered data and the final EOI marker. In the + current implementation, half a dozen bytes should suffice for this, but + for safety's sake we recommend ensuring that at least 100 bytes are free + before calling jpeg_finish_compress(). + +A more significant restriction is that jpeg_finish_compress() cannot suspend. +This means you cannot use suspension with multi-pass operating modes, namely +Huffman code optimization and multiple-scan output. Those modes write the +whole file during jpeg_finish_compress(), which will certainly result in +buffer overrun. (Note that this restriction applies only to compression, +not decompression. The decompressor supports input suspension in all of its +operating modes.) + +Decompression suspension: + +For decompression suspension, use a fill_input_buffer() routine that simply +returns FALSE (except perhaps during error recovery, as discussed below). +This will cause the decompressor to return to its caller with an indication +that suspension has occurred. This can happen at four places: + * jpeg_read_header(): will return JPEG_SUSPENDED. + * jpeg_start_decompress(): will return FALSE, rather than its usual TRUE. + * jpeg_read_scanlines(): will return the number of scanlines already + completed (possibly 0). + * jpeg_finish_decompress(): will return FALSE, rather than its usual TRUE. +The surrounding application must recognize these cases, load more data into +the input buffer, and repeat the call. In the case of jpeg_read_scanlines(), +increment the passed pointers past any scanlines successfully read. + +Just as with compression, the decompressor will typically backtrack to a +convenient restart point before suspending. When fill_input_buffer() is +called, next_input_byte/bytes_in_buffer point to the current restart point, +which is where the decompressor will backtrack to if FALSE is returned. +The data beyond that position must NOT be discarded if you suspend; it needs +to be re-read upon resumption. In most implementations, you'll need to shift +this data down to the start of your work buffer and then load more data after +it. Again, this behavior means that a several-Kbyte work buffer is essential +for decent performance; furthermore, you should load a reasonable amount of +new data before resuming decompression. (If you loaded, say, only one new +byte each time around, you could waste a LOT of cycles.) + +The skip_input_data() source manager routine requires special care in a +suspension scenario. This routine is NOT granted the ability to suspend the +decompressor; it can decrement bytes_in_buffer to zero, but no more. If the +requested skip distance exceeds the amount of data currently in the input +buffer, then skip_input_data() must set bytes_in_buffer to zero and record the +additional skip distance somewhere else. The decompressor will immediately +call fill_input_buffer(), which should return FALSE, which will cause a +suspension return. The surrounding application must then arrange to discard +the recorded number of bytes before it resumes loading the input buffer. +(Yes, this design is rather baroque, but it avoids complexity in the far more +common case where a non-suspending source manager is used.) + +If the input data has been exhausted, we recommend that you emit a warning +and insert dummy EOI markers just as a non-suspending data source manager +would do. This can be handled either in the surrounding application logic or +within fill_input_buffer(); the latter is probably more efficient. If +fill_input_buffer() knows that no more data is available, it can set the +pointer/count to point to a dummy EOI marker and then return TRUE just as +though it had read more data in a non-suspending situation. + +The decompressor does not attempt to suspend within standard JPEG markers; +instead it will backtrack to the start of the marker and reprocess the whole +marker next time. Hence the input buffer must be large enough to hold the +longest standard marker in the file. Standard JPEG markers should normally +not exceed a few hundred bytes each (DHT tables are typically the longest). +We recommend at least a 2K buffer for performance reasons, which is much +larger than any correct marker is likely to be. For robustness against +damaged marker length counts, you may wish to insert a test in your +application for the case that the input buffer is completely full and yet +the decoder has suspended without consuming any data --- otherwise, if this +situation did occur, it would lead to an endless loop. (The library can't +provide this test since it has no idea whether "the buffer is full", or +even whether there is a fixed-size input buffer.) + +The input buffer would need to be 64K to allow for arbitrary COM or APPn +markers, but these are handled specially: they are either saved into allocated +memory, or skipped over by calling skip_input_data(). In the former case, +suspension is handled correctly, and in the latter case, the problem of +buffer overrun is placed on skip_input_data's shoulders, as explained above. +Note that if you provide your own marker handling routine for large markers, +you should consider how to deal with buffer overflow. + +Multiple-buffer management: + +In some applications it is desirable to store the compressed data in a linked +list of buffer areas, so as to avoid data copying. This can be handled by +having empty_output_buffer() or fill_input_buffer() set the pointer and count +to reference the next available buffer; FALSE is returned only if no more +buffers are available. Although seemingly straightforward, there is a +pitfall in this approach: the backtrack that occurs when FALSE is returned +could back up into an earlier buffer. For example, when fill_input_buffer() +is called, the current pointer & count indicate the backtrack restart point. +Since fill_input_buffer() will set the pointer and count to refer to a new +buffer, the restart position must be saved somewhere else. Suppose a second +call to fill_input_buffer() occurs in the same library call, and no +additional input data is available, so fill_input_buffer must return FALSE. +If the JPEG library has not moved the pointer/count forward in the current +buffer, then *the correct restart point is the saved position in the prior +buffer*. Prior buffers may be discarded only after the library establishes +a restart point within a later buffer. Similar remarks apply for output into +a chain of buffers. + +The library will never attempt to backtrack over a skip_input_data() call, +so any skipped data can be permanently discarded. You still have to deal +with the case of skipping not-yet-received data, however. + +It's much simpler to use only a single buffer; when fill_input_buffer() is +called, move any unconsumed data (beyond the current pointer/count) down to +the beginning of this buffer and then load new data into the remaining buffer +space. This approach requires a little more data copying but is far easier +to get right. + + +Progressive JPEG support +------------------------ + +Progressive JPEG rearranges the stored data into a series of scans of +increasing quality. In situations where a JPEG file is transmitted across a +slow communications link, a decoder can generate a low-quality image very +quickly from the first scan, then gradually improve the displayed quality as +more scans are received. The final image after all scans are complete is +identical to that of a regular (sequential) JPEG file of the same quality +setting. Progressive JPEG files are often slightly smaller than equivalent +sequential JPEG files, but the possibility of incremental display is the main +reason for using progressive JPEG. + +The IJG encoder library generates progressive JPEG files when given a +suitable "scan script" defining how to divide the data into scans. +Creation of progressive JPEG files is otherwise transparent to the encoder. +Progressive JPEG files can also be read transparently by the decoder library. +If the decoding application simply uses the library as defined above, it +will receive a final decoded image without any indication that the file was +progressive. Of course, this approach does not allow incremental display. +To perform incremental display, an application needs to use the decoder +library's "buffered-image" mode, in which it receives a decoded image +multiple times. + +Each displayed scan requires about as much work to decode as a full JPEG +image of the same size, so the decoder must be fairly fast in relation to the +data transmission rate in order to make incremental display useful. However, +it is possible to skip displaying the image and simply add the incoming bits +to the decoder's coefficient buffer. This is fast because only Huffman +decoding need be done, not IDCT, upsampling, colorspace conversion, etc. +The IJG decoder library allows the application to switch dynamically between +displaying the image and simply absorbing the incoming bits. A properly +coded application can automatically adapt the number of display passes to +suit the time available as the image is received. Also, a final +higher-quality display cycle can be performed from the buffered data after +the end of the file is reached. + +Progressive compression: + +To create a progressive JPEG file (or a multiple-scan sequential JPEG file), +set the scan_info cinfo field to point to an array of scan descriptors, and +perform compression as usual. Instead of constructing your own scan list, +you can call the jpeg_simple_progression() helper routine to create a +recommended progression sequence; this method should be used by all +applications that don't want to get involved in the nitty-gritty of +progressive scan sequence design. (If you want to provide user control of +scan sequences, you may wish to borrow the scan script reading code found +in rdswitch.c, so that you can read scan script files just like cjpeg's.) +When scan_info is not NULL, the compression library will store DCT'd data +into a buffer array as jpeg_write_scanlines() is called, and will emit all +the requested scans during jpeg_finish_compress(). This implies that +multiple-scan output cannot be created with a suspending data destination +manager, since jpeg_finish_compress() does not support suspension. We +should also note that the compressor currently forces Huffman optimization +mode when creating a progressive JPEG file, because the default Huffman +tables are unsuitable for progressive files. + +Progressive decompression: + +When buffered-image mode is not used, the decoder library will read all of +a multi-scan file during jpeg_start_decompress(), so that it can provide a +final decoded image. (Here "multi-scan" means either progressive or +multi-scan sequential.) This makes multi-scan files transparent to the +decoding application. However, existing applications that used suspending +input with version 5 of the IJG library will need to be modified to check +for a suspension return from jpeg_start_decompress(). + +To perform incremental display, an application must use the library's +buffered-image mode. This is described in the next section. + + +Buffered-image mode +------------------- + +In buffered-image mode, the library stores the partially decoded image in a +coefficient buffer, from which it can be read out as many times as desired. +This mode is typically used for incremental display of progressive JPEG files, +but it can be used with any JPEG file. Each scan of a progressive JPEG file +adds more data (more detail) to the buffered image. The application can +display in lockstep with the source file (one display pass per input scan), +or it can allow input processing to outrun display processing. By making +input and display processing run independently, it is possible for the +application to adapt progressive display to a wide range of data transmission +rates. + +The basic control flow for buffered-image decoding is + + jpeg_create_decompress() + set data source + jpeg_read_header() + set overall decompression parameters + cinfo.buffered_image = TRUE; /* select buffered-image mode */ + jpeg_start_decompress() + for (each output pass) { + adjust output decompression parameters if required + jpeg_start_output() /* start a new output pass */ + for (all scanlines in image) { + jpeg_read_scanlines() + display scanlines + } + jpeg_finish_output() /* terminate output pass */ + } + jpeg_finish_decompress() + jpeg_destroy_decompress() + +This differs from ordinary unbuffered decoding in that there is an additional +level of looping. The application can choose how many output passes to make +and how to display each pass. + +The simplest approach to displaying progressive images is to do one display +pass for each scan appearing in the input file. In this case the outer loop +condition is typically + while (! jpeg_input_complete(&cinfo)) +and the start-output call should read + jpeg_start_output(&cinfo, cinfo.input_scan_number); +The second parameter to jpeg_start_output() indicates which scan of the input +file is to be displayed; the scans are numbered starting at 1 for this +purpose. (You can use a loop counter starting at 1 if you like, but using +the library's input scan counter is easier.) The library automatically reads +data as necessary to complete each requested scan, and jpeg_finish_output() +advances to the next scan or end-of-image marker (hence input_scan_number +will be incremented by the time control arrives back at jpeg_start_output()). +With this technique, data is read from the input file only as needed, and +input and output processing run in lockstep. + +After reading the final scan and reaching the end of the input file, the +buffered image remains available; it can be read additional times by +repeating the jpeg_start_output()/jpeg_read_scanlines()/jpeg_finish_output() +sequence. For example, a useful technique is to use fast one-pass color +quantization for display passes made while the image is arriving, followed by +a final display pass using two-pass quantization for highest quality. This +is done by changing the library parameters before the final output pass. +Changing parameters between passes is discussed in detail below. + +In general the last scan of a progressive file cannot be recognized as such +until after it is read, so a post-input display pass is the best approach if +you want special processing in the final pass. + +When done with the image, be sure to call jpeg_finish_decompress() to release +the buffered image (or just use jpeg_destroy_decompress()). + +If input data arrives faster than it can be displayed, the application can +cause the library to decode input data in advance of what's needed to produce +output. This is done by calling the routine jpeg_consume_input(). +The return value is one of the following: + JPEG_REACHED_SOS: reached an SOS marker (the start of a new scan) + JPEG_REACHED_EOI: reached the EOI marker (end of image) + JPEG_ROW_COMPLETED: completed reading one MCU row of compressed data + JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan + JPEG_SUSPENDED: suspended before completing any of the above +(JPEG_SUSPENDED can occur only if a suspending data source is used.) This +routine can be called at any time after initializing the JPEG object. It +reads some additional data and returns when one of the indicated significant +events occurs. (If called after the EOI marker is reached, it will +immediately return JPEG_REACHED_EOI without attempting to read more data.) + +The library's output processing will automatically call jpeg_consume_input() +whenever the output processing overtakes the input; thus, simple lockstep +display requires no direct calls to jpeg_consume_input(). But by adding +calls to jpeg_consume_input(), you can absorb data in advance of what is +being displayed. This has two benefits: + * You can limit buildup of unprocessed data in your input buffer. + * You can eliminate extra display passes by paying attention to the + state of the library's input processing. + +The first of these benefits only requires interspersing calls to +jpeg_consume_input() with your display operations and any other processing +you may be doing. To avoid wasting cycles due to backtracking, it's best to +call jpeg_consume_input() only after a hundred or so new bytes have arrived. +This is discussed further under "I/O suspension", above. (Note: the JPEG +library currently is not thread-safe. You must not call jpeg_consume_input() +from one thread of control if a different library routine is working on the +same JPEG object in another thread.) + +When input arrives fast enough that more than one new scan is available +before you start a new output pass, you may as well skip the output pass +corresponding to the completed scan. This occurs for free if you pass +cinfo.input_scan_number as the target scan number to jpeg_start_output(). +The input_scan_number field is simply the index of the scan currently being +consumed by the input processor. You can ensure that this is up-to-date by +emptying the input buffer just before calling jpeg_start_output(): call +jpeg_consume_input() repeatedly until it returns JPEG_SUSPENDED or +JPEG_REACHED_EOI. + +The target scan number passed to jpeg_start_output() is saved in the +cinfo.output_scan_number field. The library's output processing calls +jpeg_consume_input() whenever the current input scan number and row within +that scan is less than or equal to the current output scan number and row. +Thus, input processing can "get ahead" of the output processing but is not +allowed to "fall behind". You can achieve several different effects by +manipulating this interlock rule. For example, if you pass a target scan +number greater than the current input scan number, the output processor will +wait until that scan starts to arrive before producing any output. (To avoid +an infinite loop, the target scan number is automatically reset to the last +scan number when the end of image is reached. Thus, if you specify a large +target scan number, the library will just absorb the entire input file and +then perform an output pass. This is effectively the same as what +jpeg_start_decompress() does when you don't select buffered-image mode.) +When you pass a target scan number equal to the current input scan number, +the image is displayed no faster than the current input scan arrives. The +final possibility is to pass a target scan number less than the current input +scan number; this disables the input/output interlock and causes the output +processor to simply display whatever it finds in the image buffer, without +waiting for input. (However, the library will not accept a target scan +number less than one, so you can't avoid waiting for the first scan.) + +When data is arriving faster than the output display processing can advance +through the image, jpeg_consume_input() will store data into the buffered +image beyond the point at which the output processing is reading data out +again. If the input arrives fast enough, it may "wrap around" the buffer to +the point where the input is more than one whole scan ahead of the output. +If the output processing simply proceeds through its display pass without +paying attention to the input, the effect seen on-screen is that the lower +part of the image is one or more scans better in quality than the upper part. +Then, when the next output scan is started, you have a choice of what target +scan number to use. The recommended choice is to use the current input scan +number at that time, which implies that you've skipped the output scans +corresponding to the input scans that were completed while you processed the +previous output scan. In this way, the decoder automatically adapts its +speed to the arriving data, by skipping output scans as necessary to keep up +with the arriving data. + +When using this strategy, you'll want to be sure that you perform a final +output pass after receiving all the data; otherwise your last display may not +be full quality across the whole screen. So the right outer loop logic is +something like this: + do { + absorb any waiting input by calling jpeg_consume_input() + final_pass = jpeg_input_complete(&cinfo); + adjust output decompression parameters if required + jpeg_start_output(&cinfo, cinfo.input_scan_number); + ... + jpeg_finish_output() + } while (! final_pass); +rather than quitting as soon as jpeg_input_complete() returns TRUE. This +arrangement makes it simple to use higher-quality decoding parameters +for the final pass. But if you don't want to use special parameters for +the final pass, the right loop logic is like this: + for (;;) { + absorb any waiting input by calling jpeg_consume_input() + jpeg_start_output(&cinfo, cinfo.input_scan_number); + ... + jpeg_finish_output() + if (jpeg_input_complete(&cinfo) && + cinfo.input_scan_number == cinfo.output_scan_number) + break; + } +In this case you don't need to know in advance whether an output pass is to +be the last one, so it's not necessary to have reached EOF before starting +the final output pass; rather, what you want to test is whether the output +pass was performed in sync with the final input scan. This form of the loop +will avoid an extra output pass whenever the decoder is able (or nearly able) +to keep up with the incoming data. + +When the data transmission speed is high, you might begin a display pass, +then find that much or all of the file has arrived before you can complete +the pass. (You can detect this by noting the JPEG_REACHED_EOI return code +from jpeg_consume_input(), or equivalently by testing jpeg_input_complete().) +In this situation you may wish to abort the current display pass and start a +new one using the newly arrived information. To do so, just call +jpeg_finish_output() and then start a new pass with jpeg_start_output(). + +A variant strategy is to abort and restart display if more than one complete +scan arrives during an output pass; this can be detected by noting +JPEG_REACHED_SOS returns and/or examining cinfo.input_scan_number. This +idea should be employed with caution, however, since the display process +might never get to the bottom of the image before being aborted, resulting +in the lower part of the screen being several passes worse than the upper. +In most cases it's probably best to abort an output pass only if the whole +file has arrived and you want to begin the final output pass immediately. + +When receiving data across a communication link, we recommend always using +the current input scan number for the output target scan number; if a +higher-quality final pass is to be done, it should be started (aborting any +incomplete output pass) as soon as the end of file is received. However, +many other strategies are possible. For example, the application can examine +the parameters of the current input scan and decide whether to display it or +not. If the scan contains only chroma data, one might choose not to use it +as the target scan, expecting that the scan will be small and will arrive +quickly. To skip to the next scan, call jpeg_consume_input() until it +returns JPEG_REACHED_SOS or JPEG_REACHED_EOI. Or just use the next higher +number as the target scan for jpeg_start_output(); but that method doesn't +let you inspect the next scan's parameters before deciding to display it. + + +In buffered-image mode, jpeg_start_decompress() never performs input and +thus never suspends. An application that uses input suspension with +buffered-image mode must be prepared for suspension returns from these +routines: +* jpeg_start_output() performs input only if you request 2-pass quantization + and the target scan isn't fully read yet. (This is discussed below.) +* jpeg_read_scanlines(), as always, returns the number of scanlines that it + was able to produce before suspending. +* jpeg_finish_output() will read any markers following the target scan, + up to the end of the file or the SOS marker that begins another scan. + (But it reads no input if jpeg_consume_input() has already reached the + end of the file or a SOS marker beyond the target output scan.) +* jpeg_finish_decompress() will read until the end of file, and thus can + suspend if the end hasn't already been reached (as can be tested by + calling jpeg_input_complete()). +jpeg_start_output(), jpeg_finish_output(), and jpeg_finish_decompress() +all return TRUE if they completed their tasks, FALSE if they had to suspend. +In the event of a FALSE return, the application must load more input data +and repeat the call. Applications that use non-suspending data sources need +not check the return values of these three routines. + + +It is possible to change decoding parameters between output passes in the +buffered-image mode. The decoder library currently supports only very +limited changes of parameters. ONLY THE FOLLOWING parameter changes are +allowed after jpeg_start_decompress() is called: +* dct_method can be changed before each call to jpeg_start_output(). + For example, one could use a fast DCT method for early scans, changing + to a higher quality method for the final scan. +* dither_mode can be changed before each call to jpeg_start_output(); + of course this has no impact if not using color quantization. Typically + one would use ordered dither for initial passes, then switch to + Floyd-Steinberg dither for the final pass. Caution: changing dither mode + can cause more memory to be allocated by the library. Although the amount + of memory involved is not large (a scanline or so), it may cause the + initial max_memory_to_use specification to be exceeded, which in the worst + case would result in an out-of-memory failure. +* do_block_smoothing can be changed before each call to jpeg_start_output(). + This setting is relevant only when decoding a progressive JPEG image. + During the first DC-only scan, block smoothing provides a very "fuzzy" look + instead of the very "blocky" look seen without it; which is better seems a + matter of personal taste. But block smoothing is nearly always a win + during later stages, especially when decoding a successive-approximation + image: smoothing helps to hide the slight blockiness that otherwise shows + up on smooth gradients until the lowest coefficient bits are sent. +* Color quantization mode can be changed under the rules described below. + You *cannot* change between full-color and quantized output (because that + would alter the required I/O buffer sizes), but you can change which + quantization method is used. + +When generating color-quantized output, changing quantization method is a +very useful way of switching between high-speed and high-quality display. +The library allows you to change among its three quantization methods: +1. Single-pass quantization to a fixed color cube. + Selected by cinfo.two_pass_quantize = FALSE and cinfo.colormap = NULL. +2. Single-pass quantization to an application-supplied colormap. + Selected by setting cinfo.colormap to point to the colormap (the value of + two_pass_quantize is ignored); also set cinfo.actual_number_of_colors. +3. Two-pass quantization to a colormap chosen specifically for the image. + Selected by cinfo.two_pass_quantize = TRUE and cinfo.colormap = NULL. + (This is the default setting selected by jpeg_read_header, but it is + probably NOT what you want for the first pass of progressive display!) +These methods offer successively better quality and lesser speed. However, +only the first method is available for quantizing in non-RGB color spaces. + +IMPORTANT: because the different quantizer methods have very different +working-storage requirements, the library requires you to indicate which +one(s) you intend to use before you call jpeg_start_decompress(). (If we did +not require this, the max_memory_to_use setting would be a complete fiction.) +You do this by setting one or more of these three cinfo fields to TRUE: + enable_1pass_quant Fixed color cube colormap + enable_external_quant Externally-supplied colormap + enable_2pass_quant Two-pass custom colormap +All three are initialized FALSE by jpeg_read_header(). But +jpeg_start_decompress() automatically sets TRUE the one selected by the +current two_pass_quantize and colormap settings, so you only need to set the +enable flags for any other quantization methods you plan to change to later. + +After setting the enable flags correctly at jpeg_start_decompress() time, you +can change to any enabled quantization method by setting two_pass_quantize +and colormap properly just before calling jpeg_start_output(). The following +special rules apply: +1. You must explicitly set cinfo.colormap to NULL when switching to 1-pass + or 2-pass mode from a different mode, or when you want the 2-pass + quantizer to be re-run to generate a new colormap. +2. To switch to an external colormap, or to change to a different external + colormap than was used on the prior pass, you must call + jpeg_new_colormap() after setting cinfo.colormap. +NOTE: if you want to use the same colormap as was used in the prior pass, +you should not do either of these things. This will save some nontrivial +switchover costs. +(These requirements exist because cinfo.colormap will always be non-NULL +after completing a prior output pass, since both the 1-pass and 2-pass +quantizers set it to point to their output colormaps. Thus you have to +do one of these two things to notify the library that something has changed. +Yup, it's a bit klugy, but it's necessary to do it this way for backwards +compatibility.) + +Note that in buffered-image mode, the library generates any requested colormap +during jpeg_start_output(), not during jpeg_start_decompress(). + +When using two-pass quantization, jpeg_start_output() makes a pass over the +buffered image to determine the optimum color map; it therefore may take a +significant amount of time, whereas ordinarily it does little work. The +progress monitor hook is called during this pass, if defined. It is also +important to realize that if the specified target scan number is greater than +or equal to the current input scan number, jpeg_start_output() will attempt +to consume input as it makes this pass. If you use a suspending data source, +you need to check for a FALSE return from jpeg_start_output() under these +conditions. The combination of 2-pass quantization and a not-yet-fully-read +target scan is the only case in which jpeg_start_output() will consume input. + + +Application authors who support buffered-image mode may be tempted to use it +for all JPEG images, even single-scan ones. This will work, but it is +inefficient: there is no need to create an image-sized coefficient buffer for +single-scan images. Requesting buffered-image mode for such an image wastes +memory. Worse, it can cost time on large images, since the buffered data has +to be swapped out or written to a temporary file. If you are concerned about +maximum performance on baseline JPEG files, you should use buffered-image +mode only when the incoming file actually has multiple scans. This can be +tested by calling jpeg_has_multiple_scans(), which will return a correct +result at any time after jpeg_read_header() completes. + +It is also worth noting that when you use jpeg_consume_input() to let input +processing get ahead of output processing, the resulting pattern of access to +the coefficient buffer is quite nonsequential. It's best to use the memory +manager jmemnobs.c if you can (ie, if you have enough real or virtual main +memory). If not, at least make sure that max_memory_to_use is set as high as +possible. If the JPEG memory manager has to use a temporary file, you will +probably see a lot of disk traffic and poor performance. (This could be +improved with additional work on the memory manager, but we haven't gotten +around to it yet.) + +In some applications it may be convenient to use jpeg_consume_input() for all +input processing, including reading the initial markers; that is, you may +wish to call jpeg_consume_input() instead of jpeg_read_header() during +startup. This works, but note that you must check for JPEG_REACHED_SOS and +JPEG_REACHED_EOI return codes as the equivalent of jpeg_read_header's codes. +Once the first SOS marker has been reached, you must call +jpeg_start_decompress() before jpeg_consume_input() will consume more input; +it'll just keep returning JPEG_REACHED_SOS until you do. If you read a +tables-only file this way, jpeg_consume_input() will return JPEG_REACHED_EOI +without ever returning JPEG_REACHED_SOS; be sure to check for this case. +If this happens, the decompressor will not read any more input until you call +jpeg_abort() to reset it. It is OK to call jpeg_consume_input() even when not +using buffered-image mode, but in that case it's basically a no-op after the +initial markers have been read: it will just return JPEG_SUSPENDED. + + +Abbreviated datastreams and multiple images +------------------------------------------- + +A JPEG compression or decompression object can be reused to process multiple +images. This saves a small amount of time per image by eliminating the +"create" and "destroy" operations, but that isn't the real purpose of the +feature. Rather, reuse of an object provides support for abbreviated JPEG +datastreams. Object reuse can also simplify processing a series of images in +a single input or output file. This section explains these features. + +A JPEG file normally contains several hundred bytes worth of quantization +and Huffman tables. In a situation where many images will be stored or +transmitted with identical tables, this may represent an annoying overhead. +The JPEG standard therefore permits tables to be omitted. The standard +defines three classes of JPEG datastreams: + * "Interchange" datastreams contain an image and all tables needed to decode + the image. These are the usual kind of JPEG file. + * "Abbreviated image" datastreams contain an image, but are missing some or + all of the tables needed to decode that image. + * "Abbreviated table specification" (henceforth "tables-only") datastreams + contain only table specifications. +To decode an abbreviated image, it is necessary to load the missing table(s) +into the decoder beforehand. This can be accomplished by reading a separate +tables-only file. A variant scheme uses a series of images in which the first +image is an interchange (complete) datastream, while subsequent ones are +abbreviated and rely on the tables loaded by the first image. It is assumed +that once the decoder has read a table, it will remember that table until a +new definition for the same table number is encountered. + +It is the application designer's responsibility to figure out how to associate +the correct tables with an abbreviated image. While abbreviated datastreams +can be useful in a closed environment, their use is strongly discouraged in +any situation where data exchange with other applications might be needed. +Caveat designer. + +The JPEG library provides support for reading and writing any combination of +tables-only datastreams and abbreviated images. In both compression and +decompression objects, a quantization or Huffman table will be retained for +the lifetime of the object, unless it is overwritten by a new table definition. + + +To create abbreviated image datastreams, it is only necessary to tell the +compressor not to emit some or all of the tables it is using. Each +quantization and Huffman table struct contains a boolean field "sent_table", +which normally is initialized to FALSE. For each table used by the image, the +header-writing process emits the table and sets sent_table = TRUE unless it is +already TRUE. (In normal usage, this prevents outputting the same table +definition multiple times, as would otherwise occur because the chroma +components typically share tables.) Thus, setting this field to TRUE before +calling jpeg_start_compress() will prevent the table from being written at +all. + +If you want to create a "pure" abbreviated image file containing no tables, +just call "jpeg_suppress_tables(&cinfo, TRUE)" after constructing all the +tables. If you want to emit some but not all tables, you'll need to set the +individual sent_table fields directly. + +To create an abbreviated image, you must also call jpeg_start_compress() +with a second parameter of FALSE, not TRUE. Otherwise jpeg_start_compress() +will force all the sent_table fields to FALSE. (This is a safety feature to +prevent abbreviated images from being created accidentally.) + +To create a tables-only file, perform the same parameter setup that you +normally would, but instead of calling jpeg_start_compress() and so on, call +jpeg_write_tables(&cinfo). This will write an abbreviated datastream +containing only SOI, DQT and/or DHT markers, and EOI. All the quantization +and Huffman tables that are currently defined in the compression object will +be emitted unless their sent_tables flag is already TRUE, and then all the +sent_tables flags will be set TRUE. + +A sure-fire way to create matching tables-only and abbreviated image files +is to proceed as follows: + + create JPEG compression object + set JPEG parameters + set destination to tables-only file + jpeg_write_tables(&cinfo); + set destination to image file + jpeg_start_compress(&cinfo, FALSE); + write data... + jpeg_finish_compress(&cinfo); + +Since the JPEG parameters are not altered between writing the table file and +the abbreviated image file, the same tables are sure to be used. Of course, +you can repeat the jpeg_start_compress() ... jpeg_finish_compress() sequence +many times to produce many abbreviated image files matching the table file. + +You cannot suppress output of the computed Huffman tables when Huffman +optimization is selected. (If you could, there'd be no way to decode the +image...) Generally, you don't want to set optimize_coding = TRUE when +you are trying to produce abbreviated files. + +In some cases you might want to compress an image using tables which are +not stored in the application, but are defined in an interchange or +tables-only file readable by the application. This can be done by setting up +a JPEG decompression object to read the specification file, then copying the +tables into your compression object. See jpeg_copy_critical_parameters() +for an example of copying quantization tables. + + +To read abbreviated image files, you simply need to load the proper tables +into the decompression object before trying to read the abbreviated image. +If the proper tables are stored in the application program, you can just +allocate the table structs and fill in their contents directly. For example, +to load a fixed quantization table into table slot "n": + + if (cinfo.quant_tbl_ptrs[n] == NULL) + cinfo.quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) &cinfo); + quant_ptr = cinfo.quant_tbl_ptrs[n]; /* quant_ptr is JQUANT_TBL* */ + for (i = 0; i < 64; i++) { + /* Qtable[] is desired quantization table, in natural array order */ + quant_ptr->quantval[i] = Qtable[i]; + } + +Code to load a fixed Huffman table is typically (for AC table "n"): + + if (cinfo.ac_huff_tbl_ptrs[n] == NULL) + cinfo.ac_huff_tbl_ptrs[n] = jpeg_alloc_huff_table((j_common_ptr) &cinfo); + huff_ptr = cinfo.ac_huff_tbl_ptrs[n]; /* huff_ptr is JHUFF_TBL* */ + for (i = 1; i <= 16; i++) { + /* counts[i] is number of Huffman codes of length i bits, i=1..16 */ + huff_ptr->bits[i] = counts[i]; + } + for (i = 0; i < 256; i++) { + /* symbols[] is the list of Huffman symbols, in code-length order */ + huff_ptr->huffval[i] = symbols[i]; + } + +(Note that trying to set cinfo.quant_tbl_ptrs[n] to point directly at a +constant JQUANT_TBL object is not safe. If the incoming file happened to +contain a quantization table definition, your master table would get +overwritten! Instead allocate a working table copy and copy the master table +into it, as illustrated above. Ditto for Huffman tables, of course.) + +You might want to read the tables from a tables-only file, rather than +hard-wiring them into your application. The jpeg_read_header() call is +sufficient to read a tables-only file. You must pass a second parameter of +FALSE to indicate that you do not require an image to be present. Thus, the +typical scenario is + + create JPEG decompression object + set source to tables-only file + jpeg_read_header(&cinfo, FALSE); + set source to abbreviated image file + jpeg_read_header(&cinfo, TRUE); + set decompression parameters + jpeg_start_decompress(&cinfo); + read data... + jpeg_finish_decompress(&cinfo); + +In some cases, you may want to read a file without knowing whether it contains +an image or just tables. In that case, pass FALSE and check the return value +from jpeg_read_header(): it will be JPEG_HEADER_OK if an image was found, +JPEG_HEADER_TABLES_ONLY if only tables were found. (A third return value, +JPEG_SUSPENDED, is possible when using a suspending data source manager.) +Note that jpeg_read_header() will not complain if you read an abbreviated +image for which you haven't loaded the missing tables; the missing-table check +occurs later, in jpeg_start_decompress(). + + +It is possible to read a series of images from a single source file by +repeating the jpeg_read_header() ... jpeg_finish_decompress() sequence, +without releasing/recreating the JPEG object or the data source module. +(If you did reinitialize, any partial bufferload left in the data source +buffer at the end of one image would be discarded, causing you to lose the +start of the next image.) When you use this method, stored tables are +automatically carried forward, so some of the images can be abbreviated images +that depend on tables from earlier images. + +If you intend to write a series of images into a single destination file, +you might want to make a specialized data destination module that doesn't +flush the output buffer at term_destination() time. This would speed things +up by some trifling amount. Of course, you'd need to remember to flush the +buffer after the last image. You can make the later images be abbreviated +ones by passing FALSE to jpeg_start_compress(). + + +Special markers +--------------- + +Some applications may need to insert or extract special data in the JPEG +datastream. The JPEG standard provides marker types "COM" (comment) and +"APP0" through "APP15" (application) to hold application-specific data. +Unfortunately, the use of these markers is not specified by the standard. +COM markers are fairly widely used to hold user-supplied text. The JFIF file +format spec uses APP0 markers with specified initial strings to hold certain +data. Adobe applications use APP14 markers beginning with the string "Adobe" +for miscellaneous data. Other APPn markers are rarely seen, but might +contain almost anything. + +If you wish to store user-supplied text, we recommend you use COM markers +and place readable 7-bit ASCII text in them. Newline conventions are not +standardized --- expect to find LF (Unix style), CR/LF (DOS style), or CR +(Mac style). A robust COM reader should be able to cope with random binary +garbage, including nulls, since some applications generate COM markers +containing non-ASCII junk. (But yours should not be one of them.) + +For program-supplied data, use an APPn marker, and be sure to begin it with an +identifying string so that you can tell whether the marker is actually yours. +It's probably best to avoid using APP0 or APP14 for any private markers. +(NOTE: the upcoming SPIFF standard will use APP8 markers; we recommend you +not use APP8 markers for any private purposes, either.) + +Keep in mind that at most 65533 bytes can be put into one marker, but you +can have as many markers as you like. + +By default, the IJG compression library will write a JFIF APP0 marker if the +selected JPEG colorspace is grayscale or YCbCr, or an Adobe APP14 marker if +the selected colorspace is RGB, CMYK, or YCCK. You can disable this, but +we don't recommend it. The decompression library will recognize JFIF and +Adobe markers and will set the JPEG colorspace properly when one is found. + + +You can write special markers immediately following the datastream header by +calling jpeg_write_marker() after jpeg_start_compress() and before the first +call to jpeg_write_scanlines(). When you do this, the markers appear after +the SOI and the JFIF APP0 and Adobe APP14 markers (if written), but before +all else. Specify the marker type parameter as "JPEG_COM" for COM or +"JPEG_APP0 + n" for APPn. (Actually, jpeg_write_marker will let you write +any marker type, but we don't recommend writing any other kinds of marker.) +For example, to write a user comment string pointed to by comment_text: + jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text)); + +If it's not convenient to store all the marker data in memory at once, +you can instead call jpeg_write_m_header() followed by multiple calls to +jpeg_write_m_byte(). If you do it this way, it's your responsibility to +call jpeg_write_m_byte() exactly the number of times given in the length +parameter to jpeg_write_m_header(). (This method lets you empty the +output buffer partway through a marker, which might be important when +using a suspending data destination module. In any case, if you are using +a suspending destination, you should flush its buffer after inserting +any special markers. See "I/O suspension".) + +Or, if you prefer to synthesize the marker byte sequence yourself, +you can just cram it straight into the data destination module. + +If you are writing JFIF 1.02 extension markers (thumbnail images), don't +forget to set cinfo.JFIF_minor_version = 2 so that the encoder will write the +correct JFIF version number in the JFIF header marker. The library's default +is to write version 1.01, but that's wrong if you insert any 1.02 extension +markers. (We could probably get away with just defaulting to 1.02, but there +used to be broken decoders that would complain about unknown minor version +numbers. To reduce compatibility risks it's safest not to write 1.02 unless +you are actually using 1.02 extensions.) + + +When reading, two methods of handling special markers are available: +1. You can ask the library to save the contents of COM and/or APPn markers +into memory, and then examine them at your leisure afterwards. +2. You can supply your own routine to process COM and/or APPn markers +on-the-fly as they are read. +The first method is simpler to use, especially if you are using a suspending +data source; writing a marker processor that copes with input suspension is +not easy (consider what happens if the marker is longer than your available +input buffer). However, the second method conserves memory since the marker +data need not be kept around after it's been processed. + +For either method, you'd normally set up marker handling after creating a +decompression object and before calling jpeg_read_header(), because the +markers of interest will typically be near the head of the file and so will +be scanned by jpeg_read_header. Once you've established a marker handling +method, it will be used for the life of that decompression object +(potentially many datastreams), unless you change it. Marker handling is +determined separately for COM markers and for each APPn marker code. + + +To save the contents of special markers in memory, call + jpeg_save_markers(cinfo, marker_code, length_limit) +where marker_code is the marker type to save, JPEG_COM or JPEG_APP0+n. +(To arrange to save all the special marker types, you need to call this +routine 17 times, for COM and APP0-APP15.) If the incoming marker is longer +than length_limit data bytes, only length_limit bytes will be saved; this +parameter allows you to avoid chewing up memory when you only need to see the +first few bytes of a potentially large marker. If you want to save all the +data, set length_limit to 0xFFFF; that is enough since marker lengths are only +16 bits. As a special case, setting length_limit to 0 prevents that marker +type from being saved at all. (That is the default behavior, in fact.) + +After jpeg_read_header() completes, you can examine the special markers by +following the cinfo->marker_list pointer chain. All the special markers in +the file appear in this list, in order of their occurrence in the file (but +omitting any markers of types you didn't ask for). Both the original data +length and the saved data length are recorded for each list entry; the latter +will not exceed length_limit for the particular marker type. Note that these +lengths exclude the marker length word, whereas the stored representation +within the JPEG file includes it. (Hence the maximum data length is really +only 65533.) + +It is possible that additional special markers appear in the file beyond the +SOS marker at which jpeg_read_header stops; if so, the marker list will be +extended during reading of the rest of the file. This is not expected to be +common, however. If you are short on memory you may want to reset the length +limit to zero for all marker types after finishing jpeg_read_header, to +ensure that the max_memory_to_use setting cannot be exceeded due to addition +of later markers. + +The marker list remains stored until you call jpeg_finish_decompress or +jpeg_abort, at which point the memory is freed and the list is set to empty. +(jpeg_destroy also releases the storage, of course.) + +Note that the library is internally interested in APP0 and APP14 markers; +if you try to set a small nonzero length limit on these types, the library +will silently force the length up to the minimum it wants. (But you can set +a zero length limit to prevent them from being saved at all.) Also, in a +16-bit environment, the maximum length limit may be constrained to less than +65533 by malloc() limitations. It is therefore best not to assume that the +effective length limit is exactly what you set it to be. + + +If you want to supply your own marker-reading routine, you do it by calling +jpeg_set_marker_processor(). A marker processor routine must have the +signature + boolean jpeg_marker_parser_method (j_decompress_ptr cinfo) +Although the marker code is not explicitly passed, the routine can find it +in cinfo->unread_marker. At the time of call, the marker proper has been +read from the data source module. The processor routine is responsible for +reading the marker length word and the remaining parameter bytes, if any. +Return TRUE to indicate success. (FALSE should be returned only if you are +using a suspending data source and it tells you to suspend. See the standard +marker processors in jdmarker.c for appropriate coding methods if you need to +use a suspending data source.) + +If you override the default APP0 or APP14 processors, it is up to you to +recognize JFIF and Adobe markers if you want colorspace recognition to occur +properly. We recommend copying and extending the default processors if you +want to do that. (A better idea is to save these marker types for later +examination by calling jpeg_save_markers(); that method doesn't interfere +with the library's own processing of these markers.) + +jpeg_set_marker_processor() and jpeg_save_markers() are mutually exclusive +--- if you call one it overrides any previous call to the other, for the +particular marker type specified. + +A simple example of an external COM processor can be found in djpeg.c. +Also, see jpegtran.c for an example of using jpeg_save_markers. + + +Raw (downsampled) image data +---------------------------- + +Some applications need to supply already-downsampled image data to the JPEG +compressor, or to receive raw downsampled data from the decompressor. The +library supports this requirement by allowing the application to write or +read raw data, bypassing the normal preprocessing or postprocessing steps. +The interface is different from the standard one and is somewhat harder to +use. If your interest is merely in bypassing color conversion, we recommend +that you use the standard interface and simply set jpeg_color_space = +in_color_space (or jpeg_color_space = out_color_space for decompression). +The mechanism described in this section is necessary only to supply or +receive downsampled image data, in which not all components have the same +dimensions. + + +To compress raw data, you must supply the data in the colorspace to be used +in the JPEG file (please read the earlier section on Special color spaces) +and downsampled to the sampling factors specified in the JPEG parameters. +You must supply the data in the format used internally by the JPEG library, +namely a JSAMPIMAGE array. This is an array of pointers to two-dimensional +arrays, each of type JSAMPARRAY. Each 2-D array holds the values for one +color component. This structure is necessary since the components are of +different sizes. If the image dimensions are not a multiple of the MCU size, +you must also pad the data correctly (usually, this is done by replicating +the last column and/or row). The data must be padded to a multiple of a DCT +block in each component: that is, each downsampled row must contain a +multiple of 8 valid samples, and there must be a multiple of 8 sample rows +for each component. (For applications such as conversion of digital TV +images, the standard image size is usually a multiple of the DCT block size, +so that no padding need actually be done.) + +The procedure for compression of raw data is basically the same as normal +compression, except that you call jpeg_write_raw_data() in place of +jpeg_write_scanlines(). Before calling jpeg_start_compress(), you must do +the following: + * Set cinfo->raw_data_in to TRUE. (It is set FALSE by jpeg_set_defaults().) + This notifies the library that you will be supplying raw data. + * Ensure jpeg_color_space is correct --- an explicit jpeg_set_colorspace() + call is a good idea. Note that since color conversion is bypassed, + in_color_space is ignored, except that jpeg_set_defaults() uses it to + choose the default jpeg_color_space setting. + * Ensure the sampling factors, cinfo->comp_info[i].h_samp_factor and + cinfo->comp_info[i].v_samp_factor, are correct. Since these indicate the + dimensions of the data you are supplying, it's wise to set them + explicitly, rather than assuming the library's defaults are what you want. + +To pass raw data to the library, call jpeg_write_raw_data() in place of +jpeg_write_scanlines(). The two routines work similarly except that +jpeg_write_raw_data takes a JSAMPIMAGE data array rather than JSAMPARRAY. +The scanlines count passed to and returned from jpeg_write_raw_data is +measured in terms of the component with the largest v_samp_factor. + +jpeg_write_raw_data() processes one MCU row per call, which is to say +v_samp_factor*DCTSIZE sample rows of each component. The passed num_lines +value must be at least max_v_samp_factor*DCTSIZE, and the return value will +be exactly that amount (or possibly some multiple of that amount, in future +library versions). This is true even on the last call at the bottom of the +image; don't forget to pad your data as necessary. + +The required dimensions of the supplied data can be computed for each +component as + cinfo->comp_info[i].width_in_blocks*DCTSIZE samples per row + cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image +after jpeg_start_compress() has initialized those fields. If the valid data +is smaller than this, it must be padded appropriately. For some sampling +factors and image sizes, additional dummy DCT blocks are inserted to make +the image a multiple of the MCU dimensions. The library creates such dummy +blocks itself; it does not read them from your supplied data. Therefore you +need never pad by more than DCTSIZE samples. An example may help here. +Assume 2h2v downsampling of YCbCr data, that is + cinfo->comp_info[0].h_samp_factor = 2 for Y + cinfo->comp_info[0].v_samp_factor = 2 + cinfo->comp_info[1].h_samp_factor = 1 for Cb + cinfo->comp_info[1].v_samp_factor = 1 + cinfo->comp_info[2].h_samp_factor = 1 for Cr + cinfo->comp_info[2].v_samp_factor = 1 +and suppose that the nominal image dimensions (cinfo->image_width and +cinfo->image_height) are 101x101 pixels. Then jpeg_start_compress() will +compute downsampled_width = 101 and width_in_blocks = 13 for Y, +downsampled_width = 51 and width_in_blocks = 7 for Cb and Cr (and the same +for the height fields). You must pad the Y data to at least 13*8 = 104 +columns and rows, the Cb/Cr data to at least 7*8 = 56 columns and rows. The +MCU height is max_v_samp_factor = 2 DCT rows so you must pass at least 16 +scanlines on each call to jpeg_write_raw_data(), which is to say 16 actual +sample rows of Y and 8 each of Cb and Cr. A total of 7 MCU rows are needed, +so you must pass a total of 7*16 = 112 "scanlines". The last DCT block row +of Y data is dummy, so it doesn't matter what you pass for it in the data +arrays, but the scanlines count must total up to 112 so that all of the Cb +and Cr data gets passed. + +Output suspension is supported with raw-data compression: if the data +destination module suspends, jpeg_write_raw_data() will return 0. +In this case the same data rows must be passed again on the next call. + + +Decompression with raw data output implies bypassing all postprocessing: +you cannot ask for rescaling or color quantization, for instance. More +seriously, you must deal with the color space and sampling factors present in +the incoming file. If your application only handles, say, 2h1v YCbCr data, +you must check for and fail on other color spaces or other sampling factors. +The library will not convert to a different color space for you. + +To obtain raw data output, set cinfo->raw_data_out = TRUE before +jpeg_start_decompress() (it is set FALSE by jpeg_read_header()). Be sure to +verify that the color space and sampling factors are ones you can handle. +Then call jpeg_read_raw_data() in place of jpeg_read_scanlines(). The +decompression process is otherwise the same as usual. + +jpeg_read_raw_data() returns one MCU row per call, and thus you must pass a +buffer of at least max_v_samp_factor*DCTSIZE scanlines (scanline counting is +the same as for raw-data compression). The buffer you pass must be large +enough to hold the actual data plus padding to DCT-block boundaries. As with +compression, any entirely dummy DCT blocks are not processed so you need not +allocate space for them, but the total scanline count includes them. The +above example of computing buffer dimensions for raw-data compression is +equally valid for decompression. + +Input suspension is supported with raw-data decompression: if the data source +module suspends, jpeg_read_raw_data() will return 0. You can also use +buffered-image mode to read raw data in multiple passes. + + +Really raw data: DCT coefficients +--------------------------------- + +It is possible to read or write the contents of a JPEG file as raw DCT +coefficients. This facility is mainly intended for use in lossless +transcoding between different JPEG file formats. Other possible applications +include lossless cropping of a JPEG image, lossless reassembly of a +multi-strip or multi-tile TIFF/JPEG file into a single JPEG datastream, etc. + +To read the contents of a JPEG file as DCT coefficients, open the file and do +jpeg_read_header() as usual. But instead of calling jpeg_start_decompress() +and jpeg_read_scanlines(), call jpeg_read_coefficients(). This will read the +entire image into a set of virtual coefficient-block arrays, one array per +component. The return value is a pointer to an array of virtual-array +descriptors. Each virtual array can be accessed directly using the JPEG +memory manager's access_virt_barray method (see Memory management, below, +and also read structure.txt's discussion of virtual array handling). Or, +for simple transcoding to a different JPEG file format, the array list can +just be handed directly to jpeg_write_coefficients(). + +Each block in the block arrays contains quantized coefficient values in +normal array order (not JPEG zigzag order). The block arrays contain only +DCT blocks containing real data; any entirely-dummy blocks added to fill out +interleaved MCUs at the right or bottom edges of the image are discarded +during reading and are not stored in the block arrays. (The size of each +block array can be determined from the width_in_blocks and height_in_blocks +fields of the component's comp_info entry.) This is also the data format +expected by jpeg_write_coefficients(). + +When you are done using the virtual arrays, call jpeg_finish_decompress() +to release the array storage and return the decompression object to an idle +state; or just call jpeg_destroy() if you don't need to reuse the object. + +If you use a suspending data source, jpeg_read_coefficients() will return +NULL if it is forced to suspend; a non-NULL return value indicates successful +completion. You need not test for a NULL return value when using a +non-suspending data source. + +It is also possible to call jpeg_read_coefficients() to obtain access to the +decoder's coefficient arrays during a normal decode cycle in buffered-image +mode. This frammish might be useful for progressively displaying an incoming +image and then re-encoding it without loss. To do this, decode in buffered- +image mode as discussed previously, then call jpeg_read_coefficients() after +the last jpeg_finish_output() call. The arrays will be available for your use +until you call jpeg_finish_decompress(). + + +To write the contents of a JPEG file as DCT coefficients, you must provide +the DCT coefficients stored in virtual block arrays. You can either pass +block arrays read from an input JPEG file by jpeg_read_coefficients(), or +allocate virtual arrays from the JPEG compression object and fill them +yourself. In either case, jpeg_write_coefficients() is substituted for +jpeg_start_compress() and jpeg_write_scanlines(). Thus the sequence is + * Create compression object + * Set all compression parameters as necessary + * Request virtual arrays if needed + * jpeg_write_coefficients() + * jpeg_finish_compress() + * Destroy or re-use compression object +jpeg_write_coefficients() is passed a pointer to an array of virtual block +array descriptors; the number of arrays is equal to cinfo.num_components. + +The virtual arrays need only have been requested, not realized, before +jpeg_write_coefficients() is called. A side-effect of +jpeg_write_coefficients() is to realize any virtual arrays that have been +requested from the compression object's memory manager. Thus, when obtaining +the virtual arrays from the compression object, you should fill the arrays +after calling jpeg_write_coefficients(). The data is actually written out +when you call jpeg_finish_compress(); jpeg_write_coefficients() only writes +the file header. + +When writing raw DCT coefficients, it is crucial that the JPEG quantization +tables and sampling factors match the way the data was encoded, or the +resulting file will be invalid. For transcoding from an existing JPEG file, +we recommend using jpeg_copy_critical_parameters(). This routine initializes +all the compression parameters to default values (like jpeg_set_defaults()), +then copies the critical information from a source decompression object. +The decompression object should have just been used to read the entire +JPEG input file --- that is, it should be awaiting jpeg_finish_decompress(). + +jpeg_write_coefficients() marks all tables stored in the compression object +as needing to be written to the output file (thus, it acts like +jpeg_start_compress(cinfo, TRUE)). This is for safety's sake, to avoid +emitting abbreviated JPEG files by accident. If you really want to emit an +abbreviated JPEG file, call jpeg_suppress_tables(), or set the tables' +individual sent_table flags, between calling jpeg_write_coefficients() and +jpeg_finish_compress(). + + +Progress monitoring +------------------- + +Some applications may need to regain control from the JPEG library every so +often. The typical use of this feature is to produce a percent-done bar or +other progress display. (For a simple example, see cjpeg.c or djpeg.c.) +Although you do get control back frequently during the data-transferring pass +(the jpeg_read_scanlines or jpeg_write_scanlines loop), any additional passes +will occur inside jpeg_finish_compress or jpeg_start_decompress; those +routines may take a long time to execute, and you don't get control back +until they are done. + +You can define a progress-monitor routine which will be called periodically +by the library. No guarantees are made about how often this call will occur, +so we don't recommend you use it for mouse tracking or anything like that. +At present, a call will occur once per MCU row, scanline, or sample row +group, whichever unit is convenient for the current processing mode; so the +wider the image, the longer the time between calls. During the data +transferring pass, only one call occurs per call of jpeg_read_scanlines or +jpeg_write_scanlines, so don't pass a large number of scanlines at once if +you want fine resolution in the progress count. (If you really need to use +the callback mechanism for time-critical tasks like mouse tracking, you could +insert additional calls inside some of the library's inner loops.) + +To establish a progress-monitor callback, create a struct jpeg_progress_mgr, +fill in its progress_monitor field with a pointer to your callback routine, +and set cinfo->progress to point to the struct. The callback will be called +whenever cinfo->progress is non-NULL. (This pointer is set to NULL by +jpeg_create_compress or jpeg_create_decompress; the library will not change +it thereafter. So if you allocate dynamic storage for the progress struct, +make sure it will live as long as the JPEG object does. Allocating from the +JPEG memory manager with lifetime JPOOL_PERMANENT will work nicely.) You +can use the same callback routine for both compression and decompression. + +The jpeg_progress_mgr struct contains four fields which are set by the library: + long pass_counter; /* work units completed in this pass */ + long pass_limit; /* total number of work units in this pass */ + int completed_passes; /* passes completed so far */ + int total_passes; /* total number of passes expected */ +During any one pass, pass_counter increases from 0 up to (not including) +pass_limit; the step size is usually but not necessarily 1. The pass_limit +value may change from one pass to another. The expected total number of +passes is in total_passes, and the number of passes already completed is in +completed_passes. Thus the fraction of work completed may be estimated as + completed_passes + (pass_counter/pass_limit) + -------------------------------------------- + total_passes +ignoring the fact that the passes may not be equal amounts of work. + +When decompressing, pass_limit can even change within a pass, because it +depends on the number of scans in the JPEG file, which isn't always known in +advance. The computed fraction-of-work-done may jump suddenly (if the library +discovers it has overestimated the number of scans) or even decrease (in the +opposite case). It is not wise to put great faith in the work estimate. + +When using the decompressor's buffered-image mode, the progress monitor work +estimate is likely to be completely unhelpful, because the library has no way +to know how many output passes will be demanded of it. Currently, the library +sets total_passes based on the assumption that there will be one more output +pass if the input file end hasn't yet been read (jpeg_input_complete() isn't +TRUE), but no more output passes if the file end has been reached when the +output pass is started. This means that total_passes will rise as additional +output passes are requested. If you have a way of determining the input file +size, estimating progress based on the fraction of the file that's been read +will probably be more useful than using the library's value. + + +Memory management +----------------- + +This section covers some key facts about the JPEG library's built-in memory +manager. For more info, please read structure.txt's section about the memory +manager, and consult the source code if necessary. + +All memory and temporary file allocation within the library is done via the +memory manager. If necessary, you can replace the "back end" of the memory +manager to control allocation yourself (for example, if you don't want the +library to use malloc() and free() for some reason). + +Some data is allocated "permanently" and will not be freed until the JPEG +object is destroyed. Most data is allocated "per image" and is freed by +jpeg_finish_compress, jpeg_finish_decompress, or jpeg_abort. You can call the +memory manager yourself to allocate structures that will automatically be +freed at these times. Typical code for this is + ptr = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, size); +Use JPOOL_PERMANENT to get storage that lasts as long as the JPEG object. +Use alloc_large instead of alloc_small for anything bigger than a few Kbytes. +There are also alloc_sarray and alloc_barray routines that automatically +build 2-D sample or block arrays. + +The library's minimum space requirements to process an image depend on the +image's width, but not on its height, because the library ordinarily works +with "strip" buffers that are as wide as the image but just a few rows high. +Some operating modes (eg, two-pass color quantization) require full-image +buffers. Such buffers are treated as "virtual arrays": only the current strip +need be in memory, and the rest can be swapped out to a temporary file. + +If you use the simplest memory manager back end (jmemnobs.c), then no +temporary files are used; virtual arrays are simply malloc()'d. Images bigger +than memory can be processed only if your system supports virtual memory. +The other memory manager back ends support temporary files of various flavors +and thus work in machines without virtual memory. They may also be useful on +Unix machines if you need to process images that exceed available swap space. + +When using temporary files, the library will make the in-memory buffers for +its virtual arrays just big enough to stay within a "maximum memory" setting. +Your application can set this limit by setting cinfo->mem->max_memory_to_use +after creating the JPEG object. (Of course, there is still a minimum size for +the buffers, so the max-memory setting is effective only if it is bigger than +the minimum space needed.) If you allocate any large structures yourself, you +must allocate them before jpeg_start_compress() or jpeg_start_decompress() in +order to have them counted against the max memory limit. Also keep in mind +that space allocated with alloc_small() is ignored, on the assumption that +it's too small to be worth worrying about; so a reasonable safety margin +should be left when setting max_memory_to_use. + + +Memory usage +------------ + +Working memory requirements while performing compression or decompression +depend on image dimensions, image characteristics (such as colorspace and +JPEG process), and operating mode (application-selected options). + +As of v6b, the decompressor requires: + 1. About 24K in more-or-less-fixed-size data. This varies a bit depending + on operating mode and image characteristics (particularly color vs. + grayscale), but it doesn't depend on image dimensions. + 2. Strip buffers (of size proportional to the image width) for IDCT and + upsampling results. The worst case for commonly used sampling factors + is about 34 bytes * width in pixels for a color image. A grayscale image + only needs about 8 bytes per pixel column. + 3. A full-image DCT coefficient buffer is needed to decode a multi-scan JPEG + file (including progressive JPEGs), or whenever you select buffered-image + mode. This takes 2 bytes/coefficient. At typical 2x2 sampling, that's + 3 bytes per pixel for a color image. Worst case (1x1 sampling) requires + 6 bytes/pixel. For grayscale, figure 2 bytes/pixel. + 4. To perform 2-pass color quantization, the decompressor also needs a + 128K color lookup table and a full-image pixel buffer (3 bytes/pixel). +This does not count any memory allocated by the application, such as a +buffer to hold the final output image. + +The above figures are valid for 8-bit JPEG data precision and a machine with +32-bit ints. For 12-bit JPEG data, double the size of the strip buffers and +quantization pixel buffer. The "fixed-size" data will be somewhat smaller +with 16-bit ints, larger with 64-bit ints. Also, CMYK or other unusual +color spaces will require different amounts of space. + +The full-image coefficient and pixel buffers, if needed at all, do not +have to be fully RAM resident; you can have the library use temporary +files instead when the total memory usage would exceed a limit you set. +(But if your OS supports virtual memory, it's probably better to just use +jmemnobs and let the OS do the swapping.) + +The compressor's memory requirements are similar, except that it has no need +for color quantization. Also, it needs a full-image DCT coefficient buffer +if Huffman-table optimization is asked for, even if progressive mode is not +requested. + +If you need more detailed information about memory usage in a particular +situation, you can enable the MEM_STATS code in jmemmgr.c. + + +Library compile-time options +---------------------------- + +A number of compile-time options are available by modifying jmorecfg.h. + +The JPEG standard provides for both the baseline 8-bit DCT process and +a 12-bit DCT process. The IJG code supports 12-bit lossy JPEG if you define +BITS_IN_JSAMPLE as 12 rather than 8. Note that this causes JSAMPLE to be +larger than a char, so it affects the surrounding application's image data. +The sample applications cjpeg and djpeg can support 12-bit mode only for PPM +and GIF file formats; you must disable the other file formats to compile a +12-bit cjpeg or djpeg. (install.txt has more information about that.) +At present, a 12-bit library can handle *only* 12-bit images, not both +precisions. + +Note that a 12-bit library always compresses in Huffman optimization mode, +in order to generate valid Huffman tables. This is necessary because our +default Huffman tables only cover 8-bit data. If you need to output 12-bit +files in one pass, you'll have to supply suitable default Huffman tables. +You may also want to supply your own DCT quantization tables; the existing +quality-scaling code has been developed for 8-bit use, and probably doesn't +generate especially good tables for 12-bit. + +The maximum number of components (color channels) in the image is determined +by MAX_COMPONENTS. The JPEG standard allows up to 255 components, but we +expect that few applications will need more than four or so. + +On machines with unusual data type sizes, you may be able to improve +performance or reduce memory space by tweaking the various typedefs in +jmorecfg.h. In particular, on some RISC CPUs, access to arrays of "short"s +is quite slow; consider trading memory for speed by making JCOEF, INT16, and +UINT16 be "int" or "unsigned int". UINT8 is also a candidate to become int. +You probably don't want to make JSAMPLE be int unless you have lots of memory +to burn. + +You can reduce the size of the library by compiling out various optional +functions. To do this, undefine xxx_SUPPORTED symbols as necessary. + +You can also save a few K by not having text error messages in the library; +the standard error message table occupies about 5Kb. This is particularly +reasonable for embedded applications where there's no good way to display +a message anyway. To do this, remove the creation of the message table +(jpeg_std_message_table[]) from jerror.c, and alter format_message to do +something reasonable without it. You could output the numeric value of the +message code number, for example. If you do this, you can also save a couple +more K by modifying the TRACEMSn() macros in jerror.h to expand to nothing; +you don't need trace capability anyway, right? + + +Portability considerations +-------------------------- + +The JPEG library has been written to be extremely portable; the sample +applications cjpeg and djpeg are slightly less so. This section summarizes +the design goals in this area. (If you encounter any bugs that cause the +library to be less portable than is claimed here, we'd appreciate hearing +about them.) + +The code works fine on ANSI C and C++ compilers, using any of the popular +system include file setups, and some not-so-popular ones too. + +The code is not dependent on the exact sizes of the C data types. As +distributed, we make the assumptions that + char is at least 8 bits wide + short is at least 16 bits wide + int is at least 16 bits wide + long is at least 32 bits wide +(These are the minimum requirements of the ANSI C standard.) Wider types will +work fine, although memory may be used inefficiently if char is much larger +than 8 bits or short is much bigger than 16 bits. The code should work +equally well with 16- or 32-bit ints. + +In a system where these assumptions are not met, you may be able to make the +code work by modifying the typedefs in jmorecfg.h. However, you will probably +have difficulty if int is less than 16 bits wide, since references to plain +int abound in the code. + +char can be either signed or unsigned, although the code runs faster if an +unsigned char type is available. If char is wider than 8 bits, you will need +to redefine JOCTET and/or provide custom data source/destination managers so +that JOCTET represents exactly 8 bits of data on external storage. + +The JPEG library proper does not assume ASCII representation of characters. +But some of the image file I/O modules in cjpeg/djpeg do have ASCII +dependencies in file-header manipulation; so does cjpeg's select_file_type() +routine. + +The JPEG library does not rely heavily on the C library. In particular, C +stdio is used only by the data source/destination modules and the error +handler, all of which are application-replaceable. (cjpeg/djpeg are more +heavily dependent on stdio.) malloc and free are called only from the memory +manager "back end" module, so you can use a different memory allocator by +replacing that one file. + +More info about porting the code may be gleaned by reading jconfig.txt, +jmorecfg.h, and jinclude.h. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/LICENSE.md glmark2-2021.02/src/libjpeg-turbo/LICENSE.md --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/LICENSE.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/LICENSE.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,88 @@ +libjpeg-turbo Licenses +====================== + +libjpeg-turbo is covered by three compatible BSD-style open source licenses: + +- The IJG (Independent JPEG Group) License, which is listed in + [README.ijg](README.ijg) + + This license applies to the libjpeg API library and associated programs + (any code inherited from libjpeg, and any modifications to that code.) + +- The Modified (3-clause) BSD License, which is listed in + [turbojpeg.c](turbojpeg.c) + + This license covers the TurboJPEG API library and associated programs. + +- The zlib License, which is listed in [simd/jsimdext.inc](simd/jsimdext.inc) + + This license is a subset of the other two, and it covers the libjpeg-turbo + SIMD extensions. + + +Complying with the libjpeg-turbo Licenses +========================================= + +This section provides a roll-up of the libjpeg-turbo licensing terms, to the +best of our understanding. + +1. If you are distributing a modified version of the libjpeg-turbo source, + then: + + 1. You cannot alter or remove any existing copyright or license notices + from the source. + + **Origin** + - Clause 1 of the IJG License + - Clause 1 of the Modified BSD License + - Clauses 1 and 3 of the zlib License + + 2. You must add your own copyright notice to the header of each source + file you modified, so others can tell that you modified that file (if + there is not an existing copyright header in that file, then you can + simply add a notice stating that you modified the file.) + + **Origin** + - Clause 1 of the IJG License + - Clause 2 of the zlib License + + 3. You must include the IJG README file, and you must not alter any of the + copyright or license text in that file. + + **Origin** + - Clause 1 of the IJG License + +2. If you are distributing only libjpeg-turbo binaries without the source, or + if you are distributing an application that statically links with + libjpeg-turbo, then: + + 1. Your product documentation must include a message stating: + + This software is based in part on the work of the Independent JPEG + Group. + + **Origin** + - Clause 2 of the IJG license + + 2. If your binary distribution includes or uses the TurboJPEG API, then + your product documentation must include the text of the Modified BSD + License. + + **Origin** + - Clause 2 of the Modified BSD License + +3. You cannot use the name of the IJG or The libjpeg-turbo Project or the + contributors thereof in advertising, publicity, etc. + + **Origin** + - IJG License + - Clause 3 of the Modified BSD License + +4. The IJG and The libjpeg-turbo Project do not warrant libjpeg-turbo to be + free of defects, nor do we accept any liability for undesirable + consequences resulting from your use of the software. + + **Origin** + - IJG License + - Modified BSD License + - zlib License diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/Makefile.am glmark2-2021.02/src/libjpeg-turbo/Makefile.am --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/Makefile.am 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/Makefile.am 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,778 @@ +lib_LTLIBRARIES = libjpeg.la +libjpeg_la_LDFLAGS = -version-info ${LIBTOOL_CURRENT}:${SO_MINOR_VERSION}:${SO_AGE} -no-undefined +include_HEADERS = jerror.h jmorecfg.h jpeglib.h + +if WITH_TURBOJPEG +lib_LTLIBRARIES += libturbojpeg.la +libturbojpeg_la_LDFLAGS = -version-info 1:0:1 -no-undefined +include_HEADERS += turbojpeg.h +endif + +nodist_include_HEADERS = jconfig.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = pkgscripts/libjpeg.pc pkgscripts/libturbojpeg.pc + +HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \ + jpeg_nbits_table.h + +libjpeg_la_SOURCES = $(HDRS) jcapimin.c jcapistd.c jccoefct.c jccolor.c \ + jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c \ + jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c jctrans.c \ + jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c \ + jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c \ + jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c \ + jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c \ + jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c + +if WITH_ARITH +libjpeg_la_SOURCES += jaricom.c +endif + +if WITH_ARITH_ENC +libjpeg_la_SOURCES += jcarith.c +endif + +if WITH_ARITH_DEC +libjpeg_la_SOURCES += jdarith.c +endif + + +SUBDIRS = java + + +if WITH_TURBOJPEG + +libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpeg.c turbojpeg.h \ + transupp.c transupp.h jdatadst-tj.c jdatasrc-tj.c + +if WITH_JAVA + +libturbojpeg_la_SOURCES += turbojpeg-jni.c +libturbojpeg_la_CFLAGS = ${JNI_CFLAGS} +TJMAPFILE = turbojpeg-mapfile.jni + +else + +TJMAPFILE = turbojpeg-mapfile + +endif + +libturbojpeg_la_SOURCES += $(TJMAPFILE) + +if VERSION_SCRIPT +libturbojpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE) +endif + +endif + + +if VERSION_SCRIPT +libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map +endif + + +if WITH_SIMD + +SUBDIRS += simd +libjpeg_la_LIBADD = simd/libsimd.la +libturbojpeg_la_LIBADD = simd/libsimd.la + +else + +libjpeg_la_SOURCES += jsimd_none.c + +endif + + +bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom +noinst_PROGRAMS = jcstest + + +if WITH_TURBOJPEG + +bin_PROGRAMS += tjbench + +noinst_PROGRAMS += tjunittest + +tjbench_SOURCES = tjbench.c bmp.h bmp.c tjutil.h tjutil.c rdbmp.c rdppm.c \ + wrbmp.c wrppm.c + +tjbench_LDADD = libturbojpeg.la libjpeg.la -lm + +tjbench_CFLAGS = -DBMP_SUPPORTED -DPPM_SUPPORTED + +tjunittest_SOURCES = tjunittest.c tjutil.h tjutil.c + +tjunittest_LDADD = libturbojpeg.la + +endif + + +cjpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c cjpeg.c rdgif.c rdppm.c rdswitch.c +if WITH_12BIT +else +cjpeg_SOURCES += rdbmp.c rdtarga.c +endif + +cjpeg_LDADD = libjpeg.la + +cjpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED +if WITH_12BIT +else +cjpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED +endif + +djpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c djpeg.c rdcolmap.c rdswitch.c \ + wrgif.c wrppm.c +if WITH_12BIT +else +djpeg_SOURCES += wrbmp.c wrtarga.c +endif + +djpeg_LDADD = libjpeg.la + +djpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED +if WITH_12BIT +else +djpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED +endif + +jpegtran_SOURCES = jpegtran.c rdswitch.c cdjpeg.c transupp.c transupp.h + +jpegtran_LDADD = libjpeg.la + +rdjpgcom_SOURCES = rdjpgcom.c + +rdjpgcom_LDADD = libjpeg.la + +wrjpgcom_SOURCES = wrjpgcom.c + +wrjpgcom_LDADD = libjpeg.la + +jcstest_SOURCES = jcstest.c + +jcstest_LDADD = libjpeg.la + +dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1 + +DOCS= coderules.txt jconfig.txt change.log rdrle.c wrrle.c BUILDING.md \ + ChangeLog.txt + +dist_doc_DATA = README.ijg README.md libjpeg.txt structure.txt usage.txt \ + wizard.txt LICENSE.md + +exampledir = $(docdir) +dist_example_DATA = example.c + + +EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \ + sharedlib/CMakeLists.txt cmakescripts libjpeg.map.in doc doxygen.config \ + doxygen-extra.css jccolext.c jdcolext.c jdcol565.c jdmrgext.c jdmrg565.c \ + jstdhuff.c jdcoefct.h jdmainct.h jdmaster.h jdsample.h wrppm.h \ + md5/CMakeLists.txt + +dist-hook: + rm -rf `find $(distdir) -name .svn` + + +SUBDIRS += md5 + +if WITH_12BIT + +TESTORIG = testorig12.jpg +MD5_JPEG_RGB_ISLOW = 9620f424569594bb9242b48498ad801f +MD5_PPM_RGB_ISLOW = f3301d2219783b8b3d942b7239fa50c0 +MD5_JPEG_422_IFAST_OPT = 7322e3bd2f127f7de4b40d4480ce60e4 +MD5_PPM_422_IFAST = 79807fa552899e66a04708f533e16950 +MD5_PPM_422M_IFAST = 07737bfe8a7c1c87aaa393a0098d16b0 +MD5_JPEG_420_IFAST_Q100_PROG = a1da220b5604081863a504297ed59e55 +MD5_PPM_420_Q100_IFAST = 1b3730122709f53d007255e8dfd3305e +MD5_PPM_420M_Q100_IFAST = 980a1a3c5bf9510022869d30b7d26566 +MD5_JPEG_GRAY_ISLOW = 235c90707b16e2e069f37c888b2636d9 +MD5_PPM_GRAY_ISLOW = 7213c10af507ad467da5578ca5ee1fca +MD5_PPM_GRAY_ISLOW_RGB = e96ee81c30a6ed422d466338bd3de65d +MD5_JPEG_420S_IFAST_OPT = 7af8e60be4d9c227ec63ac9b6630855e +MD5_JPEG_3x2_FLOAT_PROG_SSE = a8c17daf77b457725ec929e215b603f8 +MD5_PPM_3x2_FLOAT_SSE = 42876ab9e5c2f76a87d08db5fbd57956 +MD5_JPEG_3x2_FLOAT_PROG_32BIT = a8c17daf77b457725ec929e215b603f8 +MD5_PPM_3x2_FLOAT_32BIT = 42876ab9e5c2f76a87d08db5fbd57956 +MD5_PPM_3x2_FLOAT_64BIT = d6fbc71153b3d8ded484dbc17c7b9cf4 +MD5_JPEG_3x2_IFAST_PROG = 1396cc2b7185cfe943d408c9d305339e +MD5_PPM_3x2_IFAST = 3975985ef6eeb0a2cdc58daa651ccc00 +MD5_PPM_420M_ISLOW_2_1 = 4ca6be2a6f326ff9eaab63e70a8259c0 +MD5_PPM_420M_ISLOW_15_8 = 12aa9f9534c1b3d7ba047322226365eb +MD5_PPM_420M_ISLOW_13_8 = f7e22817c7b25e1393e4ec101e9d4e96 +MD5_PPM_420M_ISLOW_11_8 = 800a16f9f4dc9b293197bfe11be10a82 +MD5_PPM_420M_ISLOW_9_8 = 06b7a92a9bc69f4dc36ec40f1937d55c +MD5_PPM_420M_ISLOW_7_8 = 3ec444a14a4ab4eab88ffc49c48eca43 +MD5_PPM_420M_ISLOW_3_4 = 3e726b7ea872445b19437d1c1d4f0d93 +MD5_PPM_420M_ISLOW_5_8 = a8a771abdc94301d20ffac119b2caccd +MD5_PPM_420M_ISLOW_1_2 = b419124dd5568b085787234866102866 +MD5_PPM_420M_ISLOW_3_8 = 343d19015531b7bbe746124127244fa8 +MD5_PPM_420M_ISLOW_1_4 = 35fd59d866e44659edfa3c18db2a3edb +MD5_PPM_420M_ISLOW_1_8 = ccaed48ac0aedefda5d4abe4013f4ad7 +MD5_PPM_420_ISLOW_SKIP15_31 = 86664cd9dc956536409e44e244d20a97 +MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 452a21656115a163029cfba5c04fa76a +MD5_PPM_444_ISLOW_SKIP1_6 = ef63901f71ef7a75cd78253fc0914f84 +MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = 15b173fb5872d9575572fbcc1b05956f +MD5_JPEG_CROP = cdb35ff4b4519392690ea040c56ea99c + +else + +TESTORIG = testorig.jpg +MD5_JPEG_RGB_ISLOW = 768e970dd57b340ff1b83c9d3d47c77b +MD5_PPM_RGB_ISLOW = 00a257f5393fef8821f2b88ac7421291 +MD5_BMP_RGB_ISLOW_565 = f07d2e75073e4bb10f6c6f4d36e2e3be +MD5_BMP_RGB_ISLOW_565D = 4cfa0928ef3e6bb626d7728c924cfda4 +MD5_JPEG_422_IFAST_OPT = 2540287b79d913f91665e660303ab2c8 +MD5_PPM_422_IFAST = 35bd6b3f833bad23de82acea847129fa +MD5_PPM_422M_IFAST = 8dbc65323d62cca7c91ba02dd1cfa81d +MD5_BMP_422M_IFAST_565 = 3294bd4d9a1f2b3d08ea6020d0db7065 +MD5_BMP_422M_IFAST_565D = da98c9c7b6039511be4a79a878a9abc1 +MD5_JPEG_420_IFAST_Q100_PROG = 990cbe0329c882420a2094da7e5adade +MD5_PPM_420_Q100_IFAST = 5a732542015c278ff43635e473a8a294 +MD5_PPM_420M_Q100_IFAST = ff692ee9323a3b424894862557c092f1 +MD5_JPEG_GRAY_ISLOW = 72b51f894b8f4a10b3ee3066770aa38d +MD5_PPM_GRAY_ISLOW = 8d3596c56eace32f205deccc229aa5ed +MD5_PPM_GRAY_ISLOW_RGB = 116424ac07b79e5e801f00508eab48ec +MD5_BMP_GRAY_ISLOW_565 = 12f78118e56a2f48b966f792fedf23cc +MD5_BMP_GRAY_ISLOW_565D = bdbbd616441a24354c98553df5dc82db +MD5_JPEG_420S_IFAST_OPT = 388708217ac46273ca33086b22827ed8 +# See README.md for more details on why this next bit is necessary. +MD5_JPEG_3x2_FLOAT_PROG_SSE = 343e3f8caf8af5986ebaf0bdc13b5c71 +MD5_PPM_3x2_FLOAT_SSE = 1a75f36e5904d6fc3a85a43da9ad89bb +MD5_JPEG_3x2_FLOAT_PROG_32BIT = 9bca803d2042bd1eb03819e2bf92b3e5 +MD5_PPM_3x2_FLOAT_32BIT = f6bfab038438ed8f5522fbd33595dcdc +MD5_PPM_3x2_FLOAT_64BIT = 0e917a34193ef976b679a6b069b1be26 +MD5_JPEG_3x2_IFAST_PROG = 1ee5d2c1a77f2da495f993c8c7cceca5 +MD5_PPM_3x2_IFAST = fd283664b3b49127984af0a7f118fccd +MD5_JPEG_420_ISLOW_ARI = e986fb0a637a8d833d96e8a6d6d84ea1 +MD5_JPEG_444_ISLOW_PROGARI = 0a8f1c8f66e113c3cf635df0a475a617 +MD5_PPM_420M_IFAST_ARI = 72b59a99bcf1de24c5b27d151bde2437 +MD5_JPEG_420_ISLOW = 9a68f56bc76e466aa7e52f415d0f4a5f +MD5_PPM_420M_ISLOW_2_1 = 9f9de8c0612f8d06869b960b05abf9c9 +MD5_PPM_420M_ISLOW_15_8 = b6875bc070720b899566cc06459b63b7 +MD5_PPM_420M_ISLOW_13_8 = bc3452573c8152f6ae552939ee19f82f +MD5_PPM_420M_ISLOW_11_8 = d8cc73c0aaacd4556569b59437ba00a5 +MD5_PPM_420M_ISLOW_9_8 = d25e61bc7eac0002f5b393aa223747b6 +MD5_PPM_420M_ISLOW_7_8 = ddb564b7c74a09494016d6cd7502a946 +MD5_PPM_420M_ISLOW_3_4 = 8ed8e68808c3fbc4ea764fc9d2968646 +MD5_PPM_420M_ISLOW_5_8 = a3363274999da2366a024efae6d16c9b +MD5_PPM_420M_ISLOW_1_2 = e692a315cea26b988c8e8b29a5dbcd81 +MD5_PPM_420M_ISLOW_3_8 = 79eca9175652ced755155c90e785a996 +MD5_PPM_420M_ISLOW_1_4 = 79cd778f8bf1a117690052cacdd54eca +MD5_PPM_420M_ISLOW_1_8 = 391b3d4aca640c8567d6f8745eb2142f +MD5_BMP_420_ISLOW_256 = 4980185e3776e89bd931736e1cddeee6 +MD5_BMP_420_ISLOW_565 = bf9d13e16c4923b92e1faa604d7922cb +MD5_BMP_420_ISLOW_565D = 6bde71526acc44bcff76f696df8638d2 +MD5_BMP_420M_ISLOW_565 = 8dc0185245353cfa32ad97027342216f +MD5_BMP_420M_ISLOW_565D =d1be3a3339166255e76fa50a0d70d73e +MD5_PPM_420_ISLOW_SKIP15_31 = c4c65c1e43d7275cd50328a61e6534f0 +MD5_PPM_420_ISLOW_ARI_SKIP16_139 = 087c6b123db16ac00cb88c5b590bb74a +MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 26eb36ccc7d1f0cb80cdabb0ac8b5d99 +MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4 = 886c6775af22370257122f8b16207e6d +MD5_PPM_444_ISLOW_SKIP1_6 = 5606f86874cf26b8fcee1117a0a436a6 +MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = db87dc7ce26bcdc7a6b56239ce2b9d6c +MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0 = cb57b32bd6d03e35432362f7bf184b6d +MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d + +endif + +.PHONY: test +test: tjquicktest tjbittest bittest + +if CROSS_COMPILING +tjquicktest: testclean +else +tjquicktest: testclean all +endif + +if WITH_TURBOJPEG +if WITH_JAVA + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -noyuvpad + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi + $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi -noyuvpad +endif + ./tjunittest + ./tjunittest -alloc + ./tjunittest -yuv + ./tjunittest -yuv -alloc + ./tjunittest -yuv -noyuvpad +endif + echo GREAT SUCCESS! + +if CROSS_COMPILING +tjbittest: testclean +else +tjbittest: testclean all +endif + +if WITH_TURBOJPEG + +MD5_PPM_GRAY_TILE = 89d3ca21213d9d864b50b4e4e7de4ca6 +MD5_PPM_420_8x8_TILE = 847fceab15c5b7b911cb986cf0f71de3 +MD5_PPM_420_16x16_TILE = ca45552a93687e078f7137cc4126a7b0 +MD5_PPM_420_32x32_TILE = d8676f1d6b68df358353bba9844f4a00 +MD5_PPM_420_64x64_TILE = 4e4c1a3d7ea4bace4f868bcbe83b7050 +MD5_PPM_420_128x128_TILE = f24c3429c52265832beab9df72a0ceae +MD5_PPM_420M_8x8_TILE = bc25320e1f4c31ce2e610e43e9fd173c +MD5_PPM_420M_TILE = 75ffdf14602258c5c189522af57fa605 +MD5_PPM_422_8x8_TILE = d83dacd9fc73b0a6f10c09acad64eb1e +MD5_PPM_422_16x16_TILE = 35077fb610d72dd743b1eb0cbcfe10fb +MD5_PPM_422_32x32_TILE = e6902ed8a449ecc0f0d6f2bf945f65f7 +MD5_PPM_422_64x64_TILE = 2b4502a8f316cedbde1da7bce3d2231e +MD5_PPM_422_128x128_TILE = f0b5617d578f5e13c8eee215d64d4877 +MD5_PPM_422M_8x8_TILE = 828941d7f41cd6283abd6beffb7fd51d +MD5_PPM_422M_TILE = e877ae1324c4a280b95376f7f018172f +MD5_PPM_444_TILE = 7964e41e67cfb8d0a587c0aa4798f9c3 + +# Test compressing from/decompressing to an arbitrary subregion of a larger +# image buffer + cp $(srcdir)/testimages/testorig.ppm testout_tile.ppm + ./tjbench testout_tile.ppm 95 -rgb -quiet -tile -benchtime 0.01 >/dev/null 2>&1 + for i in 8 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_GRAY_TILE) testout_tile_GRAY_Q95_$$i\x$$i.ppm; \ + done + md5/md5cmp $(MD5_PPM_420_8x8_TILE) testout_tile_420_Q95_8x8.ppm + md5/md5cmp $(MD5_PPM_420_16x16_TILE) testout_tile_420_Q95_16x16.ppm + md5/md5cmp $(MD5_PPM_420_32x32_TILE) testout_tile_420_Q95_32x32.ppm + md5/md5cmp $(MD5_PPM_420_64x64_TILE) testout_tile_420_Q95_64x64.ppm + md5/md5cmp $(MD5_PPM_420_128x128_TILE) testout_tile_420_Q95_128x128.ppm + md5/md5cmp $(MD5_PPM_422_8x8_TILE) testout_tile_422_Q95_8x8.ppm + md5/md5cmp $(MD5_PPM_422_16x16_TILE) testout_tile_422_Q95_16x16.ppm + md5/md5cmp $(MD5_PPM_422_32x32_TILE) testout_tile_422_Q95_32x32.ppm + md5/md5cmp $(MD5_PPM_422_64x64_TILE) testout_tile_422_Q95_64x64.ppm + md5/md5cmp $(MD5_PPM_422_128x128_TILE) testout_tile_422_Q95_128x128.ppm + for i in 8 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_444_TILE) testout_tile_444_Q95_$$i\x$$i.ppm; \ + done + rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* + + ./tjbench testout_tile.ppm 95 -rgb -fastupsample -quiet -tile -benchtime 0.01 >/dev/null 2>&1 + md5/md5cmp $(MD5_PPM_420M_8x8_TILE) testout_tile_420_Q95_8x8.ppm + for i in 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_420M_TILE) testout_tile_420_Q95_$$i\x$$i.ppm; \ + done + md5/md5cmp $(MD5_PPM_422M_8x8_TILE) testout_tile_422_Q95_8x8.ppm + for i in 16 32 64 128; do \ + md5/md5cmp $(MD5_PPM_422M_TILE) testout_tile_422_Q95_$$i\x$$i.ppm; \ + done + rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* testout_tile.ppm + echo GREAT SUCCESS! + +endif + +if CROSS_COMPILING +bittest: testclean +else +bittest: testclean all +endif + +# These tests are carefully crafted to provide full coverage of as many of the +# underlying algorithms as possible (including all of the SIMD-accelerated +# ones.) + +# CC: null SAMP: fullsize FDCT: islow ENT: huff + ./cjpeg -rgb -dct int -outfile testout_rgb_islow.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_RGB_ISLOW) testout_rgb_islow.jpg +# CC: null SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -ppm -outfile testout_rgb_islow.ppm testout_rgb_islow.jpg + md5/md5cmp $(MD5_PPM_RGB_ISLOW) testout_rgb_islow.ppm + rm -f testout_rgb_islow.ppm +if WITH_12BIT + rm -f testout_rgb_islow.jpg +else +# CC: RGB->RGB565 SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg + md5/md5cmp $(MD5_BMP_RGB_ISLOW_565) testout_rgb_islow_565.bmp + rm -f testout_rgb_islow_565.bmp +# CC: RGB->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg + md5/md5cmp $(MD5_BMP_RGB_ISLOW_565D) testout_rgb_islow_565D.bmp + rm -f testout_rgb_islow_565D.bmp testout_rgb_islow.jpg +endif + +# CC: RGB->YCC SAMP: fullsize/h2v1 FDCT: ifast ENT: 2-pass huff + ./cjpeg -sample 2x1 -dct fast -opt -outfile testout_422_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_422_IFAST_OPT) testout_422_ifast_opt.jpg +# CC: YCC->RGB SAMP: fullsize/h2v1 fancy IDCT: ifast ENT: huff + ./djpeg -dct fast -outfile testout_422_ifast.ppm testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_PPM_422_IFAST) testout_422_ifast.ppm + rm -f testout_422_ifast.ppm +# CC: YCC->RGB SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct fast -nosmooth -outfile testout_422m_ifast.ppm testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_PPM_422M_IFAST) testout_422m_ifast.ppm + rm -f testout_422m_ifast.ppm +if WITH_12BIT + rm -f testout_422_ifast_opt.jpg +else +# CC: YCC->RGB565 SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_BMP_422M_IFAST_565) testout_422m_ifast_565.bmp + rm -f testout_422m_ifast_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v1 merged IDCT: ifast ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg + md5/md5cmp $(MD5_BMP_422M_IFAST_565D) testout_422m_ifast_565D.bmp + rm -f testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg +endif + +# CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff + ./cjpeg -sample 2x2 -quality 100 -dct fast -prog -outfile testout_420_q100_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420_IFAST_Q100_PROG) testout_420_q100_ifast_prog.jpg +# CC: YCC->RGB SAMP: fullsize/h2v2 fancy IDCT: ifast ENT: prog huff + ./djpeg -dct fast -outfile testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_420_Q100_IFAST) testout_420_q100_ifast.ppm + rm -f testout_420_q100_ifast.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: ifast ENT: prog huff + ./djpeg -dct fast -nosmooth -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_420M_Q100_IFAST) testout_420m_q100_ifast.ppm + rm -f testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + +# CC: RGB->Gray SAMP: fullsize FDCT: islow ENT: huff + ./cjpeg -gray -dct int -outfile testout_gray_islow.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_GRAY_ISLOW) testout_gray_islow.jpg +# CC: Gray->Gray SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -outfile testout_gray_islow.ppm testout_gray_islow.jpg + md5/md5cmp $(MD5_PPM_GRAY_ISLOW) testout_gray_islow.ppm + rm -f testout_gray_islow.ppm +# CC: Gray->RGB SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb -outfile testout_gray_islow_rgb.ppm testout_gray_islow.jpg + md5/md5cmp $(MD5_PPM_GRAY_ISLOW_RGB) testout_gray_islow_rgb.ppm + rm -f testout_gray_islow_rgb.ppm +if WITH_12BIT + rm -f testout_gray_islow.jpg +else +# CC: Gray->RGB565 SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_gray_islow_565.bmp testout_gray_islow.jpg + md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565) testout_gray_islow_565.bmp + rm -f testout_gray_islow_565.bmp +# CC: Gray->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg + md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565D) testout_gray_islow_565D.bmp + rm -f testout_gray_islow_565D.bmp testout_gray_islow.jpg +endif + +# CC: RGB->YCC SAMP: fullsize smooth/h2v2 smooth FDCT: islow +# ENT: 2-pass huff + ./cjpeg -sample 2x2 -smooth 1 -dct int -opt -outfile testout_420s_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420S_IFAST_OPT) testout_420s_ifast_opt.jpg + rm -f testout_420s_ifast_opt.jpg + +# The output of the floating point tests is not validated by default, because +# the output differs depending on the type of floating point math used, and +# this is only deterministic if the DCT/IDCT are implemented using SIMD +# instructions on a particular platform. Pass one of the following on the make +# command line to validate the floating point tests against one of the expected +# results: +# +# FLOATTEST=sse validate against the expected results from the libjpeg-turbo +# SSE SIMD extensions +# FLOATTEST=32bit validate against the expected results from the C code +# when running on a 32-bit FPU (or when SSE is being used for +# floating point math, which is generally the default with +# x86-64 compilers) +# FLOATTEST=64bit validate against the exepected results from the C code +# when running on a 64-bit FPU + +# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff + ./cjpeg -sample 3x2 -dct float -prog -outfile testout_3x2_float_prog.jpg $(srcdir)/testimages/testorig.ppm + if [ "${FLOATTEST}" = "sse" ]; then \ + md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_SSE) testout_3x2_float_prog.jpg; \ + elif [ "${FLOATTEST}" = "32bit" -o "${FLOATTEST}" = "64bit" ]; then \ + md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_32BIT) testout_3x2_float_prog.jpg; \ + fi +# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff + ./djpeg -dct float -outfile testout_3x2_float.ppm testout_3x2_float_prog.jpg + if [ "${FLOATTEST}" = "sse" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_SSE) testout_3x2_float.ppm; \ + elif [ "${FLOATTEST}" = "32bit" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_32BIT) testout_3x2_float.ppm; \ + elif [ "${FLOATTEST}" = "64bit" ]; then \ + md5/md5cmp $(MD5_PPM_3x2_FLOAT_64BIT) testout_3x2_float.ppm; \ + fi + rm -f testout_3x2_float.ppm testout_3x2_float_prog.jpg + +# CC: RGB->YCC SAMP: fullsize/int FDCT: ifast ENT: prog huff + ./cjpeg -sample 3x2 -dct fast -prog -outfile testout_3x2_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_3x2_IFAST_PROG) testout_3x2_ifast_prog.jpg +# CC: YCC->RGB SAMP: fullsize/int IDCT: ifast ENT: prog huff + ./djpeg -dct fast -outfile testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg + md5/md5cmp $(MD5_PPM_3x2_IFAST) testout_3x2_ifast.ppm + rm -f testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg + +if WITH_ARITH_ENC +# CC: YCC->RGB SAMP: fullsize/h2v2 FDCT: islow ENT: arith + ./cjpeg -dct int -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg + rm -f testout_420_islow_ari.jpg + ./jpegtran -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testimgint.jpg + md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg + rm -f testout_420_islow_ari.jpg +# CC: YCC->RGB SAMP: fullsize FDCT: islow ENT: prog arith + ./cjpeg -sample 1x1 -dct int -prog -arithmetic -outfile testout_444_islow_progari.jpg $(srcdir)/testimages/testorig.ppm + md5/md5cmp $(MD5_JPEG_444_ISLOW_PROGARI) testout_444_islow_progari.jpg + rm -f testout_444_islow_progari.jpg +endif +if WITH_ARITH_DEC +# CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith + ./djpeg -fast -ppm -outfile testout_420m_ifast_ari.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420M_IFAST_ARI) testout_420m_ifast_ari.ppm + rm -f testout_420m_ifast_ari.ppm + ./jpegtran -outfile testout_420_islow.jpg $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_JPEG_420_ISLOW) testout_420_islow.jpg + rm -f testout_420_islow.jpg +endif + +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 16x16 islow ENT: huff + ./djpeg -dct int -scale 2/1 -nosmooth -ppm -outfile testout_420m_islow_2_1.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_2_1) testout_420m_islow_2_1.ppm + rm -f testout_420m_islow_2_1.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 15x15 islow ENT: huff + ./djpeg -dct int -scale 15/8 -nosmooth -ppm -outfile testout_420m_islow_15_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_15_8) testout_420m_islow_15_8.ppm + rm -f testout_420m_islow_15_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 13x13 islow ENT: huff + ./djpeg -dct int -scale 13/8 -nosmooth -ppm -outfile testout_420m_islow_13_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_13_8) testout_420m_islow_13_8.ppm + rm -f testout_420m_islow_13_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 11x11 islow ENT: huff + ./djpeg -dct int -scale 11/8 -nosmooth -ppm -outfile testout_420m_islow_11_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_11_8) testout_420m_islow_11_8.ppm + rm -f testout_420m_islow_11_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 9x9 islow ENT: huff + ./djpeg -dct int -scale 9/8 -nosmooth -ppm -outfile testout_420m_islow_9_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_9_8) testout_420m_islow_9_8.ppm + rm -f testout_420m_islow_9_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 7x7 islow/14x14 islow ENT: huff + ./djpeg -dct int -scale 7/8 -nosmooth -ppm -outfile testout_420m_islow_7_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_7_8) testout_420m_islow_7_8.ppm + rm -f testout_420m_islow_7_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 6x6 islow/12x12 islow ENT: huff + ./djpeg -dct int -scale 3/4 -nosmooth -ppm -outfile testout_420m_islow_3_4.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_3_4) testout_420m_islow_3_4.ppm + rm -f testout_420m_islow_3_4.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 5x5 islow/10x10 islow ENT: huff + ./djpeg -dct int -scale 5/8 -nosmooth -ppm -outfile testout_420m_islow_5_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_5_8) testout_420m_islow_5_8.ppm + rm -f testout_420m_islow_5_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 4x4 islow/8x8 islow ENT: huff + ./djpeg -dct int -scale 1/2 -nosmooth -ppm -outfile testout_420m_islow_1_2.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_2) testout_420m_islow_1_2.ppm + rm -f testout_420m_islow_1_2.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 3x3 islow/6x6 islow ENT: huff + ./djpeg -dct int -scale 3/8 -nosmooth -ppm -outfile testout_420m_islow_3_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_3_8) testout_420m_islow_3_8.ppm + rm -f testout_420m_islow_3_8.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 2x2 islow/4x4 islow ENT: huff + ./djpeg -dct int -scale 1/4 -nosmooth -ppm -outfile testout_420m_islow_1_4.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_4) testout_420m_islow_1_4.ppm + rm -f testout_420m_islow_1_4.ppm +# CC: YCC->RGB SAMP: h2v2 merged IDCT: 1x1 islow/2x2 islow ENT: huff + ./djpeg -dct int -scale 1/8 -nosmooth -ppm -outfile testout_420m_islow_1_8.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420M_ISLOW_1_8) testout_420m_islow_1_8.ppm + rm -f testout_420m_islow_1_8.ppm +if WITH_12BIT +else +# CC: YCC->RGB (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -colors 256 -bmp -outfile testout_420_islow_256.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_256) testout_420_islow_256.bmp + rm -f testout_420_islow_256.bmp +# CC: YCC->RGB565 SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_420_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_565) testout_420_islow_565.bmp + rm -f testout_420_islow_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff + ./djpeg -dct int -rgb565 -bmp -outfile testout_420_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420_ISLOW_565D) testout_420_islow_565D.bmp + rm -f testout_420_islow_565D.bmp +# CC: YCC->RGB565 SAMP: h2v2 merged IDCT: islow ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_420m_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420M_ISLOW_565) testout_420m_islow_565.bmp + rm -f testout_420m_islow_565.bmp +# CC: YCC->RGB565 (dithered) SAMP: h2v2 merged IDCT: islow ENT: huff + ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_420m_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_BMP_420M_ISLOW_565D) testout_420m_islow_565D.bmp + rm -f testout_420m_islow_565D.bmp +endif + +# Partial decode tests. These tests are designed to cover all of the possible +# code paths in jpeg_skip_scanlines(). + +# Context rows: Yes Intra-iMCU row: Yes iMCU row prefetch: No ENT: huff + ./djpeg -dct int -skip 15,31 -ppm -outfile testout_420_islow_skip15,31.ppm $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_PPM_420_ISLOW_SKIP15_31) testout_420_islow_skip15,31.ppm + rm -f testout_420_islow_skip15,31.ppm +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: Yes ENT: arith +if WITH_ARITH_DEC + ./djpeg -dct int -skip 16,139 -ppm -outfile testout_420_islow_ari_skip16,139.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_SKIP16_139) testout_420_islow_ari_skip16,139.ppm + rm -f testout_420_islow_ari_skip16,139.ppm +endif +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: prog huff + ./cjpeg -dct int -prog -outfile testout_420_islow_prog.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -crop 62x62+71+71 -ppm -outfile testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71) testout_420_islow_prog_crop62x62,71,71.ppm + rm -f testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg +# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: arith +if WITH_ARITH_DEC + ./djpeg -dct int -crop 53x53+4+4 -ppm -outfile testout_420_islow_ari_crop53x53,4,4.ppm $(srcdir)/testimages/testimgari.jpg + md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4) testout_420_islow_ari_crop53x53,4,4.ppm + rm -f testout_420_islow_ari_crop53x53,4,4.ppm +endif +# Context rows: No Intra-iMCU row: Yes ENT: huff + ./cjpeg -dct int -sample 1x1 -outfile testout_444_islow.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -skip 1,6 -ppm -outfile testout_444_islow_skip1,6.ppm testout_444_islow.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_SKIP1_6) testout_444_islow_skip1,6.ppm + rm -f testout_444_islow_skip1,6.ppm testout_444_islow.jpg +# Context rows: No Intra-iMCU row: No ENT: prog huff + ./cjpeg -dct int -prog -sample 1x1 -outfile testout_444_islow_prog.jpg $(srcdir)/testimages/testorig.ppm + ./djpeg -dct int -crop 98x98+13+13 -ppm -outfile testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13) testout_444_islow_prog_crop98x98,13,13.ppm + rm -f testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg +# Context rows: No Intra-iMCU row: No ENT: arith +if WITH_ARITH_ENC + ./cjpeg -dct int -arithmetic -sample 1x1 -outfile testout_444_islow_ari.jpg $(srcdir)/testimages/testorig.ppm +if WITH_ARITH_DEC + ./djpeg -dct int -crop 37x37+0+0 -ppm -outfile testout_444_islow_ari_crop37x37,0,0.ppm testout_444_islow_ari.jpg + md5/md5cmp $(MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0) testout_444_islow_ari_crop37x37,0,0.ppm + rm -f testout_444_islow_ari_crop37x37,0,0.ppm +endif + rm -f testout_444_islow_ari.jpg +endif + + ./jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testout_crop.jpg $(srcdir)/testimages/$(TESTORIG) + md5/md5cmp $(MD5_JPEG_CROP) testout_crop.jpg + rm -f testout_crop.jpg + echo GREAT SUCCESS! + + +testclean: + rm -f testout* + rm -f *_GRAY_*.bmp + rm -f *_GRAY_*.png + rm -f *_GRAY_*.ppm + rm -f *_GRAY_*.jpg + rm -f *_GRAY.yuv + rm -f *_420_*.bmp + rm -f *_420_*.png + rm -f *_420_*.ppm + rm -f *_420_*.jpg + rm -f *_420.yuv + rm -f *_422_*.bmp + rm -f *_422_*.png + rm -f *_422_*.ppm + rm -f *_422_*.jpg + rm -f *_422.yuv + rm -f *_444_*.bmp + rm -f *_444_*.png + rm -f *_444_*.ppm + rm -f *_444_*.jpg + rm -f *_444.yuv + rm -f *_440_*.bmp + rm -f *_440_*.png + rm -f *_440_*.ppm + rm -f *_440_*.jpg + rm -f *_440.yuv + rm -f *_411_*.bmp + rm -f *_411_*.png + rm -f *_411_*.ppm + rm -f *_411_*.jpg + rm -f *_411.yuv + + +tjtest: + sh ./tjbenchtest + sh ./tjbenchtest -alloc + sh ./tjbenchtest -yuv + sh ./tjbenchtest -yuv -alloc +if WITH_JAVA + sh ./tjbenchtest.java + sh ./tjbenchtest.java -yuv +endif + + +pkgscripts/libjpeg-turbo.spec: pkgscripts/libjpeg-turbo.spec.tmpl + cat pkgscripts/libjpeg-turbo.spec.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__includedir}@$(includedir)@g | \ + sed s@%{__libdir}@$(libdir)@g | sed s@%{__mandir}@$(mandir)@g \ + > pkgscripts/libjpeg-turbo.spec + +rpm: all pkgscripts/libjpeg-turbo.spec + TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ + mkdir -p $$TMPDIR/RPMS; \ + ln -fs `pwd` $$TMPDIR/BUILD; \ + rm -f ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ + rpmbuild -bb --define "_blddir $$TMPDIR/buildroot" \ + --define "_topdir $$TMPDIR" \ + --target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \ + cp $$TMPDIR/RPMS/${RPMARCH}/${PKGNAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \ + ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ + rm -rf $$TMPDIR + +srpm: dist-gzip pkgscripts/libjpeg-turbo.spec + TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ + mkdir -p $$TMPDIR/RPMS; \ + mkdir -p $$TMPDIR/SRPMS; \ + mkdir -p $$TMPDIR/BUILD; \ + mkdir -p $$TMPDIR/SOURCES; \ + mkdir -p $$TMPDIR/SPECS; \ + rm -f ${PKGNAME}-${VERSION}.src.rpm; \ + cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \ + cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \ + | sed s/#--\>//g \ + > $$TMPDIR/SPECS/libjpeg-turbo.spec; \ + rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \ + cp $$TMPDIR/SRPMS/${PKGNAME}-${VERSION}-${BUILD}.src.rpm \ + ${PKGNAME}-${VERSION}.src.rpm; \ + rm -rf $$TMPDIR + +pkgscripts/makedpkg: pkgscripts/makedpkg.tmpl + cat pkgscripts/makedpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ + > pkgscripts/makedpkg + +deb: all pkgscripts/makedpkg + sh pkgscripts/makedpkg + +pkgscripts/uninstall: pkgscripts/uninstall.tmpl + cat pkgscripts/uninstall.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ + sed s@%{__includedir}@$(includedir)@g | sed s@%{__libdir}@$(libdir)@g | \ + sed s@%{__mandir}@$(mandir)@g > pkgscripts/uninstall + +pkgscripts/makemacpkg: pkgscripts/makemacpkg.tmpl + cat pkgscripts/makemacpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__bindir}@$(bindir)@g | sed s@%{__docdir}@$(docdir)@g | \ + sed s@%{__libdir}@$(libdir)@g > pkgscripts/makemacpkg + +if X86_64 + +udmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} + +iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + +else + +iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + +endif + +dmg: all pkgscripts/makemacpkg pkgscripts/uninstall + sh pkgscripts/makemacpkg + +pkgscripts/makecygwinpkg: pkgscripts/makecygwinpkg.tmpl + cat pkgscripts/makecygwinpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ + sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ + > pkgscripts/makecygwinpkg + +cygwinpkg: all pkgscripts/makecygwinpkg + sh pkgscripts/makecygwinpkg diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdbmp.c glmark2-2021.02/src/libjpeg-turbo/rdbmp.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdbmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdbmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,483 @@ +/* + * rdbmp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2010 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Modified 2011 by Siarhei Siamashka. + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Microsoft "BMP" + * format (MS Windows 3.x, OS/2 1.x, and OS/2 2.x flavors). + * Currently, only 8-bit and 24-bit images are supported, not 1-bit or + * 4-bit (feeding such low-depth images into JPEG would be silly anyway). + * Also, we don't support RLE-compressed files. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed BMP format). + * + * This code contributed by James Arthur Boucher. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef BMP_SUPPORTED + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct _bmp_source_struct *bmp_source_ptr; + +typedef struct _bmp_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + j_compress_ptr cinfo; /* back link saves passing separate parm */ + + JSAMPARRAY colormap; /* BMP colormap (converted to my format) */ + + jvirt_sarray_ptr whole_image; /* Needed to reverse row order */ + JDIMENSION source_row; /* Current source row number */ + JDIMENSION row_width; /* Physical width of scanlines in file */ + + int bits_per_pixel; /* remembers 8- or 24-bit format */ +} bmp_source_struct; + + +LOCAL(int) +read_byte (bmp_source_ptr sinfo) +/* Read next byte from BMP file */ +{ + register FILE *infile = sinfo->pub.input_file; + register int c; + + if ((c = getc(infile)) == EOF) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + return c; +} + + +LOCAL(void) +read_colormap (bmp_source_ptr sinfo, int cmaplen, int mapentrysize) +/* Read the colormap from a BMP file */ +{ + int i; + + switch (mapentrysize) { + case 3: + /* BGR format (occurs in OS/2 files) */ + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + } + break; + case 4: + /* BGR0 format (occurs in MS Windows files) */ + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + (void) read_byte(sinfo); + } + break; + default: + ERREXIT(sinfo->cinfo, JERR_BMP_BADCMAP); + break; + } +} + + +/* + * Read one row of pixels. + * The image has been read into the whole_image array, but is otherwise + * unprocessed. We must read it out in top-to-bottom row order, and if + * it is an 8-bit image, we must expand colormapped pixels to 24bit format. + */ + +METHODDEF(JDIMENSION) +get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit colormap indexes */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + register JSAMPARRAY colormap = source->colormap; + JSAMPARRAY image_ptr; + register int t; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + + /* Expand the colormap indexes to real data */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + *outptr++ = colormap[0][t]; /* can omit GETJSAMPLE() safely */ + *outptr++ = colormap[1][t]; + *outptr++ = colormap[2][t]; + } + + return 1; +} + + +METHODDEF(JDIMENSION) +get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 24-bit pixels */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + + /* Transfer data. Note source values are in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + outptr += 3; + } + + return 1; +} + + +METHODDEF(JDIMENSION) +get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 32-bit pixels */ +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source->source_row, (JDIMENSION) 1, FALSE); + /* Transfer data. Note source values are in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = image_ptr[0]; + outptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + inptr++; /* skip the 4th byte (Alpha channel) */ + outptr += 3; + } + + return 1; +} + + +/* + * This method loads the image into whole_image during the first call on + * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call + * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls. + */ + +METHODDEF(JDIMENSION) +preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + register FILE *infile = source->pub.input_file; + register JSAMPROW out_ptr; + JSAMPARRAY image_ptr; + JDIMENSION row; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Read the data into a virtual array in input-file row order. */ + for (row = 0; row < cinfo->image_height; row++) { + if (progress != NULL) { + progress->pub.pass_counter = (long) row; + progress->pub.pass_limit = (long) cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + row, (JDIMENSION) 1, TRUE); + out_ptr = image_ptr[0]; + if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) { + if (feof(infile)) + ERREXIT(cinfo, JERR_INPUT_EOF); + else + ERREXIT(cinfo, JERR_FILE_READ); + } + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Set up to read from the virtual array in top-to-bottom order */ + switch (source->bits_per_pixel) { + case 8: + source->pub.get_pixel_rows = get_8bit_row; + break; + case 24: + source->pub.get_pixel_rows = get_24bit_row; + break; + case 32: + source->pub.get_pixel_rows = get_32bit_row; + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + } + source->source_row = cinfo->image_height; + + /* And read the first row */ + return (*source->pub.get_pixel_rows) (cinfo, sinfo); +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + bmp_source_ptr source = (bmp_source_ptr) sinfo; + U_CHAR bmpfileheader[14]; + U_CHAR bmpinfoheader[64]; +#define GET_2B(array,offset) ((unsigned short) UCH(array[offset]) + \ + (((unsigned short) UCH(array[offset+1])) << 8)) +#define GET_4B(array,offset) ((unsigned int) UCH(array[offset]) + \ + (((unsigned int) UCH(array[offset+1])) << 8) + \ + (((unsigned int) UCH(array[offset+2])) << 16) + \ + (((unsigned int) UCH(array[offset+3])) << 24)) + unsigned int bfOffBits; + unsigned int headerSize; + int biWidth; + int biHeight; + unsigned short biPlanes; + unsigned int biCompression; + int biXPelsPerMeter,biYPelsPerMeter; + unsigned int biClrUsed = 0; + int mapentrysize = 0; /* 0 indicates no colormap */ + int bPad; + JDIMENSION row_width; + + /* Read and verify the bitmap file header */ + if (! ReadOK(source->pub.input_file, bmpfileheader, 14)) + ERREXIT(cinfo, JERR_INPUT_EOF); + if (GET_2B(bmpfileheader,0) != 0x4D42) /* 'BM' */ + ERREXIT(cinfo, JERR_BMP_NOT); + bfOffBits = GET_4B(bmpfileheader,10); + /* We ignore the remaining fileheader fields */ + + /* The infoheader might be 12 bytes (OS/2 1.x), 40 bytes (Windows), + * or 64 bytes (OS/2 2.x). Check the first 4 bytes to find out which. + */ + if (! ReadOK(source->pub.input_file, bmpinfoheader, 4)) + ERREXIT(cinfo, JERR_INPUT_EOF); + headerSize = GET_4B(bmpinfoheader,0); + if (headerSize < 12 || headerSize > 64) + ERREXIT(cinfo, JERR_BMP_BADHEADER); + if (! ReadOK(source->pub.input_file, bmpinfoheader+4, headerSize-4)) + ERREXIT(cinfo, JERR_INPUT_EOF); + + switch (headerSize) { + case 12: + /* Decode OS/2 1.x header (Microsoft calls this a BITMAPCOREHEADER) */ + biWidth = (int) GET_2B(bmpinfoheader,4); + biHeight = (int) GET_2B(bmpinfoheader,6); + biPlanes = GET_2B(bmpinfoheader,8); + source->bits_per_pixel = (int) GET_2B(bmpinfoheader,10); + + switch (source->bits_per_pixel) { + case 8: /* colormapped image */ + mapentrysize = 3; /* OS/2 uses RGBTRIPLE colormap */ + TRACEMS2(cinfo, 1, JTRC_BMP_OS2_MAPPED, biWidth, biHeight); + break; + case 24: /* RGB image */ + TRACEMS2(cinfo, 1, JTRC_BMP_OS2, biWidth, biHeight); + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + break; + } + break; + case 40: + case 64: + /* Decode Windows 3.x header (Microsoft calls this a BITMAPINFOHEADER) */ + /* or OS/2 2.x header, which has additional fields that we ignore */ + biWidth = (int) GET_4B(bmpinfoheader,4); + biHeight = (int) GET_4B(bmpinfoheader,8); + biPlanes = GET_2B(bmpinfoheader,12); + source->bits_per_pixel = (int) GET_2B(bmpinfoheader,14); + biCompression = GET_4B(bmpinfoheader,16); + biXPelsPerMeter = (int) GET_4B(bmpinfoheader,24); + biYPelsPerMeter = (int) GET_4B(bmpinfoheader,28); + biClrUsed = GET_4B(bmpinfoheader,32); + /* biSizeImage, biClrImportant fields are ignored */ + + switch (source->bits_per_pixel) { + case 8: /* colormapped image */ + mapentrysize = 4; /* Windows uses RGBQUAD colormap */ + TRACEMS2(cinfo, 1, JTRC_BMP_MAPPED, biWidth, biHeight); + break; + case 24: /* RGB image */ + TRACEMS2(cinfo, 1, JTRC_BMP, biWidth, biHeight); + break; + case 32: /* RGB image + Alpha channel */ + TRACEMS2(cinfo, 1, JTRC_BMP, biWidth, biHeight); + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + break; + } + if (biCompression != 0) + ERREXIT(cinfo, JERR_BMP_COMPRESSED); + + if (biXPelsPerMeter > 0 && biYPelsPerMeter > 0) { + /* Set JFIF density parameters from the BMP data */ + cinfo->X_density = (UINT16) (biXPelsPerMeter/100); /* 100 cm per meter */ + cinfo->Y_density = (UINT16) (biYPelsPerMeter/100); + cinfo->density_unit = 2; /* dots/cm */ + } + break; + default: + ERREXIT(cinfo, JERR_BMP_BADHEADER); + return; + } + + if (biWidth <= 0 || biHeight <= 0) + ERREXIT(cinfo, JERR_BMP_EMPTY); + if (biPlanes != 1) + ERREXIT(cinfo, JERR_BMP_BADPLANES); + + /* Compute distance to bitmap data --- will adjust for colormap below */ + bPad = bfOffBits - (headerSize + 14); + + /* Read the colormap, if any */ + if (mapentrysize > 0) { + if (biClrUsed <= 0) + biClrUsed = 256; /* assume it's 256 */ + else if (biClrUsed > 256) + ERREXIT(cinfo, JERR_BMP_BADCMAP); + /* Allocate space to store the colormap */ + source->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) biClrUsed, (JDIMENSION) 3); + /* and read it from the file */ + read_colormap(source, (int) biClrUsed, mapentrysize); + /* account for size of colormap */ + bPad -= biClrUsed * mapentrysize; + } + + /* Skip any remaining pad bytes */ + if (bPad < 0) /* incorrect bfOffBits value? */ + ERREXIT(cinfo, JERR_BMP_BADHEADER); + while (--bPad >= 0) { + (void) read_byte(source); + } + + /* Compute row width in file, including padding to 4-byte boundary */ + if (source->bits_per_pixel == 24) + row_width = (JDIMENSION) (biWidth * 3); + else if (source->bits_per_pixel == 32) + row_width = (JDIMENSION) (biWidth * 4); + else + row_width = (JDIMENSION) biWidth; + while ((row_width & 3) != 0) row_width++; + source->row_width = row_width; + + /* Allocate space for inversion array, prepare for preload pass */ + source->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + row_width, (JDIMENSION) biHeight, (JDIMENSION) 1); + source->pub.get_pixel_rows = preload_image; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + + /* Allocate one-row buffer for returned data */ + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (biWidth * 3), (JDIMENSION) 1); + source->pub.buffer_height = 1; + + cinfo->in_color_space = JCS_RGB; + cinfo->input_components = 3; + cinfo->data_precision = 8; + cinfo->image_width = (JDIMENSION) biWidth; + cinfo->image_height = (JDIMENSION) biHeight; +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for BMP format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_bmp (j_compress_ptr cinfo) +{ + bmp_source_ptr source; + + /* Create module interface object */ + source = (bmp_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(bmp_source_struct)); + source->cinfo = cinfo; /* make back link for subroutines */ + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_bmp; + source->pub.finish_input = finish_input_bmp; + + return (cjpeg_source_ptr) source; +} + +#endif /* BMP_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdcolmap.c glmark2-2021.02/src/libjpeg-turbo/rdcolmap.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdcolmap.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdcolmap.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,254 @@ +/* + * rdcolmap.c + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file implements djpeg's "-map file" switch. It reads a source image + * and constructs a colormap to be supplied to the JPEG decompressor. + * + * Currently, these file formats are supported for the map file: + * GIF: the contents of the GIF's global colormap are used. + * PPM (either text or raw flavor): the entire file is read and + * each unique pixel value is entered in the map. + * Note that reading a large PPM file will be horrendously slow. + * Typically, a PPM-format map file should contain just one pixel + * of each desired color. Such a file can be extracted from an + * ordinary image PPM file with ppmtomap(1). + * + * Rescaling a PPM that has a maxval unequal to MAXJSAMPLE is not + * currently implemented. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */ + +/* Portions of this code are based on the PBMPLUS library, which is: +** +** Copyright (C) 1988 by Jef Poskanzer. +** +** Permission to use, copy, modify, and distribute this software and its +** documentation for any purpose and without fee is hereby granted, provided +** that the above copyright notice appear in all copies and that both that +** copyright notice and this permission notice appear in supporting +** documentation. This software is provided "as is" without express or +** implied warranty. +*/ + + +/* + * Add a (potentially) new color to the color map. + */ + +LOCAL(void) +add_map_entry (j_decompress_ptr cinfo, int R, int G, int B) +{ + JSAMPROW colormap0 = cinfo->colormap[0]; + JSAMPROW colormap1 = cinfo->colormap[1]; + JSAMPROW colormap2 = cinfo->colormap[2]; + int ncolors = cinfo->actual_number_of_colors; + int index; + + /* Check for duplicate color. */ + for (index = 0; index < ncolors; index++) { + if (GETJSAMPLE(colormap0[index]) == R && + GETJSAMPLE(colormap1[index]) == G && + GETJSAMPLE(colormap2[index]) == B) + return; /* color is already in map */ + } + + /* Check for map overflow. */ + if (ncolors >= (MAXJSAMPLE+1)) + ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, (MAXJSAMPLE+1)); + + /* OK, add color to map. */ + colormap0[ncolors] = (JSAMPLE) R; + colormap1[ncolors] = (JSAMPLE) G; + colormap2[ncolors] = (JSAMPLE) B; + cinfo->actual_number_of_colors++; +} + + +/* + * Extract color map from a GIF file. + */ + +LOCAL(void) +read_gif_map (j_decompress_ptr cinfo, FILE *infile) +{ + int header[13]; + int i, colormaplen; + int R, G, B; + + /* Initial 'G' has already been read by read_color_map */ + /* Read the rest of the GIF header and logical screen descriptor */ + for (i = 1; i < 13; i++) { + if ((header[i] = getc(infile)) == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + } + + /* Verify GIF Header */ + if (header[1] != 'I' || header[2] != 'F') + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* There must be a global color map. */ + if ((header[10] & 0x80) == 0) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* OK, fetch it. */ + colormaplen = 2 << (header[10] & 0x07); + + for (i = 0; i < colormaplen; i++) { + R = getc(infile); + G = getc(infile); + B = getc(infile); + if (R == EOF || G == EOF || B == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + add_map_entry(cinfo, + R << (BITS_IN_JSAMPLE-8), + G << (BITS_IN_JSAMPLE-8), + B << (BITS_IN_JSAMPLE-8)); + } +} + + +/* Support routines for reading PPM */ + + +LOCAL(int) +pbm_getc (FILE *infile) +/* Read next char, skipping over any comments */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(infile); + if (ch == '#') { + do { + ch = getc(infile); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(unsigned int) +read_pbm_integer (j_decompress_ptr cinfo, FILE *infile) +/* Read an unsigned decimal integer from the PPM file */ +/* Swallows one trailing character after the integer */ +/* Note that on a 16-bit-int machine, only values up to 64k can be read. */ +/* This should not be a problem in practice. */ +{ + register int ch; + register unsigned int val; + + /* Skip any leading whitespace */ + do { + ch = pbm_getc(infile); + if (ch == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); + + if (ch < '0' || ch > '9') + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + val = ch - '0'; + while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') { + val *= 10; + val += ch - '0'; + } + return val; +} + + +/* + * Extract color map from a PPM file. + */ + +LOCAL(void) +read_ppm_map (j_decompress_ptr cinfo, FILE *infile) +{ + int c; + unsigned int w, h, maxval, row, col; + int R, G, B; + + /* Initial 'P' has already been read by read_color_map */ + c = getc(infile); /* save format discriminator for a sec */ + + /* while we fetch the remaining header info */ + w = read_pbm_integer(cinfo, infile); + h = read_pbm_integer(cinfo, infile); + maxval = read_pbm_integer(cinfo, infile); + + if (w <= 0 || h <= 0 || maxval <= 0) /* error check */ + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + /* For now, we don't support rescaling from an unusual maxval. */ + if (maxval != (unsigned int) MAXJSAMPLE) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + + switch (c) { + case '3': /* it's a text-format PPM file */ + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + R = read_pbm_integer(cinfo, infile); + G = read_pbm_integer(cinfo, infile); + B = read_pbm_integer(cinfo, infile); + add_map_entry(cinfo, R, G, B); + } + } + break; + + case '6': /* it's a raw-format PPM file */ + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + R = getc(infile); + G = getc(infile); + B = getc(infile); + if (R == EOF || G == EOF || B == EOF) + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + add_map_entry(cinfo, R, G, B); + } + } + break; + + default: + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + break; + } +} + + +/* + * Main entry point from djpeg.c. + * Input: opened input file (from file name argument on command line). + * Output: colormap and actual_number_of_colors fields are set in cinfo. + */ + +GLOBAL(void) +read_color_map (j_decompress_ptr cinfo, FILE *infile) +{ + /* Allocate space for a color map of maximum supported size. */ + cinfo->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) (MAXJSAMPLE+1), (JDIMENSION) 3); + cinfo->actual_number_of_colors = 0; /* initialize map to empty */ + + /* Read first byte to determine file format */ + switch (getc(infile)) { + case 'G': + read_gif_map(cinfo, infile); + break; + case 'P': + read_ppm_map(cinfo, infile); + break; + default: + ERREXIT(cinfo, JERR_BAD_CMAP_FILE); + break; + } +} + +#endif /* QUANT_2PASS_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdgif.c glmark2-2021.02/src/libjpeg-turbo/rdgif.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdgif.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdgif.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,39 @@ +/* + * rdgif.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in GIF format. + * + ***************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * the ability to read GIF files has been removed from the IJG distribution. * + * Sorry about that. * + ***************************************************************************** + * + * We are required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + +/* + * The module selection routine for GIF format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_gif (j_compress_ptr cinfo) +{ + fprintf(stderr, "GIF input is unsupported for legal reasons. Sorry.\n"); + exit(EXIT_FAILURE); + return NULL; /* keep compiler happy */ +} + +#endif /* GIF_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdjpgcom.1 glmark2-2021.02/src/libjpeg-turbo/rdjpgcom.1 --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdjpgcom.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdjpgcom.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,63 @@ +.TH RDJPGCOM 1 "02 April 2009" +.SH NAME +rdjpgcom \- display text comments from a JPEG file +.SH SYNOPSIS +.B rdjpgcom +[ +.B \-raw +] +[ +.B \-verbose +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B rdjpgcom +reads the named JPEG/JFIF file, or the standard input if no file is named, +and prints any text comments found in the file on the standard output. +.PP +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. +.SH OPTIONS +.TP +.B \-raw +Normally +.B rdjpgcom +escapes non-printable characters in comments, for security reasons. +This option avoids that. +.PP +.B \-verbose +Causes +.B rdjpgcom +to also display the JPEG image dimensions. +.PP +Switch names may be abbreviated, and are not case sensitive. +.SH HINTS +.B rdjpgcom +does not depend on the IJG JPEG library. Its source code is intended as an +illustration of the minimum amount of code required to parse a JPEG file +header correctly. +.PP +In +.B \-verbose +mode, +.B rdjpgcom +will also attempt to print the contents of any "APP12" markers as text. +Some digital cameras produce APP12 markers containing useful textual +information. If you like, you can modify the source code to print +other APPn marker types as well. +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR jpegtran (1), +.BR wrjpgcom (1) +.SH AUTHOR +Independent JPEG Group diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdjpgcom.c glmark2-2021.02/src/libjpeg-turbo/rdjpgcom.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdjpgcom.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdjpgcom.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,510 @@ +/* + * rdjpgcom.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * Modified 2009 by Bill Allombert, Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a very simple stand-alone application that displays + * the text in COM (comment) markers in a JFIF file. + * This may be useful as an example of the minimum logic needed to parse + * JPEG markers. + */ + +#define JPEG_CJPEG_DJPEG /* to get the command-line config symbols */ +#include "jinclude.h" /* get auto-config symbols, */ + +#ifdef HAVE_LOCALE_H +#include /* Bill Allombert: use locale for isprint */ +#endif +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#else +#define READ_BINARY "rb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + + +/* + * These macros are used to read the input file. + * To reuse this code in another application, you might need to change these. + */ + +static FILE *infile; /* input JPEG file */ + +/* Return next input byte, or EOF if no more */ +#define NEXTBYTE() getc(infile) + + +/* Error exit handler */ +#define ERREXIT(msg) (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE)) + + +/* Read one byte, testing for EOF */ +static int +read_1_byte (void) +{ + int c; + + c = NEXTBYTE(); + if (c == EOF) + ERREXIT("Premature EOF in JPEG file"); + return c; +} + +/* Read 2 bytes, convert to unsigned int */ +/* All 2-byte quantities in JPEG markers are MSB first */ +static unsigned int +read_2_bytes (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + if (c1 == EOF) + ERREXIT("Premature EOF in JPEG file"); + c2 = NEXTBYTE(); + if (c2 == EOF) + ERREXIT("Premature EOF in JPEG file"); + return (((unsigned int) c1) << 8) + ((unsigned int) c2); +} + + +/* + * JPEG markers consist of one or more 0xFF bytes, followed by a marker + * code byte (which is not an FF). Here are the marker codes of interest + * in this program. (See jdmarker.c for a more complete list.) + */ + +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB +#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_APP0 0xE0 /* Application-specific marker, type N */ +#define M_APP12 0xEC /* (we don't bother to list all 16 APPn's) */ +#define M_COM 0xFE /* COMment */ + + +/* + * Find the next JPEG marker and return its marker code. + * We expect at least one FF byte, possibly more if the compressor used FFs + * to pad the file. + * There could also be non-FF garbage between markers. The treatment of such + * garbage is unspecified; we choose to skip over it but emit a warning msg. + * NB: this routine must not be used after seeing SOS marker, since it will + * not deal correctly with FF/00 sequences in the compressed image data... + */ + +static int +next_marker (void) +{ + int c; + int discarded_bytes = 0; + + /* Find 0xFF byte; count and skip any non-FFs. */ + c = read_1_byte(); + while (c != 0xFF) { + discarded_bytes++; + c = read_1_byte(); + } + /* Get marker code byte, swallowing any duplicate FF bytes. Extra FFs + * are legal as pad bytes, so don't count them in discarded_bytes. + */ + do { + c = read_1_byte(); + } while (c == 0xFF); + + if (discarded_bytes != 0) { + fprintf(stderr, "Warning: garbage data found in JPEG file\n"); + } + + return c; +} + + +/* + * Read the initial marker, which should be SOI. + * For a JFIF file, the first two bytes of the file should be literally + * 0xFF M_SOI. To be more general, we could use next_marker, but if the + * input file weren't actually JPEG at all, next_marker might read the whole + * file and then return a misleading error message... + */ + +static int +first_marker (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + c2 = NEXTBYTE(); + if (c1 != 0xFF || c2 != M_SOI) + ERREXIT("Not a JPEG file"); + return c2; +} + + +/* + * Most types of marker are followed by a variable-length parameter segment. + * This routine skips over the parameters for any marker we don't otherwise + * want to process. + * Note that we MUST skip the parameter segment explicitly in order not to + * be fooled by 0xFF bytes that might appear within the parameter segment; + * such bytes do NOT introduce new markers. + */ + +static void +skip_variable (void) +/* Skip over an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + (void) read_1_byte(); + length--; + } +} + + +/* + * Process a COM marker. + * We want to print out the marker contents as legible text; + * we must guard against non-text junk and varying newline representations. + */ + +static void +process_COM (int raw) +{ + unsigned int length; + int ch; + int lastch = 0; + + /* Bill Allombert: set locale properly for isprint */ +#ifdef HAVE_LOCALE_H + setlocale(LC_CTYPE, ""); +#endif + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + + while (length > 0) { + ch = read_1_byte(); + if (raw) { + putc(ch, stdout); + /* Emit the character in a readable form. + * Nonprintables are converted to \nnn form, + * while \ is converted to \\. + * Newlines in CR, CR/LF, or LF form will be printed as one newline. + */ + } else if (ch == '\r') { + printf("\n"); + } else if (ch == '\n') { + if (lastch != '\r') + printf("\n"); + } else if (ch == '\\') { + printf("\\\\"); + } else if (isprint(ch)) { + putc(ch, stdout); + } else { + printf("\\%03o", ch); + } + lastch = ch; + length--; + } + printf("\n"); + + /* Bill Allombert: revert to C locale */ +#ifdef HAVE_LOCALE_H + setlocale(LC_CTYPE, "C"); +#endif +} + + +/* + * Process a SOFn marker. + * This code is only needed if you want to know the image dimensions... + */ + +static void +process_SOFn (int marker) +{ + unsigned int length; + unsigned int image_height, image_width; + int data_precision, num_components; + const char *process; + int ci; + + length = read_2_bytes(); /* usual parameter length count */ + + data_precision = read_1_byte(); + image_height = read_2_bytes(); + image_width = read_2_bytes(); + num_components = read_1_byte(); + + switch (marker) { + case M_SOF0: process = "Baseline"; break; + case M_SOF1: process = "Extended sequential"; break; + case M_SOF2: process = "Progressive"; break; + case M_SOF3: process = "Lossless"; break; + case M_SOF5: process = "Differential sequential"; break; + case M_SOF6: process = "Differential progressive"; break; + case M_SOF7: process = "Differential lossless"; break; + case M_SOF9: process = "Extended sequential, arithmetic coding"; break; + case M_SOF10: process = "Progressive, arithmetic coding"; break; + case M_SOF11: process = "Lossless, arithmetic coding"; break; + case M_SOF13: process = "Differential sequential, arithmetic coding"; break; + case M_SOF14: process = "Differential progressive, arithmetic coding"; break; + case M_SOF15: process = "Differential lossless, arithmetic coding"; break; + default: process = "Unknown"; break; + } + + printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n", + image_width, image_height, num_components, data_precision); + printf("JPEG process: %s\n", process); + + if (length != (unsigned int) (8 + num_components * 3)) + ERREXIT("Bogus SOF marker length"); + + for (ci = 0; ci < num_components; ci++) { + (void) read_1_byte(); /* Component ID code */ + (void) read_1_byte(); /* H, V sampling factors */ + (void) read_1_byte(); /* Quantization table number */ + } +} + + +/* + * Parse the marker stream until SOS or EOI is seen; + * display any COM markers. + * While the companion program wrjpgcom will always insert COM markers before + * SOFn, other implementations might not, so we scan to SOS before stopping. + * If we were only interested in the image dimensions, we would stop at SOFn. + * (Conversely, if we only cared about COM markers, there would be no need + * for special code to handle SOFn; we could treat it like other markers.) + */ + +static int +scan_JPEG_header (int verbose, int raw) +{ + int marker; + + /* Expect SOI at start of file */ + if (first_marker() != M_SOI) + ERREXIT("Expected SOI marker first"); + + /* Scan miscellaneous markers until we reach SOS. */ + for (;;) { + marker = next_marker(); + switch (marker) { + /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be, + * treated as SOFn. C4 in particular is actually DHT. + */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF2: /* Progressive, Huffman */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ + if (verbose) + process_SOFn(marker); + else + skip_variable(); + break; + + case M_SOS: /* stop before hitting compressed data */ + return marker; + + case M_EOI: /* in case it's a tables-only JPEG stream */ + return marker; + + case M_COM: + process_COM(raw); + break; + + case M_APP12: + /* Some digital camera makers put useful textual information into + * APP12 markers, so we print those out too when in -verbose mode. + */ + if (verbose) { + printf("APP12 contains:\n"); + process_COM(raw); + } else + skip_variable(); + break; + + default: /* Anything else just gets skipped */ + skip_variable(); /* we assume it has a parameter count... */ + break; + } + } /* end loop */ +} + + +/* Command line parsing code */ + +static const char *progname; /* program name for error messages */ + + +static void +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "rdjpgcom displays any textual comments in a JPEG file.\n"); + + fprintf(stderr, "Usage: %s [switches] [inputfile]\n", progname); + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -raw Display non-printable characters in comments (unsafe)\n"); + fprintf(stderr, " -verbose Also display dimensions of JPEG image\n"); + + exit(EXIT_FAILURE); +} + + +static int +keymatch (char *arg, const char *keyword, int minchars) +/* Case-insensitive matching of (possibly abbreviated) keyword switches. */ +/* keyword is the constant keyword (must be lower case already), */ +/* minchars is length of minimum legal abbreviation. */ +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return 0; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return 0; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return 0; + return 1; /* A-OK */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int argn; + char *arg; + int verbose = 0, raw = 0; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "rdjpgcom"; /* in case C library doesn't provide it */ + + /* Parse switches, if any */ + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (arg[0] != '-') + break; /* not switch, must be file name */ + arg++; /* advance over '-' */ + if (keymatch(arg, "verbose", 1)) { + verbose++; + } else if (keymatch(arg, "raw", 1)) { + raw = 1; + } else + usage(); + } + + /* Open the input file. */ + /* Unix style: expect zero or one file name */ + if (argn < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } + if (argn < argc) { + if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdin\n", progname); + exit(EXIT_FAILURE); + } +#else + infile = stdin; +#endif + } + + /* Scan the JPEG headers. */ + (void) scan_JPEG_header(verbose, raw); + + /* All done. */ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdppm.c glmark2-2021.02/src/libjpeg-turbo/rdppm.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdppm.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdppm.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,461 @@ +/* + * rdppm.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2009 by Bill Allombert, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in PPM/PGM format. + * The extended 2-byte-per-sample raw PPM/PGM formats are supported. + * The PBMPLUS library is NOT required to compile this software + * (but it is highly useful as a set of PPM image manipulation programs). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed PPM format). + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef PPM_SUPPORTED + + +/* Portions of this code are based on the PBMPLUS library, which is: +** +** Copyright (C) 1988 by Jef Poskanzer. +** +** Permission to use, copy, modify, and distribute this software and its +** documentation for any purpose and without fee is hereby granted, provided +** that the above copyright notice appear in all copies and that both that +** copyright notice and this permission notice appear in supporting +** documentation. This software is provided "as is" without express or +** implied warranty. +*/ + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct { + struct cjpeg_source_struct pub; /* public fields */ + + /* Usually these two pointers point to the same place: */ + U_CHAR *iobuffer; /* fread's I/O buffer */ + JSAMPROW pixrow; /* compressor input buffer */ + size_t buffer_width; /* width of I/O buffer */ + JSAMPLE *rescale; /* => maxval-remapping array, or NULL */ + int maxval; +} ppm_source_struct; + +typedef ppm_source_struct *ppm_source_ptr; + + +LOCAL(int) +pbm_getc (FILE *infile) +/* Read next char, skipping over any comments */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(infile); + if (ch == '#') { + do { + ch = getc(infile); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(unsigned int) +read_pbm_integer (j_compress_ptr cinfo, FILE *infile, unsigned int maxval) +/* Read an unsigned decimal integer from the PPM file */ +/* Swallows one trailing character after the integer */ +/* Note that on a 16-bit-int machine, only values up to 64k can be read. */ +/* This should not be a problem in practice. */ +{ + register int ch; + register unsigned int val; + + /* Skip any leading whitespace */ + do { + ch = pbm_getc(infile); + if (ch == EOF) + ERREXIT(cinfo, JERR_INPUT_EOF); + } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); + + if (ch < '0' || ch > '9') + ERREXIT(cinfo, JERR_PPM_NONNUMERIC); + + val = ch - '0'; + while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') { + val *= 10; + val += ch - '0'; + } + + if (val > maxval) + ERREXIT(cinfo, JERR_PPM_TOOLARGE); + + return val; +} + + +/* + * Read one row of pixels. + * + * We provide several different versions depending on input file format. + * In all cases, input is scaled to the size of JSAMPLE. + * + * A really fast path is provided for reading byte/sample raw files with + * maxval = MAXJSAMPLE, which is the normal case for 8-bit data. + */ + + +METHODDEF(JDIMENSION) +get_text_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_text_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_scaled_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[UCH(*bufferptr++)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_scaled_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + *ptr++ = rescale[UCH(*bufferptr++)]; + *ptr++ = rescale[UCH(*bufferptr++)]; + *ptr++ = rescale[UCH(*bufferptr++)]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_raw_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format files with maxval = MAXJSAMPLE. + * In this case we just read right into the JSAMPLE buffer! + * Note that same code works for PPM and PGM files. + */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + return 1; +} + + +METHODDEF(JDIMENSION) +get_word_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-word-format PGM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + register int temp; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_word_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-word-format PPM files with any maxval */ +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + + if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + for (col = cinfo->image_width; col > 0; col--) { + register int temp; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + temp = UCH(*bufferptr++) << 8; + temp |= UCH(*bufferptr++); + *ptr++ = rescale[temp]; + } + return 1; +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + ppm_source_ptr source = (ppm_source_ptr) sinfo; + int c; + unsigned int w, h, maxval; + boolean need_iobuffer, use_raw_buffer, need_rescale; + + if (getc(source->pub.input_file) != 'P') + ERREXIT(cinfo, JERR_PPM_NOT); + + c = getc(source->pub.input_file); /* subformat discriminator character */ + + /* detect unsupported variants (ie, PBM) before trying to read header */ + switch (c) { + case '2': /* it's a text-format PGM file */ + case '3': /* it's a text-format PPM file */ + case '5': /* it's a raw-format PGM file */ + case '6': /* it's a raw-format PPM file */ + break; + default: + ERREXIT(cinfo, JERR_PPM_NOT); + break; + } + + /* fetch the remaining header info */ + w = read_pbm_integer(cinfo, source->pub.input_file, 65535); + h = read_pbm_integer(cinfo, source->pub.input_file, 65535); + maxval = read_pbm_integer(cinfo, source->pub.input_file, 65535); + + if (w <= 0 || h <= 0 || maxval <= 0) /* error check */ + ERREXIT(cinfo, JERR_PPM_NOT); + + cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */ + cinfo->image_width = (JDIMENSION) w; + cinfo->image_height = (JDIMENSION) h; + source->maxval = maxval; + + /* initialize flags to most common settings */ + need_iobuffer = TRUE; /* do we need an I/O buffer? */ + use_raw_buffer = FALSE; /* do we map input buffer onto I/O buffer? */ + need_rescale = TRUE; /* do we need a rescale array? */ + + switch (c) { + case '2': /* it's a text-format PGM file */ + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h); + source->pub.get_pixel_rows = get_text_gray_row; + need_iobuffer = FALSE; + break; + + case '3': /* it's a text-format PPM file */ + cinfo->input_components = 3; + cinfo->in_color_space = JCS_RGB; + TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h); + source->pub.get_pixel_rows = get_text_rgb_row; + need_iobuffer = FALSE; + break; + + case '5': /* it's a raw-format PGM file */ + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_PGM, w, h); + if (maxval > 255) { + source->pub.get_pixel_rows = get_word_gray_row; + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + source->pub.get_pixel_rows = get_raw_row; + use_raw_buffer = TRUE; + need_rescale = FALSE; + } else { + source->pub.get_pixel_rows = get_scaled_gray_row; + } + break; + + case '6': /* it's a raw-format PPM file */ + cinfo->input_components = 3; + cinfo->in_color_space = JCS_RGB; + TRACEMS2(cinfo, 1, JTRC_PPM, w, h); + if (maxval > 255) { + source->pub.get_pixel_rows = get_word_rgb_row; + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + source->pub.get_pixel_rows = get_raw_row; + use_raw_buffer = TRUE; + need_rescale = FALSE; + } else { + source->pub.get_pixel_rows = get_scaled_rgb_row; + } + break; + } + + /* Allocate space for I/O buffer: 1 or 3 bytes or words/pixel. */ + if (need_iobuffer) { + source->buffer_width = (size_t) w * cinfo->input_components * + ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR))); + source->iobuffer = (U_CHAR *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + source->buffer_width); + } + + /* Create compressor input buffer. */ + if (use_raw_buffer) { + /* For unscaled raw-input case, we can just map it onto the I/O buffer. */ + /* Synthesize a JSAMPARRAY pointer structure */ + source->pixrow = (JSAMPROW) source->iobuffer; + source->pub.buffer = & source->pixrow; + source->pub.buffer_height = 1; + } else { + /* Need to translate anyway, so make a separate sample buffer. */ + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) w * cinfo->input_components, (JDIMENSION) 1); + source->pub.buffer_height = 1; + } + + /* Compute the rescaling array if required. */ + if (need_rescale) { + long val, half_maxval; + + /* On 16-bit-int machines we have to be careful of maxval = 65535 */ + source->rescale = (JSAMPLE *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (size_t) (((long) maxval + 1L) * + sizeof(JSAMPLE))); + half_maxval = maxval / 2; + for (val = 0; val <= (long) maxval; val++) { + /* The multiplication here must be done in 32 bits to avoid overflow */ + source->rescale[val] = (JSAMPLE) ((val * MAXJSAMPLE + half_maxval) / + maxval); + } + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for PPM format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_ppm (j_compress_ptr cinfo) +{ + ppm_source_ptr source; + + /* Create module interface object */ + source = (ppm_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(ppm_source_struct)); + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_ppm; + source->pub.finish_input = finish_input_ppm; + + return (cjpeg_source_ptr) source; +} + +#endif /* PPM_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdrle.c glmark2-2021.02/src/libjpeg-turbo/rdrle.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdrle.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdrle.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,389 @@ +/* + * rdrle.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Utah RLE format. + * The Utah Raster Toolkit library is required (version 3.1 or later). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed RLE format). + * + * Based on code contributed by Mike Lijewski, + * with updates from Robert Hutchinson. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef RLE_SUPPORTED + +/* rle.h is provided by the Utah Raster Toolkit. */ + +#include + +/* + * We assume that JSAMPLE has the same representation as rle_pixel, + * to wit, "unsigned char". Hence we can't cope with 12- or 16-bit samples. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + +/* + * We support the following types of RLE files: + * + * GRAYSCALE - 8 bits, no colormap + * MAPPEDGRAY - 8 bits, 1 channel colomap + * PSEUDOCOLOR - 8 bits, 3 channel colormap + * TRUECOLOR - 24 bits, 3 channel colormap + * DIRECTCOLOR - 24 bits, no colormap + * + * For now, we ignore any alpha channel in the image. + */ + +typedef enum + { GRAYSCALE, MAPPEDGRAY, PSEUDOCOLOR, TRUECOLOR, DIRECTCOLOR } rle_kind; + + +/* + * Since RLE stores scanlines bottom-to-top, we have to invert the image + * to conform to JPEG's top-to-bottom order. To do this, we read the + * incoming image into a virtual array on the first get_pixel_rows call, + * then fetch the required row from the virtual array on subsequent calls. + */ + +typedef struct _rle_source_struct *rle_source_ptr; + +typedef struct _rle_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + rle_kind visual; /* actual type of input file */ + jvirt_sarray_ptr image; /* virtual array to hold the image */ + JDIMENSION row; /* current row # in the virtual array */ + rle_hdr header; /* Input file information */ + rle_pixel **rle_row; /* holds a row returned by rle_getrow() */ + +} rle_source_struct; + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JDIMENSION width, height; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* Use RLE library routine to get the header info */ + source->header = *rle_hdr_init(NULL); + source->header.rle_file = source->pub.input_file; + switch (rle_get_setup(&(source->header))) { + case RLE_SUCCESS: + /* A-OK */ + break; + case RLE_NOT_RLE: + ERREXIT(cinfo, JERR_RLE_NOT); + break; + case RLE_NO_SPACE: + ERREXIT(cinfo, JERR_RLE_MEM); + break; + case RLE_EMPTY: + ERREXIT(cinfo, JERR_RLE_EMPTY); + break; + case RLE_EOF: + ERREXIT(cinfo, JERR_RLE_EOF); + break; + default: + ERREXIT(cinfo, JERR_RLE_BADERROR); + break; + } + + /* Figure out what we have, set private vars and return values accordingly */ + + width = source->header.xmax - source->header.xmin + 1; + height = source->header.ymax - source->header.ymin + 1; + source->header.xmin = 0; /* realign horizontally */ + source->header.xmax = width-1; + + cinfo->image_width = width; + cinfo->image_height = height; + cinfo->data_precision = 8; /* we can only handle 8 bit data */ + + if (source->header.ncolors == 1 && source->header.ncmap == 0) { + source->visual = GRAYSCALE; + TRACEMS2(cinfo, 1, JTRC_RLE_GRAY, width, height); + } else if (source->header.ncolors == 1 && source->header.ncmap == 1) { + source->visual = MAPPEDGRAY; + TRACEMS3(cinfo, 1, JTRC_RLE_MAPGRAY, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 1 && source->header.ncmap == 3) { + source->visual = PSEUDOCOLOR; + TRACEMS3(cinfo, 1, JTRC_RLE_MAPPED, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 3 && source->header.ncmap == 3) { + source->visual = TRUECOLOR; + TRACEMS3(cinfo, 1, JTRC_RLE_FULLMAP, width, height, + 1 << source->header.cmaplen); + } else if (source->header.ncolors == 3 && source->header.ncmap == 0) { + source->visual = DIRECTCOLOR; + TRACEMS2(cinfo, 1, JTRC_RLE, width, height); + } else + ERREXIT(cinfo, JERR_RLE_UNSUPPORTED); + + if (source->visual == GRAYSCALE || source->visual == MAPPEDGRAY) { + cinfo->in_color_space = JCS_GRAYSCALE; + cinfo->input_components = 1; + } else { + cinfo->in_color_space = JCS_RGB; + cinfo->input_components = 3; + } + + /* + * A place to hold each scanline while it's converted. + * (GRAYSCALE scanlines don't need converting) + */ + if (source->visual != GRAYSCALE) { + source->rle_row = (rle_pixel**) (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) width, (JDIMENSION) cinfo->input_components); + } + + /* request a virtual array to hold the image */ + source->image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) (width * source->header.ncolors), + (JDIMENSION) height, (JDIMENSION) 1); + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + /* count file input as separate pass */ + progress->total_extra_passes++; + } +#endif + + source->pub.buffer_height = 1; +} + + +/* + * Read one row of pixels. + * Called only after load_image has read the image into the virtual array. + * Used for GRAYSCALE, MAPPEDGRAY, TRUECOLOR, and DIRECTCOLOR images. + */ + +METHODDEF(JDIMENSION) +get_rle_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + + source->row--; + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + + return 1; +} + +/* + * Read one row of pixels. + * Called only after load_image has read the image into the virtual array. + * Used for PSEUDOCOLOR images. + */ + +METHODDEF(JDIMENSION) +get_pseudocolor_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JSAMPROW src_row, dest_row; + JDIMENSION col; + rle_map *colormap; + int val; + + colormap = source->header.cmap; + dest_row = source->pub.buffer[0]; + source->row--; + src_row = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + + for (col = cinfo->image_width; col > 0; col--) { + val = GETJSAMPLE(*src_row++); + *dest_row++ = (JSAMPLE) (colormap[val ] >> 8); + *dest_row++ = (JSAMPLE) (colormap[val + 256] >> 8); + *dest_row++ = (JSAMPLE) (colormap[val + 512] >> 8); + } + + return 1; +} + + +/* + * Load the image into a virtual array. We have to do this because RLE + * files start at the lower left while the JPEG standard has them starting + * in the upper left. This is called the first time we want to get a row + * of input. What we do is load the RLE data into the array and then call + * the appropriate routine to read one row from the array. Before returning, + * we set source->pub.get_pixel_rows so that subsequent calls go straight to + * the appropriate row-reading routine. + */ + +METHODDEF(JDIMENSION) +load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + rle_source_ptr source = (rle_source_ptr) sinfo; + JDIMENSION row, col; + JSAMPROW scanline, red_ptr, green_ptr, blue_ptr; + rle_pixel **rle_row; + rle_map *colormap; + char channel; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + colormap = source->header.cmap; + rle_row = source->rle_row; + + /* Read the RLE data into our virtual array. + * We assume here that rle_pixel is represented the same as JSAMPLE. + */ + RLE_CLR_BIT(source->header, RLE_ALPHA); /* don't read the alpha channel */ + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_limit = cinfo->image_height; + progress->pub.pass_counter = 0; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + + switch (source->visual) { + + case GRAYSCALE: + case PSEUDOCOLOR: + for (row = 0; row < cinfo->image_height; row++) { + rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_getrow(&source->header, rle_row); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + break; + + case MAPPEDGRAY: + case TRUECOLOR: + for (row = 0; row < cinfo->image_height; row++) { + scanline = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_row = source->rle_row; + rle_getrow(&source->header, rle_row); + + for (col = 0; col < cinfo->image_width; col++) { + for (channel = 0; channel < source->header.ncolors; channel++) { + *scanline++ = (JSAMPLE) + (colormap[GETJSAMPLE(rle_row[channel][col]) + 256 * channel] >> 8); + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + break; + + case DIRECTCOLOR: + for (row = 0; row < cinfo->image_height; row++) { + scanline = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_getrow(&source->header, rle_row); + + red_ptr = rle_row[0]; + green_ptr = rle_row[1]; + blue_ptr = rle_row[2]; + + for (col = cinfo->image_width; col > 0; col--) { + *scanline++ = *red_ptr++; + *scanline++ = *green_ptr++; + *scanline++ = *blue_ptr++; + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) + progress->completed_extra_passes++; +#endif + + /* Set up to call proper row-extraction routine in future */ + if (source->visual == PSEUDOCOLOR) { + source->pub.buffer = source->rle_row; + source->pub.get_pixel_rows = get_pseudocolor_row; + } else { + source->pub.get_pixel_rows = get_rle_row; + } + source->row = cinfo->image_height; + + /* And fetch the topmost (bottommost) row */ + return (*source->pub.get_pixel_rows) (cinfo, sinfo); +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for RLE format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_rle (j_compress_ptr cinfo) +{ + rle_source_ptr source; + + /* Create module interface object */ + source = (rle_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(rle_source_struct)); + /* Fill in method ptrs */ + source->pub.start_input = start_input_rle; + source->pub.finish_input = finish_input_rle; + source->pub.get_pixel_rows = load_image; + + return (cjpeg_source_ptr) source; +} + +#endif /* RLE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdswitch.c glmark2-2021.02/src/libjpeg-turbo/rdswitch.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdswitch.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdswitch.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,424 @@ +/* + * rdswitch.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to process some of cjpeg's more complicated + * command-line switches. Switches processed here are: + * -qtables file Read quantization tables from text file + * -scans file Read scan script from text file + * -quality N[,N,...] Set quality ratings + * -qslots N[,N,...] Set component quantization table selectors + * -sample HxV[,HxV,...] Set component sampling factors + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include /* to declare isdigit(), isspace() */ + + +LOCAL(int) +text_getc (FILE *file) +/* Read next char, skipping over any comments (# to end of line) */ +/* A comment/newline sequence is returned as a newline */ +{ + register int ch; + + ch = getc(file); + if (ch == '#') { + do { + ch = getc(file); + } while (ch != '\n' && ch != EOF); + } + return ch; +} + + +LOCAL(boolean) +read_text_integer (FILE *file, long *result, int *termchar) +/* Read an unsigned decimal integer from a file, store it in result */ +/* Reads one trailing character after the integer; returns it in termchar */ +{ + register int ch; + register long val; + + /* Skip any leading whitespace, detect EOF */ + do { + ch = text_getc(file); + if (ch == EOF) { + *termchar = ch; + return FALSE; + } + } while (isspace(ch)); + + if (! isdigit(ch)) { + *termchar = ch; + return FALSE; + } + + val = ch - '0'; + while ((ch = text_getc(file)) != EOF) { + if (! isdigit(ch)) + break; + val *= 10; + val += ch - '0'; + } + *result = val; + *termchar = ch; + return TRUE; +} + + +#if JPEG_LIB_VERSION < 70 +static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100}; +#endif + +GLOBAL(boolean) +read_quant_tables (j_compress_ptr cinfo, char *filename, + boolean force_baseline) +/* Read a set of quantization tables from the specified file. + * The file is plain ASCII text: decimal numbers with whitespace between. + * Comments preceded by '#' may be included in the file. + * There may be one to NUM_QUANT_TBLS tables in the file, each of 64 values. + * The tables are implicitly numbered 0,1,etc. + * NOTE: does not affect the qslots mapping, which will default to selecting + * table 0 for luminance (or primary) components, 1 for chrominance components. + * You must use -qslots if you want a different component->table mapping. + */ +{ + FILE *fp; + int tblno, i, termchar; + long val; + unsigned int table[DCTSIZE2]; + + if ((fp = fopen(filename, "r")) == NULL) { + fprintf(stderr, "Can't open table file %s\n", filename); + return FALSE; + } + tblno = 0; + + while (read_text_integer(fp, &val, &termchar)) { /* read 1st element of table */ + if (tblno >= NUM_QUANT_TBLS) { + fprintf(stderr, "Too many tables in file %s\n", filename); + fclose(fp); + return FALSE; + } + table[0] = (unsigned int) val; + for (i = 1; i < DCTSIZE2; i++) { + if (! read_text_integer(fp, &val, &termchar)) { + fprintf(stderr, "Invalid table data in file %s\n", filename); + fclose(fp); + return FALSE; + } + table[i] = (unsigned int) val; + } +#if JPEG_LIB_VERSION >= 70 + jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno], + force_baseline); +#else + jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno], + force_baseline); +#endif + tblno++; + } + + if (termchar != EOF) { + fprintf(stderr, "Non-numeric data in file %s\n", filename); + fclose(fp); + return FALSE; + } + + fclose(fp); + return TRUE; +} + + +#ifdef C_MULTISCAN_FILES_SUPPORTED + +LOCAL(boolean) +read_scan_integer (FILE *file, long *result, int *termchar) +/* Variant of read_text_integer that always looks for a non-space termchar; + * this simplifies parsing of punctuation in scan scripts. + */ +{ + register int ch; + + if (! read_text_integer(file, result, termchar)) + return FALSE; + ch = *termchar; + while (ch != EOF && isspace(ch)) + ch = text_getc(file); + if (isdigit(ch)) { /* oops, put it back */ + if (ungetc(ch, file) == EOF) + return FALSE; + ch = ' '; + } else { + /* Any separators other than ';' and ':' are ignored; + * this allows user to insert commas, etc, if desired. + */ + if (ch != EOF && ch != ';' && ch != ':') + ch = ' '; + } + *termchar = ch; + return TRUE; +} + + +GLOBAL(boolean) +read_scan_script (j_compress_ptr cinfo, char *filename) +/* Read a scan script from the specified text file. + * Each entry in the file defines one scan to be emitted. + * Entries are separated by semicolons ';'. + * An entry contains one to four component indexes, + * optionally followed by a colon ':' and four progressive-JPEG parameters. + * The component indexes denote which component(s) are to be transmitted + * in the current scan. The first component has index 0. + * Sequential JPEG is used if the progressive-JPEG parameters are omitted. + * The file is free format text: any whitespace may appear between numbers + * and the ':' and ';' punctuation marks. Also, other punctuation (such + * as commas or dashes) can be placed between numbers if desired. + * Comments preceded by '#' may be included in the file. + * Note: we do very little validity checking here; + * jcmaster.c will validate the script parameters. + */ +{ + FILE *fp; + int scanno, ncomps, termchar; + long val; + jpeg_scan_info *scanptr; +#define MAX_SCANS 100 /* quite arbitrary limit */ + jpeg_scan_info scans[MAX_SCANS]; + + if ((fp = fopen(filename, "r")) == NULL) { + fprintf(stderr, "Can't open scan definition file %s\n", filename); + return FALSE; + } + scanptr = scans; + scanno = 0; + + while (read_scan_integer(fp, &val, &termchar)) { + if (scanno >= MAX_SCANS) { + fprintf(stderr, "Too many scans defined in file %s\n", filename); + fclose(fp); + return FALSE; + } + scanptr->component_index[0] = (int) val; + ncomps = 1; + while (termchar == ' ') { + if (ncomps >= MAX_COMPS_IN_SCAN) { + fprintf(stderr, "Too many components in one scan in file %s\n", + filename); + fclose(fp); + return FALSE; + } + if (! read_scan_integer(fp, &val, &termchar)) + goto bogus; + scanptr->component_index[ncomps] = (int) val; + ncomps++; + } + scanptr->comps_in_scan = ncomps; + if (termchar == ':') { + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Ss = (int) val; + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Se = (int) val; + if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + goto bogus; + scanptr->Ah = (int) val; + if (! read_scan_integer(fp, &val, &termchar)) + goto bogus; + scanptr->Al = (int) val; + } else { + /* set non-progressive parameters */ + scanptr->Ss = 0; + scanptr->Se = DCTSIZE2-1; + scanptr->Ah = 0; + scanptr->Al = 0; + } + if (termchar != ';' && termchar != EOF) { +bogus: + fprintf(stderr, "Invalid scan entry format in file %s\n", filename); + fclose(fp); + return FALSE; + } + scanptr++, scanno++; + } + + if (termchar != EOF) { + fprintf(stderr, "Non-numeric data in file %s\n", filename); + fclose(fp); + return FALSE; + } + + if (scanno > 0) { + /* Stash completed scan list in cinfo structure. + * NOTE: for cjpeg's use, JPOOL_IMAGE is the right lifetime for this data, + * but if you want to compress multiple images you'd want JPOOL_PERMANENT. + */ + scanptr = (jpeg_scan_info *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + scanno * sizeof(jpeg_scan_info)); + MEMCOPY(scanptr, scans, scanno * sizeof(jpeg_scan_info)); + cinfo->scan_info = scanptr; + cinfo->num_scans = scanno; + } + + fclose(fp); + return TRUE; +} + +#endif /* C_MULTISCAN_FILES_SUPPORTED */ + + +#if JPEG_LIB_VERSION < 70 +/* These are the sample quantization tables given in JPEG spec section K.1. + * The spec says that the values given produce "good" quality, and + * when divided by 2, "very good" quality. + */ +static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 +}; +static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 +}; + + +LOCAL(void) +jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) +{ + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, + q_scale_factor[0], force_baseline); + jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, + q_scale_factor[1], force_baseline); +} +#endif + + +GLOBAL(boolean) +set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline) +/* Process a quality-ratings parameter string, of the form + * N[,N,...] + * If there are more q-table slots than parameters, the last value is replicated. + */ +{ + int val = 75; /* default value */ + int tblno; + char ch; + + for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { + if (*arg) { + ch = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c", &val, &ch) < 1) + return FALSE; + if (ch != ',') /* syntax check */ + return FALSE; + /* Convert user 0-100 rating to percentage scaling */ +#if JPEG_LIB_VERSION >= 70 + cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); +#else + q_scale_factor[tblno] = jpeg_quality_scaling(val); +#endif + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining factors to last value */ +#if JPEG_LIB_VERSION >= 70 + cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); +#else + q_scale_factor[tblno] = jpeg_quality_scaling(val); +#endif + } + } + jpeg_default_qtables(cinfo, force_baseline); + return TRUE; +} + + +GLOBAL(boolean) +set_quant_slots (j_compress_ptr cinfo, char *arg) +/* Process a quantization-table-selectors parameter string, of the form + * N[,N,...] + * If there are more components than parameters, the last value is replicated. + */ +{ + int val = 0; /* default table # */ + int ci; + char ch; + + for (ci = 0; ci < MAX_COMPONENTS; ci++) { + if (*arg) { + ch = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c", &val, &ch) < 1) + return FALSE; + if (ch != ',') /* syntax check */ + return FALSE; + if (val < 0 || val >= NUM_QUANT_TBLS) { + fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n", + NUM_QUANT_TBLS-1); + return FALSE; + } + cinfo->comp_info[ci].quant_tbl_no = val; + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining components to last table */ + cinfo->comp_info[ci].quant_tbl_no = val; + } + } + return TRUE; +} + + +GLOBAL(boolean) +set_sample_factors (j_compress_ptr cinfo, char *arg) +/* Process a sample-factors parameter string, of the form + * HxV[,HxV,...] + * If there are more components than parameters, "1x1" is assumed for the rest. + */ +{ + int ci, val1, val2; + char ch1, ch2; + + for (ci = 0; ci < MAX_COMPONENTS; ci++) { + if (*arg) { + ch2 = ','; /* if not set by sscanf, will be ',' */ + if (sscanf(arg, "%d%c%d%c", &val1, &ch1, &val2, &ch2) < 3) + return FALSE; + if ((ch1 != 'x' && ch1 != 'X') || ch2 != ',') /* syntax check */ + return FALSE; + if (val1 <= 0 || val1 > 4 || val2 <= 0 || val2 > 4) { + fprintf(stderr, "JPEG sampling factors must be 1..4\n"); + return FALSE; + } + cinfo->comp_info[ci].h_samp_factor = val1; + cinfo->comp_info[ci].v_samp_factor = val2; + while (*arg && *arg++ != ',') /* advance to next segment of arg string */ + ; + } else { + /* reached end of parameter, set remaining components to 1x1 sampling */ + cinfo->comp_info[ci].h_samp_factor = 1; + cinfo->comp_info[ci].v_samp_factor = 1; + } + } + return TRUE; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdtarga.c glmark2-2021.02/src/libjpeg-turbo/rdtarga.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/rdtarga.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/rdtarga.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,503 @@ +/* + * rdtarga.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to read input images in Targa format. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; start_input may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed Targa format). + * + * Based on code contributed by Lee Daniel Crocker. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef TARGA_SUPPORTED + + +/* Macros to deal with unsigned chars as efficiently as compiler allows */ + +#ifdef HAVE_UNSIGNED_CHAR +typedef unsigned char U_CHAR; +#define UCH(x) ((int) (x)) +#else /* !HAVE_UNSIGNED_CHAR */ +#ifdef __CHAR_UNSIGNED__ +typedef char U_CHAR; +#define UCH(x) ((int) (x)) +#else +typedef char U_CHAR; +#define UCH(x) ((int) (x) & 0xFF) +#endif +#endif /* HAVE_UNSIGNED_CHAR */ + + +#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) + + +/* Private version of data source object */ + +typedef struct _tga_source_struct *tga_source_ptr; + +typedef struct _tga_source_struct { + struct cjpeg_source_struct pub; /* public fields */ + + j_compress_ptr cinfo; /* back link saves passing separate parm */ + + JSAMPARRAY colormap; /* Targa colormap (converted to my format) */ + + jvirt_sarray_ptr whole_image; /* Needed if funny input row order */ + JDIMENSION current_row; /* Current logical row number to read */ + + /* Pointer to routine to extract next Targa pixel from input file */ + void (*read_pixel) (tga_source_ptr sinfo); + + /* Result of read_pixel is delivered here: */ + U_CHAR tga_pixel[4]; + + int pixel_size; /* Bytes per Targa pixel (1 to 4) */ + + /* State info for reading RLE-coded pixels; both counts must be init to 0 */ + int block_count; /* # of pixels remaining in RLE block */ + int dup_pixel_count; /* # of times to duplicate previous pixel */ + + /* This saves the correct pixel-row-expansion method for preload_image */ + JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo); +} tga_source_struct; + + +/* For expanding 5-bit pixel values to 8-bit with best rounding */ + +static const UINT8 c5to8bits[32] = { + 0, 8, 16, 25, 33, 41, 49, 58, + 66, 74, 82, 90, 99, 107, 115, 123, + 132, 140, 148, 156, 165, 173, 181, 189, + 197, 206, 214, 222, 230, 239, 247, 255 +}; + + + +LOCAL(int) +read_byte (tga_source_ptr sinfo) +/* Read next byte from Targa file */ +{ + register FILE *infile = sinfo->pub.input_file; + register int c; + + if ((c = getc(infile)) == EOF) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + return c; +} + + +LOCAL(void) +read_colormap (tga_source_ptr sinfo, int cmaplen, int mapentrysize) +/* Read the colormap from a Targa file */ +{ + int i; + + /* Presently only handles 24-bit BGR format */ + if (mapentrysize != 24) + ERREXIT(sinfo->cinfo, JERR_TGA_BADCMAP); + + for (i = 0; i < cmaplen; i++) { + sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + } +} + + +/* + * read_pixel methods: get a single pixel from Targa file into tga_pixel[] + */ + +METHODDEF(void) +read_non_rle_pixel (tga_source_ptr sinfo) +/* Read one Targa pixel from the input file; no RLE expansion */ +{ + register FILE *infile = sinfo->pub.input_file; + register int i; + + for (i = 0; i < sinfo->pixel_size; i++) { + sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + } +} + + +METHODDEF(void) +read_rle_pixel (tga_source_ptr sinfo) +/* Read one Targa pixel from the input file, expanding RLE data as needed */ +{ + register FILE *infile = sinfo->pub.input_file; + register int i; + + /* Duplicate previously read pixel? */ + if (sinfo->dup_pixel_count > 0) { + sinfo->dup_pixel_count--; + return; + } + + /* Time to read RLE block header? */ + if (--sinfo->block_count < 0) { /* decrement pixels remaining in block */ + i = read_byte(sinfo); + if (i & 0x80) { /* Start of duplicate-pixel block? */ + sinfo->dup_pixel_count = i & 0x7F; /* number of dups after this one */ + sinfo->block_count = 0; /* then read new block header */ + } else { + sinfo->block_count = i & 0x7F; /* number of pixels after this one */ + } + } + + /* Read next pixel */ + for (i = 0; i < sinfo->pixel_size; i++) { + sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + } +} + + +/* + * Read one row of pixels. + * + * We provide several different versions depending on input file format. + */ + + +METHODDEF(JDIMENSION) +get_8bit_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit grayscale pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + } + return 1; +} + +METHODDEF(JDIMENSION) +get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 8-bit colormap indexes */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register int t; + register JSAMPROW ptr; + register JDIMENSION col; + register JSAMPARRAY colormap = source->colormap; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + t = UCH(source->tga_pixel[0]); + *ptr++ = colormap[0][t]; + *ptr++ = colormap[1][t]; + *ptr++ = colormap[2][t]; + } + return 1; +} + +METHODDEF(JDIMENSION) +get_16bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 16-bit pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register int t; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + t = UCH(source->tga_pixel[0]); + t += UCH(source->tga_pixel[1]) << 8; + /* We expand 5 bit data to 8 bit sample width. + * The format of the 16-bit (LSB first) input word is + * xRRRRRGGGGGBBBBB + */ + ptr[2] = (JSAMPLE) c5to8bits[t & 0x1F]; + t >>= 5; + ptr[1] = (JSAMPLE) c5to8bits[t & 0x1F]; + t >>= 5; + ptr[0] = (JSAMPLE) c5to8bits[t & 0x1F]; + ptr += 3; + } + return 1; +} + +METHODDEF(JDIMENSION) +get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading 24-bit pixels */ +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[2]); /* change BGR to RGB order */ + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[1]); + *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + } + return 1; +} + +/* + * Targa also defines a 32-bit pixel format with order B,G,R,A. + * We presently ignore the attribute byte, so the code for reading + * these pixels is identical to the 24-bit routine above. + * This works because the actual pixel length is only known to read_pixel. + */ + +#define get_32bit_row get_24bit_row + + +/* + * This method is for re-reading the input data in standard top-down + * row order. The entire image has already been read into whole_image + * with proper conversion of pixel format, but it's in a funny row order. + */ + +METHODDEF(JDIMENSION) +get_memory_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + JDIMENSION source_row; + + /* Compute row of source that maps to current_row of normal order */ + /* For now, assume image is bottom-up and not interlaced. */ + /* NEEDS WORK to support interlaced images! */ + source_row = cinfo->image_height - source->current_row - 1; + + /* Fetch that row from virtual array */ + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, + source_row, (JDIMENSION) 1, FALSE); + + source->current_row++; + return 1; +} + + +/* + * This method loads the image into whole_image during the first call on + * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call + * get_memory_row on subsequent calls. + */ + +METHODDEF(JDIMENSION) +preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + JDIMENSION row; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Read the data into a virtual array in input-file row order. */ + for (row = 0; row < cinfo->image_height; row++) { + if (progress != NULL) { + progress->pub.pass_counter = (long) row; + progress->pub.pass_limit = (long) cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + source->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->whole_image, row, (JDIMENSION) 1, TRUE); + (*source->get_pixel_rows) (cinfo, sinfo); + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Set up to read from the virtual array in unscrambled order */ + source->pub.get_pixel_rows = get_memory_row; + source->current_row = 0; + /* And read the first row */ + return get_memory_row(cinfo, sinfo); +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + tga_source_ptr source = (tga_source_ptr) sinfo; + U_CHAR targaheader[18]; + int idlen, cmaptype, subtype, flags, interlace_type, components; + unsigned int width, height, maplen; + boolean is_bottom_up; + +#define GET_2B(offset) ((unsigned int) UCH(targaheader[offset]) + \ + (((unsigned int) UCH(targaheader[offset+1])) << 8)) + + if (! ReadOK(source->pub.input_file, targaheader, 18)) + ERREXIT(cinfo, JERR_INPUT_EOF); + + /* Pretend "15-bit" pixels are 16-bit --- we ignore attribute bit anyway */ + if (targaheader[16] == 15) + targaheader[16] = 16; + + idlen = UCH(targaheader[0]); + cmaptype = UCH(targaheader[1]); + subtype = UCH(targaheader[2]); + maplen = GET_2B(5); + width = GET_2B(12); + height = GET_2B(14); + source->pixel_size = UCH(targaheader[16]) >> 3; + flags = UCH(targaheader[17]); /* Image Descriptor byte */ + + is_bottom_up = ((flags & 0x20) == 0); /* bit 5 set => top-down */ + interlace_type = flags >> 6; /* bits 6/7 are interlace code */ + + if (cmaptype > 1 || /* cmaptype must be 0 or 1 */ + source->pixel_size < 1 || source->pixel_size > 4 || + (UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */ + interlace_type != 0 || /* currently don't allow interlaced image */ + width == 0 || height == 0) /* image width/height must be non-zero */ + ERREXIT(cinfo, JERR_TGA_BADPARMS); + + if (subtype > 8) { + /* It's an RLE-coded file */ + source->read_pixel = read_rle_pixel; + source->block_count = source->dup_pixel_count = 0; + subtype -= 8; + } else { + /* Non-RLE file */ + source->read_pixel = read_non_rle_pixel; + } + + /* Now should have subtype 1, 2, or 3 */ + components = 3; /* until proven different */ + cinfo->in_color_space = JCS_RGB; + + switch (subtype) { + case 1: /* Colormapped image */ + if (source->pixel_size == 1 && cmaptype == 1) + source->get_pixel_rows = get_8bit_row; + else + ERREXIT(cinfo, JERR_TGA_BADPARMS); + TRACEMS2(cinfo, 1, JTRC_TGA_MAPPED, width, height); + break; + case 2: /* RGB image */ + switch (source->pixel_size) { + case 2: + source->get_pixel_rows = get_16bit_row; + break; + case 3: + source->get_pixel_rows = get_24bit_row; + break; + case 4: + source->get_pixel_rows = get_32bit_row; + break; + default: + ERREXIT(cinfo, JERR_TGA_BADPARMS); + break; + } + TRACEMS2(cinfo, 1, JTRC_TGA, width, height); + break; + case 3: /* Grayscale image */ + components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + if (source->pixel_size == 1) + source->get_pixel_rows = get_8bit_gray_row; + else + ERREXIT(cinfo, JERR_TGA_BADPARMS); + TRACEMS2(cinfo, 1, JTRC_TGA_GRAY, width, height); + break; + default: + ERREXIT(cinfo, JERR_TGA_BADPARMS); + break; + } + + if (is_bottom_up) { + /* Create a virtual array to buffer the upside-down image. */ + source->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) width * components, (JDIMENSION) height, (JDIMENSION) 1); + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + /* source->pub.buffer will point to the virtual array. */ + source->pub.buffer_height = 1; /* in case anyone looks at it */ + source->pub.get_pixel_rows = preload_image; + } else { + /* Don't need a virtual array, but do need a one-row input buffer. */ + source->whole_image = NULL; + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) width * components, (JDIMENSION) 1); + source->pub.buffer_height = 1; + source->pub.get_pixel_rows = source->get_pixel_rows; + } + + while (idlen--) /* Throw away ID field */ + (void) read_byte(source); + + if (maplen > 0) { + if (maplen > 256 || GET_2B(3) != 0) + ERREXIT(cinfo, JERR_TGA_BADCMAP); + /* Allocate space to store the colormap */ + source->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, (JDIMENSION) maplen, (JDIMENSION) 3); + /* and read it from the file */ + read_colormap(source, (int) maplen, UCH(targaheader[7])); + } else { + if (cmaptype) /* but you promised a cmap! */ + ERREXIT(cinfo, JERR_TGA_BADPARMS); + source->colormap = NULL; + } + + cinfo->input_components = components; + cinfo->data_precision = 8; + cinfo->image_width = width; + cinfo->image_height = height; +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + +/* + * The module selection routine for Targa format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_targa (j_compress_ptr cinfo) +{ + tga_source_ptr source; + + /* Create module interface object */ + source = (tga_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(tga_source_struct)); + source->cinfo = cinfo; /* make back link for subroutines */ + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_tga; + source->pub.finish_input = finish_input_tga; + + return (cjpeg_source_ptr) source; +} + +#endif /* TARGA_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README glmark2-2021.02/src/libjpeg-turbo/README --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/README 1970-01-01 08:00:00.000000000 +0800 @@ -1,290 +0,0 @@ -libjpeg-turbo note: This file contains portions of the libjpeg v6b and v8 -README files, with additional wordsmithing by The libjpeg-turbo Project. -It is included only for reference, as some parts of it may not apply to -libjpeg-turbo. Please see README-turbo.txt for information specific to -libjpeg-turbo. - - -The Independent JPEG Group's JPEG software -========================================== - -This distribution contains a release of the Independent JPEG Group's free JPEG -software. You are welcome to redistribute this software and to use it for any -purpose, subject to the conditions under LEGAL ISSUES, below. - -This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, -Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, -Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, -and other members of the Independent JPEG Group. - -IJG is not affiliated with the official ISO JPEG standards committee. - - -DOCUMENTATION ROADMAP -===================== - -This file contains the following sections: - -OVERVIEW General description of JPEG and the IJG software. -LEGAL ISSUES Copyright, lack of warranty, terms of distribution. -REFERENCES Where to learn more about JPEG. -ARCHIVE LOCATIONS Where to find newer versions of this software. -FILE FORMAT WARS Software *not* to get. -TO DO Plans for future IJG releases. - -Other documentation files in the distribution are: - -User documentation: - install.txt How to configure and install the IJG software. - usage.txt Usage instructions for cjpeg, djpeg, jpegtran, - rdjpgcom, and wrjpgcom. - *.1 Unix-style man pages for programs (same info as usage.txt). - wizard.txt Advanced usage instructions for JPEG wizards only. - change.log Version-to-version change highlights. -Programmer and internal documentation: - libjpeg.txt How to use the JPEG library in your own programs. - example.c Sample code for calling the JPEG library. - structure.txt Overview of the JPEG library's internal structure. - filelist.txt Road map of IJG files. - coderules.txt Coding style rules --- please read if you contribute code. - -Please read at least the files install.txt and usage.txt. Some information -can also be found in the JPEG FAQ (Frequently Asked Questions) article. See -ARCHIVE LOCATIONS below to find out where to obtain the FAQ article. - -If you want to understand how the JPEG code works, we suggest reading one or -more of the REFERENCES, then looking at the documentation files (in roughly -the order listed) before diving into the code. - - -OVERVIEW -======== - -This package contains C software to implement JPEG image encoding, decoding, -and transcoding. JPEG (pronounced "jay-peg") is a standardized compression -method for full-color and gray-scale images. JPEG's strong suit is compressing -photographic images or other types of images that have smooth color and -brightness transitions between neighboring pixels. Images with sharp lines or -other abrupt features may not compress well with JPEG, and a higher JPEG -quality may have to be used to avoid visible compression artifacts with such -images. - -JPEG is lossy, meaning that the output pixels are not necessarily identical to -the input pixels. However, on photographic content and other "smooth" images, -very good compression ratios can be obtained with no visible compression -artifacts, and extremely high compression ratios are possible if you are -willing to sacrifice image quality (by reducing the "quality" setting in the -compressor.) - -This software implements JPEG baseline, extended-sequential, and progressive -compression processes. Provision is made for supporting all variants of these -processes, although some uncommon parameter settings aren't implemented yet. -We have made no provision for supporting the hierarchical or lossless -processes defined in the standard. - -We provide a set of library routines for reading and writing JPEG image files, -plus two sample applications "cjpeg" and "djpeg", which use the library to -perform conversion between JPEG and some other popular image file formats. -The library is intended to be reused in other applications. - -In order to support file conversion and viewing software, we have included -considerable functionality beyond the bare JPEG coding/decoding capability; -for example, the color quantization modules are not strictly part of JPEG -decoding, but they are essential for output to colormapped file formats or -colormapped displays. These extra functions can be compiled out of the -library if not required for a particular application. - -We have also included "jpegtran", a utility for lossless transcoding between -different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple -applications for inserting and extracting textual comments in JFIF files. - -The emphasis in designing this software has been on achieving portability and -flexibility, while also making it fast enough to be useful. In particular, -the software is not intended to be read as a tutorial on JPEG. (See the -REFERENCES section for introductory material.) Rather, it is intended to -be reliable, portable, industrial-strength code. We do not claim to have -achieved that goal in every aspect of the software, but we strive for it. - -We welcome the use of this software as a component of commercial products. -No royalty is required, but we do ask for an acknowledgement in product -documentation, as described under LEGAL ISSUES. - - -LEGAL ISSUES -============ - -In plain English: - -1. We don't promise that this software works. (But if you find any bugs, - please let us know!) -2. You can use this software for whatever you want. You don't have to pay us. -3. You may not pretend that you wrote this software. If you use it in a - program, you must acknowledge somewhere in your documentation that - you've used the IJG code. - -In legalese: - -The authors make NO WARRANTY or representation, either express or implied, -with respect to this software, its quality, accuracy, merchantability, or -fitness for a particular purpose. This software is provided "AS IS", and you, -its user, assume the entire risk as to its quality and accuracy. - -This software is copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding. -All Rights Reserved except as specified below. - -Permission is hereby granted to use, copy, modify, and distribute this -software (or portions thereof) for any purpose, without fee, subject to these -conditions: -(1) If any part of the source code for this software is distributed, then this -README file must be included, with this copyright and no-warranty notice -unaltered; and any additions, deletions, or changes to the original files -must be clearly indicated in accompanying documentation. -(2) If only executable code is distributed, then the accompanying -documentation must state that "this software is based in part on the work of -the Independent JPEG Group". -(3) Permission for use of this software is granted only if the user accepts -full responsibility for any undesirable consequences; the authors accept -NO LIABILITY for damages of any kind. - -These conditions apply to any software derived from or based on the IJG code, -not just to the unmodified library. If you use our work, you ought to -acknowledge us. - -Permission is NOT granted for the use of any IJG author's name or company name -in advertising or publicity relating to this software or products derived from -it. This software may be referred to only as "the Independent JPEG Group's -software". - -We specifically permit and encourage the use of this software as the basis of -commercial products, provided that all warranty or liability claims are -assumed by the product vendor. - - -ansi2knr.c is included in this distribution by permission of L. Peter Deutsch, -sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA. -ansi2knr.c is NOT covered by the above copyright and conditions, but instead -by the usual distribution terms of the Free Software Foundation; principally, -that you must include source code if you redistribute it. (See the file -ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part -of any program generated from the IJG code, this does not limit you more than -the foregoing paragraphs do. - -The Unix configuration script "configure" was produced with GNU Autoconf. -It is copyright by the Free Software Foundation but is freely distributable. -The same holds for its supporting scripts (config.guess, config.sub, -ltmain.sh). Another support script, install-sh, is copyright by X Consortium -but is also freely distributable. - -The IJG distribution formerly included code to read and write GIF files. -To avoid entanglement with the Unisys LZW patent, GIF reading support has -been removed altogether, and the GIF writer has been simplified to produce -"uncompressed GIFs". This technique does not use the LZW algorithm; the -resulting GIF files are larger than usual, but are readable by all standard -GIF decoders. - -We are required to state that - "The Graphics Interchange Format(c) is the Copyright property of - CompuServe Incorporated. GIF(sm) is a Service Mark property of - CompuServe Incorporated." - - -REFERENCES -========== - -We recommend reading one or more of these references before trying to -understand the innards of the JPEG software. - -The best short technical introduction to the JPEG compression algorithm is - Wallace, Gregory K. "The JPEG Still Picture Compression Standard", - Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. -(Adjacent articles in that issue discuss MPEG motion picture compression, -applications of JPEG, and related topics.) If you don't have the CACM issue -handy, a PostScript file containing a revised version of Wallace's article is -available at http://www.ijg.org/files/wallace.ps.gz. The file (actually -a preprint for an article that appeared in IEEE Trans. Consumer Electronics) -omits the sample images that appeared in CACM, but it includes corrections -and some added material. Note: the Wallace article is copyright ACM and IEEE, -and it may not be used for commercial purposes. - -A somewhat less technical, more leisurely introduction to JPEG can be found in -"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by -M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides -good explanations and example C code for a multitude of compression methods -including JPEG. It is an excellent source if you are comfortable reading C -code but don't know much about data compression in general. The book's JPEG -sample code is far from industrial-strength, but when you are ready to look -at a full implementation, you've got one here... - -The best currently available description of JPEG is the textbook "JPEG Still -Image Data Compression Standard" by William B. Pennebaker and Joan L. -Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. -Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG -standards (DIS 10918-1 and draft DIS 10918-2). - -The original JPEG standard is divided into two parts, Part 1 being the actual -specification, while Part 2 covers compliance testing methods. Part 1 is -titled "Digital Compression and Coding of Continuous-tone Still Images, -Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS -10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of -Continuous-tone Still Images, Part 2: Compliance testing" and has document -numbers ISO/IEC IS 10918-2, ITU-T T.83. - -The JPEG standard does not specify all details of an interchangeable file -format. For the omitted details we follow the "JFIF" conventions, revision -1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report -and thus received a formal publication status. It is available as a free -download in PDF format from -http://www.ecma-international.org/publications/techreports/E-TR-098.htm. -A PostScript version of the JFIF document is available at -http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at -http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. - -The TIFF 6.0 file format specification can be obtained by FTP from -ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme -found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. -IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). -Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 -(Compression tag 7). Copies of this Note can be obtained from -http://www.ijg.org/files/. It is expected that the next revision -of the TIFF spec will replace the 6.0 JPEG design with the Note's design. -Although IJG's own code does not support TIFF/JPEG, the free libtiff library -uses our library to implement TIFF/JPEG per the Note. - - -ARCHIVE LOCATIONS -================= - -The "official" archive site for this software is www.ijg.org. -The most recent released version can always be found there in -directory "files". This particular version will be archived as -http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible -"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip. - -The JPEG FAQ (Frequently Asked Questions) article is a source of some -general information about JPEG. -It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ -and other news.answers archive sites, including the official news.answers -archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. -If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu -with body - send usenet/news.answers/jpeg-faq/part1 - send usenet/news.answers/jpeg-faq/part2 - - -FILE FORMAT WARS -================ - -The ISO JPEG standards committee actually promotes different formats like -"JPEG 2000" or "JPEG XR", which are incompatible with original DCT-based -JPEG. IJG therefore does not support these formats (see REFERENCES). Indeed, -one of the original reasons for developing this free software was to help -force convergence on common, interoperable format standards for JPEG files. -Don't use an incompatible file format! -(In any case, our decoder will remain capable of reading existing JPEG -image files indefinitely.) - - -TO DO -===== - -Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README.ijg glmark2-2021.02/src/libjpeg-turbo/README.ijg --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README.ijg 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/README.ijg 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,279 @@ +libjpeg-turbo note: This file has been modified by The libjpeg-turbo Project +to include only information relevant to libjpeg-turbo, to wordsmith certain +sections, and to remove impolitic language that existed in the libjpeg v8 +README. It is included only for reference. Please see README.md for +information specific to libjpeg-turbo. + + +The Independent JPEG Group's JPEG software +========================================== + +This distribution contains a release of the Independent JPEG Group's free JPEG +software. You are welcome to redistribute this software and to use it for any +purpose, subject to the conditions under LEGAL ISSUES, below. + +This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, +Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, +Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, +and other members of the Independent JPEG Group. + +IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee +(also known as JPEG, together with ITU-T SG16). + + +DOCUMENTATION ROADMAP +===================== + +This file contains the following sections: + +OVERVIEW General description of JPEG and the IJG software. +LEGAL ISSUES Copyright, lack of warranty, terms of distribution. +REFERENCES Where to learn more about JPEG. +ARCHIVE LOCATIONS Where to find newer versions of this software. +FILE FORMAT WARS Software *not* to get. +TO DO Plans for future IJG releases. + +Other documentation files in the distribution are: + +User documentation: + usage.txt Usage instructions for cjpeg, djpeg, jpegtran, + rdjpgcom, and wrjpgcom. + *.1 Unix-style man pages for programs (same info as usage.txt). + wizard.txt Advanced usage instructions for JPEG wizards only. + change.log Version-to-version change highlights. +Programmer and internal documentation: + libjpeg.txt How to use the JPEG library in your own programs. + example.c Sample code for calling the JPEG library. + structure.txt Overview of the JPEG library's internal structure. + coderules.txt Coding style rules --- please read if you contribute code. + +Please read at least usage.txt. Some information can also be found in the JPEG +FAQ (Frequently Asked Questions) article. See ARCHIVE LOCATIONS below to find +out where to obtain the FAQ article. + +If you want to understand how the JPEG code works, we suggest reading one or +more of the REFERENCES, then looking at the documentation files (in roughly +the order listed) before diving into the code. + + +OVERVIEW +======== + +This package contains C software to implement JPEG image encoding, decoding, +and transcoding. JPEG (pronounced "jay-peg") is a standardized compression +method for full-color and grayscale images. JPEG's strong suit is compressing +photographic images or other types of images that have smooth color and +brightness transitions between neighboring pixels. Images with sharp lines or +other abrupt features may not compress well with JPEG, and a higher JPEG +quality may have to be used to avoid visible compression artifacts with such +images. + +JPEG is lossy, meaning that the output pixels are not necessarily identical to +the input pixels. However, on photographic content and other "smooth" images, +very good compression ratios can be obtained with no visible compression +artifacts, and extremely high compression ratios are possible if you are +willing to sacrifice image quality (by reducing the "quality" setting in the +compressor.) + +This software implements JPEG baseline, extended-sequential, and progressive +compression processes. Provision is made for supporting all variants of these +processes, although some uncommon parameter settings aren't implemented yet. +We have made no provision for supporting the hierarchical or lossless +processes defined in the standard. + +We provide a set of library routines for reading and writing JPEG image files, +plus two sample applications "cjpeg" and "djpeg", which use the library to +perform conversion between JPEG and some other popular image file formats. +The library is intended to be reused in other applications. + +In order to support file conversion and viewing software, we have included +considerable functionality beyond the bare JPEG coding/decoding capability; +for example, the color quantization modules are not strictly part of JPEG +decoding, but they are essential for output to colormapped file formats or +colormapped displays. These extra functions can be compiled out of the +library if not required for a particular application. + +We have also included "jpegtran", a utility for lossless transcoding between +different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple +applications for inserting and extracting textual comments in JFIF files. + +The emphasis in designing this software has been on achieving portability and +flexibility, while also making it fast enough to be useful. In particular, +the software is not intended to be read as a tutorial on JPEG. (See the +REFERENCES section for introductory material.) Rather, it is intended to +be reliable, portable, industrial-strength code. We do not claim to have +achieved that goal in every aspect of the software, but we strive for it. + +We welcome the use of this software as a component of commercial products. +No royalty is required, but we do ask for an acknowledgement in product +documentation, as described under LEGAL ISSUES. + + +LEGAL ISSUES +============ + +In plain English: + +1. We don't promise that this software works. (But if you find any bugs, + please let us know!) +2. You can use this software for whatever you want. You don't have to pay us. +3. You may not pretend that you wrote this software. If you use it in a + program, you must acknowledge somewhere in your documentation that + you've used the IJG code. + +In legalese: + +The authors make NO WARRANTY or representation, either express or implied, +with respect to this software, its quality, accuracy, merchantability, or +fitness for a particular purpose. This software is provided "AS IS", and you, +its user, assume the entire risk as to its quality and accuracy. + +This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding. +All Rights Reserved except as specified below. + +Permission is hereby granted to use, copy, modify, and distribute this +software (or portions thereof) for any purpose, without fee, subject to these +conditions: +(1) If any part of the source code for this software is distributed, then this +README file must be included, with this copyright and no-warranty notice +unaltered; and any additions, deletions, or changes to the original files +must be clearly indicated in accompanying documentation. +(2) If only executable code is distributed, then the accompanying +documentation must state that "this software is based in part on the work of +the Independent JPEG Group". +(3) Permission for use of this software is granted only if the user accepts +full responsibility for any undesirable consequences; the authors accept +NO LIABILITY for damages of any kind. + +These conditions apply to any software derived from or based on the IJG code, +not just to the unmodified library. If you use our work, you ought to +acknowledge us. + +Permission is NOT granted for the use of any IJG author's name or company name +in advertising or publicity relating to this software or products derived from +it. This software may be referred to only as "the Independent JPEG Group's +software". + +We specifically permit and encourage the use of this software as the basis of +commercial products, provided that all warranty or liability claims are +assumed by the product vendor. + + +The Unix configuration script "configure" was produced with GNU Autoconf. +It is copyright by the Free Software Foundation but is freely distributable. +The same holds for its supporting scripts (config.guess, config.sub, +ltmain.sh). Another support script, install-sh, is copyright by X Consortium +but is also freely distributable. + +The IJG distribution formerly included code to read and write GIF files. +To avoid entanglement with the Unisys LZW patent (now expired), GIF reading +support has been removed altogether, and the GIF writer has been simplified +to produce "uncompressed GIFs". This technique does not use the LZW +algorithm; the resulting GIF files are larger than usual, but are readable +by all standard GIF decoders. + +We are required to state that + "The Graphics Interchange Format(c) is the Copyright property of + CompuServe Incorporated. GIF(sm) is a Service Mark property of + CompuServe Incorporated." + + +REFERENCES +========== + +We recommend reading one or more of these references before trying to +understand the innards of the JPEG software. + +The best short technical introduction to the JPEG compression algorithm is + Wallace, Gregory K. "The JPEG Still Picture Compression Standard", + Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. +(Adjacent articles in that issue discuss MPEG motion picture compression, +applications of JPEG, and related topics.) If you don't have the CACM issue +handy, a PDF file containing a revised version of Wallace's article is +available at http://www.ijg.org/files/Wallace.JPEG.pdf. The file (actually +a preprint for an article that appeared in IEEE Trans. Consumer Electronics) +omits the sample images that appeared in CACM, but it includes corrections +and some added material. Note: the Wallace article is copyright ACM and IEEE, +and it may not be used for commercial purposes. + +A somewhat less technical, more leisurely introduction to JPEG can be found in +"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by +M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides +good explanations and example C code for a multitude of compression methods +including JPEG. It is an excellent source if you are comfortable reading C +code but don't know much about data compression in general. The book's JPEG +sample code is far from industrial-strength, but when you are ready to look +at a full implementation, you've got one here... + +The best currently available description of JPEG is the textbook "JPEG Still +Image Data Compression Standard" by William B. Pennebaker and Joan L. +Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. +Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG +standards (DIS 10918-1 and draft DIS 10918-2). + +The original JPEG standard is divided into two parts, Part 1 being the actual +specification, while Part 2 covers compliance testing methods. Part 1 is +titled "Digital Compression and Coding of Continuous-tone Still Images, +Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS +10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of +Continuous-tone Still Images, Part 2: Compliance testing" and has document +numbers ISO/IEC IS 10918-2, ITU-T T.83. + +The JPEG standard does not specify all details of an interchangeable file +format. For the omitted details we follow the "JFIF" conventions, revision +1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report +and thus received a formal publication status. It is available as a free +download in PDF format from +http://www.ecma-international.org/publications/techreports/E-TR-098.htm. +A PostScript version of the JFIF document is available at +http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at +http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. + +The TIFF 6.0 file format specification can be obtained by FTP from +ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme +found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. +IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). +Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 +(Compression tag 7). Copies of this Note can be obtained from +http://www.ijg.org/files/. It is expected that the next revision +of the TIFF spec will replace the 6.0 JPEG design with the Note's design. +Although IJG's own code does not support TIFF/JPEG, the free libtiff library +uses our library to implement TIFF/JPEG per the Note. + + +ARCHIVE LOCATIONS +================= + +The "official" archive site for this software is www.ijg.org. +The most recent released version can always be found there in +directory "files". + +The JPEG FAQ (Frequently Asked Questions) article is a source of some +general information about JPEG. +It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ +and other news.answers archive sites, including the official news.answers +archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. +If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu +with body + send usenet/news.answers/jpeg-faq/part1 + send usenet/news.answers/jpeg-faq/part2 + + +FILE FORMAT WARS +================ + +The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together +with ITU-T SG16) currently promotes different formats containing the name +"JPEG" which are incompatible with original DCT-based JPEG. IJG therefore does +not support these formats (see REFERENCES). Indeed, one of the original +reasons for developing this free software was to help force convergence on +common, interoperable format standards for JPEG files. +Don't use an incompatible file format! +(In any case, our decoder will remain capable of reading existing JPEG +image files indefinitely.) + + +TO DO +===== + +Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README.md glmark2-2021.02/src/libjpeg-turbo/README.md --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README.md 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/README.md 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,335 @@ +Background +========== + +libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, +NEON, AltiVec) to accelerate baseline JPEG compression and decompression on +x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is +generally 2-6x as fast as libjpeg, all else being equal. On other types of +systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by +virtue of its highly-optimized Huffman coding routines. In many cases, the +performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. + +libjpeg-turbo implements both the traditional libjpeg API as well as the less +powerful but more straightforward TurboJPEG API. libjpeg-turbo also features +colorspace extensions that allow it to compress from/decompress to 32-bit and +big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java +interface. + +libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated +derivative of libjpeg v6b developed by Miyasaka Masaru. The TigerVNC and +VirtualGL projects made numerous enhancements to the codec in 2009, and in +early 2010, libjpeg-turbo spun off into an independent project, with the goal +of making high-speed JPEG compression/decompression technology available to a +broader range of users and developers. + + +License +======= + +libjpeg-turbo is covered by three compatible BSD-style open source licenses. +Refer to [LICENSE.md](LICENSE.md) for a roll-up of license terms. + + +Using libjpeg-turbo +=================== + +libjpeg-turbo includes two APIs that can be used to compress and decompress +JPEG images: + +- **TurboJPEG API** + This API provides an easy-to-use interface for compressing and decompressing + JPEG images in memory. It also provides some functionality that would not be + straightforward to achieve using the underlying libjpeg API, such as + generating planar YUV images and performing multiple simultaneous lossless + transforms on an image. The Java interface for libjpeg-turbo is written on + top of the TurboJPEG API. + +- **libjpeg API** + This is the de facto industry-standard API for compressing and decompressing + JPEG images. It is more difficult to use than the TurboJPEG API but also + more powerful. The libjpeg API implementation in libjpeg-turbo is both + API/ABI-compatible and mathematically compatible with libjpeg v6b. It can + also optionally be configured to be API/ABI-compatible with libjpeg v7 and v8 + (see below.) + +There is no significant performance advantage to either API when both are used +to perform similar operations. + +Colorspace Extensions +--------------------- + +libjpeg-turbo includes extensions that allow JPEG images to be compressed +directly from (and decompressed directly to) buffers that use BGR, BGRX, +RGBX, XBGR, and XRGB pixel ordering. This is implemented with ten new +colorspace constants: + + JCS_EXT_RGB /* red/green/blue */ + JCS_EXT_RGBX /* red/green/blue/x */ + JCS_EXT_BGR /* blue/green/red */ + JCS_EXT_BGRX /* blue/green/red/x */ + JCS_EXT_XBGR /* x/blue/green/red */ + JCS_EXT_XRGB /* x/red/green/blue */ + JCS_EXT_RGBA /* red/green/blue/alpha */ + JCS_EXT_BGRA /* blue/green/red/alpha */ + JCS_EXT_ABGR /* alpha/blue/green/red */ + JCS_EXT_ARGB /* alpha/red/green/blue */ + +Setting `cinfo.in_color_space` (compression) or `cinfo.out_color_space` +(decompression) to one of these values will cause libjpeg-turbo to read the +red, green, and blue values from (or write them to) the appropriate position in +the pixel when compressing from/decompressing to an RGB buffer. + +Your application can check for the existence of these extensions at compile +time with: + + #ifdef JCS_EXTENSIONS + +At run time, attempting to use these extensions with a libjpeg implementation +that does not support them will result in a "Bogus input colorspace" error. +Applications can trap this error in order to test whether run-time support is +available for the colorspace extensions. + +When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the +X byte is undefined, and in order to ensure the best performance, libjpeg-turbo +can set that byte to whatever value it wishes. If an application expects the X +byte to be used as an alpha channel, then it should specify `JCS_EXT_RGBA`, +`JCS_EXT_BGRA`, `JCS_EXT_ABGR`, or `JCS_EXT_ARGB`. When these colorspace +constants are used, the X byte is guaranteed to be 0xFF, which is interpreted +as opaque. + +Your application can check for the existence of the alpha channel colorspace +extensions at compile time with: + + #ifdef JCS_ALPHA_EXTENSIONS + +[jcstest.c](jcstest.c), located in the libjpeg-turbo source tree, demonstrates +how to check for the existence of the colorspace extensions at compile time and +run time. + +libjpeg v7 and v8 API/ABI Emulation +----------------------------------- + +With libjpeg v7 and v8, new features were added that necessitated extending the +compression and decompression structures. Unfortunately, due to the exposed +nature of those structures, extending them also necessitated breaking backward +ABI compatibility with previous libjpeg releases. Thus, programs that were +built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is +based on the libjpeg v6b code base. Although libjpeg v7 and v8 are not +as widely used as v6b, enough programs (including a few Linux distros) made +the switch that there was a demand to emulate the libjpeg v7 and v8 ABIs +in libjpeg-turbo. It should be noted, however, that this feature was added +primarily so that applications that had already been compiled to use libjpeg +v7+ could take advantage of accelerated baseline JPEG encoding/decoding +without recompiling. libjpeg-turbo does not claim to support all of the +libjpeg v7+ features, nor to produce identical output to libjpeg v7+ in all +cases (see below.) + +By passing an argument of `--with-jpeg7` or `--with-jpeg8` to `configure`, or +an argument of `-DWITH_JPEG7=1` or `-DWITH_JPEG8=1` to `cmake`, you can build a +version of libjpeg-turbo that emulates the libjpeg v7 or v8 ABI, so that +programs that are built against libjpeg v7 or v8 can be run with libjpeg-turbo. +The following section describes which libjpeg v7+ features are supported and +which aren't. + +### Support for libjpeg v7 and v8 Features + +#### Fully supported + +- **libjpeg: IDCT scaling extensions in decompressor** + libjpeg-turbo supports IDCT scaling with scaling factors of 1/8, 1/4, 3/8, + 1/2, 5/8, 3/4, 7/8, 9/8, 5/4, 11/8, 3/2, 13/8, 7/4, 15/8, and 2/1 (only 1/4 + and 1/2 are SIMD-accelerated.) + +- **libjpeg: Arithmetic coding** + +- **libjpeg: In-memory source and destination managers** + See notes below. + +- **cjpeg: Separate quality settings for luminance and chrominance** + Note that the libpjeg v7+ API was extended to accommodate this feature only + for convenience purposes. It has always been possible to implement this + feature with libjpeg v6b (see rdswitch.c for an example.) + +- **cjpeg: 32-bit BMP support** + +- **cjpeg: `-rgb` option** + +- **jpegtran: Lossless cropping** + +- **jpegtran: `-perfect` option** + +- **jpegtran: Forcing width/height when performing lossless crop** + +- **rdjpgcom: `-raw` option** + +- **rdjpgcom: Locale awareness** + + +#### Not supported + +NOTE: As of this writing, extensive research has been conducted into the +usefulness of DCT scaling as a means of data reduction and SmartScale as a +means of quality improvement. The reader is invited to peruse the research at +http://www.libjpeg-turbo.org/About/SmartScale and draw his/her own conclusions, +but it is the general belief of our project that these features have not +demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo. + +- **libjpeg: DCT scaling in compressor** + `cinfo.scale_num` and `cinfo.scale_denom` are silently ignored. + There is no technical reason why DCT scaling could not be supported when + emulating the libjpeg v7+ API/ABI, but without the SmartScale extension (see + below), only scaling factors of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and + 8/9 would be available, which is of limited usefulness. + +- **libjpeg: SmartScale** + `cinfo.block_size` is silently ignored. + SmartScale is an extension to the JPEG format that allows for DCT block + sizes other than 8x8. Providing support for this new format would be + feasible (particularly without full acceleration.) However, until/unless + the format becomes either an official industry standard or, at minimum, an + accepted solution in the community, we are hesitant to implement it, as + there is no sense of whether or how it might change in the future. It is + our belief that SmartScale has not demonstrated sufficient usefulness as a + lossless format nor as a means of quality enhancement, and thus our primary + interest in providing this feature would be as a means of supporting + additional DCT scaling factors. + +- **libjpeg: Fancy downsampling in compressor** + `cinfo.do_fancy_downsampling` is silently ignored. + This requires the DCT scaling feature, which is not supported. + +- **jpegtran: Scaling** + This requires both the DCT scaling and SmartScale features, which are not + supported. + +- **Lossless RGB JPEG files** + This requires the SmartScale feature, which is not supported. + +### What About libjpeg v9? + +libjpeg v9 introduced yet another field to the JPEG compression structure +(`color_transform`), thus making the ABI backward incompatible with that of +libjpeg v8. This new field was introduced solely for the purpose of supporting +lossless SmartScale encoding. Furthermore, there was actually no reason to +extend the API in this manner, as the color transform could have just as easily +been activated by way of a new JPEG colorspace constant, thus preserving +backward ABI compatibility. + +Our research (see link above) has shown that lossless SmartScale does not +generally accomplish anything that can't already be accomplished better with +existing, standard lossless formats. Therefore, at this time it is our belief +that there is not sufficient technical justification for software projects to +upgrade from libjpeg v8 to libjpeg v9, and thus there is not sufficient +echnical justification for us to emulate the libjpeg v9 ABI. + +In-Memory Source/Destination Managers +------------------------------------- + +By default, libjpeg-turbo 1.3 and later includes the `jpeg_mem_src()` and +`jpeg_mem_dest()` functions, even when not emulating the libjpeg v8 API/ABI. +Previously, it was necessary to build libjpeg-turbo from source with libjpeg v8 +API/ABI emulation in order to use the in-memory source/destination managers, +but several projects requested that those functions be included when emulating +the libjpeg v6b API/ABI as well. This allows the use of those functions by +programs that need them, without breaking ABI compatibility for programs that +don't, and it allows those functions to be provided in the "official" +libjpeg-turbo binaries. + +Those who are concerned about maintaining strict conformance with the libjpeg +v6b or v7 API can pass an argument of `--without-mem-srcdst` to `configure` or +an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to building +libjpeg-turbo. This will restore the pre-1.3 behavior, in which +`jpeg_mem_src()` and `jpeg_mem_dest()` are only included when emulating the +libjpeg v8 API/ABI. + +On Un*x systems, including the in-memory source/destination managers changes +the dynamic library version from 62.0.0 to 62.1.0 if using libjpeg v6b API/ABI +emulation and from 7.0.0 to 7.1.0 if using libjpeg v7 API/ABI emulation. + +Note that, on most Un*x systems, the dynamic linker will not look for a +function in a library until that function is actually used. Thus, if a program +is built against libjpeg-turbo 1.3+ and uses `jpeg_mem_src()` or +`jpeg_mem_dest()`, that program will not fail if run against an older version +of libjpeg-turbo or against libjpeg v7- until the program actually tries to +call `jpeg_mem_src()` or `jpeg_mem_dest()`. Such is not the case on Windows. +If a program is built against the libjpeg-turbo 1.3+ DLL and uses +`jpeg_mem_src()` or `jpeg_mem_dest()`, then it must use the libjpeg-turbo 1.3+ +DLL at run time. + +Both cjpeg and djpeg have been extended to allow testing the in-memory +source/destination manager functions. See their respective man pages for more +details. + + +Mathematical Compatibility +========================== + +For the most part, libjpeg-turbo should produce identical output to libjpeg +v6b. The one exception to this is when using the floating point DCT/IDCT, in +which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the +following reasons: + +- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so + slightly more accurate than the implementation in libjpeg v6b, but not by + any amount perceptible to human vision (generally in the range of 0.01 to + 0.08 dB gain in PNSR.) + +- When not using the SIMD extensions, libjpeg-turbo uses the more accurate + (and slightly faster) floating point IDCT algorithm introduced in libjpeg + v8a as opposed to the algorithm used in libjpeg v6b. It should be noted, + however, that this algorithm basically brings the accuracy of the floating + point IDCT in line with the accuracy of the slow integer IDCT. The floating + point DCT/IDCT algorithms are mainly a legacy feature, and they do not + produce significantly more accuracy than the slow integer algorithms (to put + numbers on this, the typical difference in PNSR between the two algorithms + is less than 0.10 dB, whereas changing the quality level by 1 in the upper + range of the quality scale is typically more like a 1.0 dB difference.) + +- If the floating point algorithms in libjpeg-turbo are not implemented using + SIMD instructions on a particular platform, then the accuracy of the + floating point DCT/IDCT can depend on the compiler settings. + +While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood it is +still using the same algorithms as libjpeg v6b, so there are several specific +cases in which libjpeg-turbo cannot be expected to produce the same output as +libjpeg v8: + +- When decompressing using scaling factors of 1/2 and 1/4, because libjpeg v8 + implements those scaling algorithms differently than libjpeg v6b does, and + libjpeg-turbo's SIMD extensions are based on the libjpeg v6b behavior. + +- When using chrominance subsampling, because libjpeg v8 implements this + with its DCT/IDCT scaling algorithms rather than with a separate + downsampling/upsampling algorithm. In our testing, the subsampled/upsampled + output of libjpeg v8 is less accurate than that of libjpeg v6b for this + reason. + +- When decompressing using a scaling factor > 1 and merged (AKA "non-fancy" or + "non-smooth") chrominance upsampling, because libjpeg v8 does not support + merged upsampling with scaling factors > 1. + + +Performance Pitfalls +==================== + +Restart Markers +--------------- + +The optimized Huffman decoder in libjpeg-turbo does not handle restart markers +in a way that makes the rest of the libjpeg infrastructure happy, so it is +necessary to use the slow Huffman decoder when decompressing a JPEG image that +has restart markers. This can cause the decompression performance to drop by +as much as 20%, but the performance will still be much greater than that of +libjpeg. Many consumer packages, such as PhotoShop, use restart markers when +generating JPEG images, so images generated by those programs will experience +this issue. + +Fast Integer Forward DCT at High Quality Levels +----------------------------------------------- + +The algorithm used by the SIMD-accelerated quantization function cannot produce +correct results whenever the fast integer forward DCT is used along with a JPEG +quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization +function in those cases. This causes performance to drop by as much as 40%. +It is therefore strongly advised that you use the slow integer forward DCT +whenever encoding images with a JPEG quality of 98 or higher. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README-turbo.txt glmark2-2021.02/src/libjpeg-turbo/README-turbo.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/README-turbo.txt 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/README-turbo.txt 1970-01-01 08:00:00.000000000 +0800 @@ -1,361 +0,0 @@ -******************************************************************************* -** Background -******************************************************************************* - -libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, -SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, -x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as -fast as the unmodified version of libjpeg, all else being equal. - -libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but -the TigerVNC and VirtualGL projects made numerous enhancements to the codec in -2009, including improved support for Mac OS X, 64-bit support, support for -32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman -encoding/decoding, and various bug fixes. The goal was to produce a fully -open-source codec that could replace the partially closed-source TurboJPEG/IPP -codec used by VirtualGL and TurboVNC. libjpeg-turbo generally achieves 80-120% -of the performance of TurboJPEG/IPP. It is faster in some areas but slower in -others. - -In early 2010, libjpeg-turbo spun off into its own independent project, with -the goal of making high-speed JPEG compression/decompression technology -available to a broader range of users and developers. - - -******************************************************************************* -** License -******************************************************************************* - -Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by -libjpeg (see README.) The TurboJPEG/OSS wrapper (both C and Java versions) and -associated test programs bear a similar license, which is reproduced below: - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -- Neither the name of the libjpeg-turbo Project nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - - -******************************************************************************* -** Using libjpeg-turbo -******************************************************************************* - -libjpeg-turbo includes two APIs that can be used to compress and decompress -JPEG images: - - TurboJPEG API: This API provides an easy-to-use interface for compressing - and decompressing JPEG images in memory. It also provides some functionality - that would not be straightforward to achieve using the underlying libjpeg - API, such as generating planar YUV images and performing multiple - simultaneous lossless transforms on an image. The Java interface for - libjpeg-turbo is written on top of the TurboJPEG API. - - libjpeg API: This is the de facto industry-standard API for compressing and - decompressing JPEG images. It is more difficult to use than the TurboJPEG - API but also more powerful. libjpeg-turbo is both API/ABI-compatible and - mathematically compatible with libjpeg v6b. It can also optionally be - configured to be API/ABI-compatible with libjpeg v7 and v8 (see below.) - - -============================= -Replacing libjpeg at Run Time -============================= - -If a Unix application is dynamically linked with libjpeg, then you can replace -libjpeg with libjpeg-turbo at run time by manipulating LD_LIBRARY_PATH. -For instance: - - [Using libjpeg] - > time cjpeg vgl_5674_0098.jpg - real 0m0.392s - user 0m0.074s - sys 0m0.020s - - [Using libjpeg-turbo] - > export LD_LIBRARY_PATH=/opt/libjpeg-turbo/{lib}:$LD_LIBRARY_PATH - > time cjpeg vgl_5674_0098.jpg - real 0m0.109s - user 0m0.029s - sys 0m0.010s - -NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and -architecture. - -System administrators can also replace the libjpeg sym links in /usr/{lib} with -links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}. -This will effectively accelerate every application that uses the libjpeg -dynamic library on the system. - -The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL -(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with -libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH -environment variable can be modified such that this directory is searched -before any others that might contain a libjpeg DLL. However, if a libjpeg -DLL exists in an application's install directory, then Windows will load this -DLL first whenever the application is launched. Thus, if an application ships -with jpeg62.dll, jpeg7.dll, or jpeg8.dll, then back up the application's -version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the -application's install directory to accelerate it. - -The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll). -msvcr90.dll ships with more recent versions of Windows, but users of older -Windows releases can obtain it from the Visual C++ 2008 Redistributable -Package, which is available as a free download from Microsoft's web site. - -NOTE: Features of libjpeg that require passing a C run-time structure, such -as a file handle, from an application to libjpeg will probably not work with -the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++, unless the application is also built to use the Visual C++ 2008 C -run-time DLL. In particular, this affects jpeg_stdio_dest() and -jpeg_stdio_src(). - -Mac applications typically embed their own copies of the libjpeg dylib inside -the (hidden) application bundle, so it is not possible to globally replace -libjpeg on OS X systems. If an application uses a shared library version of -libjpeg, then it may be possible to replace the application's version of it. -This would generally involve copying libjpeg.*.dylib from libjpeg-turbo into -the appropriate place in the application bundle and using install_name_tool to -repoint the dylib to the new directory. This requires an advanced knowledge of -OS X and would not survive an upgrade or a re-install of the application. -Thus, it is not recommended for most users. - -======================= -Replacing TurboJPEG/IPP -======================= - -libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by -VirtualGL 2.1.x and TurboVNC 0.6 (and prior.) libjpeg-turbo contains a wrapper -library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo -instead of the closed-source Intel Performance Primitives. You can replace the -TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order -to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec -at run time. Note that the 64-bit libjpeg-turbo packages contain only 64-bit -binaries, whereas the TurboJPEG/IPP 64-bit packages contained both 64-bit and -32-bit binaries. Thus, to replace a TurboJPEG/IPP 64-bit package, install -both the 64-bit and 32-bit versions of libjpeg-turbo. - -You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with -the libjpeg-turbo SDK instead of TurboJPEG/IPP. It should work identically. -libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which -are used to build VirtualGL 2.2 and TurboVNC 1.0 and later. - -======================================== -Using libjpeg-turbo in Your Own Programs -======================================== - -For the most part, libjpeg-turbo should work identically to libjpeg, so in -most cases, an application can be built against libjpeg and then run against -libjpeg-turbo. On Unix systems (including Cygwin), you can build against -libjpeg-turbo instead of libjpeg by setting - - CPATH=/opt/libjpeg-turbo/include - and - LIBRARY_PATH=/opt/libjpeg-turbo/{lib} - -({lib} = lib32 or lib64, depending on whether you are building a 32-bit or a -64-bit application.) - -If using MinGW, then set - - CPATH=/c/libjpeg-turbo-gcc[64]/include - and - LIBRARY_PATH=/c/libjpeg-turbo-gcc[64]/lib - -Building against libjpeg-turbo is useful, for instance, if you want to build an -application that leverages the libjpeg-turbo colorspace extensions (see below.) -On Linux and Solaris systems, you would still need to manipulate -LD_LIBRARY_PATH or create appropriate sym links to use libjpeg-turbo at run -time. On such systems, you can pass -R /opt/libjpeg-turbo/{lib} to the linker -to force the use of libjpeg-turbo at run time rather than libjpeg (also useful -if you want to leverage the colorspace extensions), or you can link against the -libjpeg-turbo static library. - -To force a Linux, Solaris, or MinGW application to link against the static -version of libjpeg-turbo, you can use the following linker options: - - -Wl,-Bstatic -ljpeg -Wl,-Bdynamic - -On OS X, simply add /opt/libjpeg-turbo/lib/libjpeg.a to the linker command -line (this also works on Linux and Solaris.) - -To build Visual C++ applications using libjpeg-turbo, add -c:\libjpeg-turbo[64]\include to the system or user INCLUDE environment -variable and c:\libjpeg-turbo[64]\lib to the system or user LIB environment -variable, and then link against either jpeg.lib (to use the DLL version of -libjpeg-turbo) or jpeg-static.lib (to use the static version of libjpeg-turbo.) - -===================== -Colorspace Extensions -===================== - -libjpeg-turbo includes extensions that allow JPEG images to be compressed -directly from (and decompressed directly to) buffers that use BGR, BGRX, -RGBX, XBGR, and XRGB pixel ordering. This is implemented with ten new -colorspace constants: - - JCS_EXT_RGB /* red/green/blue */ - JCS_EXT_RGBX /* red/green/blue/x */ - JCS_EXT_BGR /* blue/green/red */ - JCS_EXT_BGRX /* blue/green/red/x */ - JCS_EXT_XBGR /* x/blue/green/red */ - JCS_EXT_XRGB /* x/red/green/blue */ - JCS_EXT_RGBA /* red/green/blue/alpha */ - JCS_EXT_BGRA /* blue/green/red/alpha */ - JCS_EXT_ABGR /* alpha/blue/green/red */ - JCS_EXT_ARGB /* alpha/red/green/blue */ - -Setting cinfo.in_color_space (compression) or cinfo.out_color_space -(decompression) to one of these values will cause libjpeg-turbo to read the -red, green, and blue values from (or write them to) the appropriate position in -the pixel when compressing from/decompressing to an RGB buffer. - -Your application can check for the existence of these extensions at compile -time with: - - #ifdef JCS_EXTENSIONS - -At run time, attempting to use these extensions with a version of libjpeg -that doesn't support them will result in a "Bogus input colorspace" error. - -When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the -X byte is undefined, and in order to ensure the best performance, libjpeg-turbo -can set that byte to whatever value it wishes. If an application expects the X -byte to be used as an alpha channel, then it should specify JCS_EXT_RGBA, -JCS_EXT_BGRA, JCS_EXT_ABGR, or JCS_EXT_ARGB. When these colorspace constants -are used, the X byte is guaranteed to be 0xFF, which is interpreted as opaque. - -Your application can check for the existence of the alpha channel colorspace -extensions at compile time with: - - #ifdef JCS_ALPHA_EXTENSIONS - -jcstest.c, located in the libjpeg-turbo source tree, demonstrates how to check -for the existence of the colorspace extensions at compile time and run time. - -================================= -libjpeg v7 and v8 API/ABI support -================================= - -With libjpeg v7 and v8, new features were added that necessitated extending the -compression and decompression structures. Unfortunately, due to the exposed -nature of those structures, extending them also necessitated breaking backward -ABI compatibility with previous libjpeg releases. Thus, programs that are -built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is -based on the libjpeg v6b code base. Although libjpeg v7 and v8 are still not -as widely used as v6b, enough programs (including a few Linux distros) have -made the switch that it was desirable to provide support for the libjpeg v7/v8 -API/ABI in libjpeg-turbo. Although libjpeg-turbo can now be configured as a -drop-in replacement for libjpeg v7 or v8, it should be noted that not all of -the features in libjpeg v7 and v8 are supported (see below.) - -By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an -argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version -of libjpeg-turbo that emulates the libjpeg v7 or v8 API/ABI, so that programs -that are built against libjpeg v7 or v8 can be run with libjpeg-turbo. The -following section describes which libjpeg v7+ features are supported and which -aren't. - -libjpeg v7 and v8 Features: ---------------------------- - -Fully supported: - --- cjpeg: Separate quality settings for luminance and chrominance - Note that the libpjeg v7+ API was extended to accommodate this feature only - for convenience purposes. It has always been possible to implement this - feature with libjpeg v6b (see rdswitch.c for an example.) - --- cjpeg: 32-bit BMP support - --- jpegtran: lossless cropping - --- jpegtran: -perfect option - --- rdjpgcom: -raw option - --- rdjpgcom: locale awareness - - -Fully supported when using libjpeg v7/v8 emulation: - --- libjpeg: In-memory source and destination managers - - -Not supported: - --- libjpeg: DCT scaling in compressor - cinfo.scale_num and cinfo.scale_denom are silently ignored. - There is no technical reason why DCT scaling cannot be supported, but - without the SmartScale extension (see below), it would only be able to - down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9, - which is of limited usefulness. - --- libjpeg: SmartScale - cinfo.block_size is silently ignored. - SmartScale is an extension to the JPEG format that allows for DCT block - sizes other than 8x8. It would be difficult to support this feature while - retaining backward compatibility with libjpeg v6b. - --- libjpeg: IDCT scaling extensions in decompressor - libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4, - and 1/8 (same as libjpeg v6b.) - --- libjpeg: Fancy downsampling in compressor - cinfo.do_fancy_downsampling is silently ignored. - This requires the DCT scaling feature, which is not supported. - --- jpegtran: Scaling - This requires both the DCT scaling and SmartScale features, which are not - supported. - --- Lossless RGB JPEG files - This requires the SmartScale feature, which is not supported. - - -******************************************************************************* -** Performance pitfalls -******************************************************************************* - -=============== -Restart Markers -=============== - -The optimized Huffman decoder in libjpeg-turbo does not handle restart markers -in a way that makes the rest of the libjpeg infrastructure happy, so it is -necessary to use the slow Huffman decoder when decompressing a JPEG image that -has restart markers. This can cause the decompression performance to drop by -as much as 20%, but the performance will still be much greater than that of -libjpeg. Many consumer packages, such as PhotoShop, use restart markers when -generating JPEG images, so images generated by those programs will experience -this issue. - -=============================================== -Fast Integer Forward DCT at High Quality Levels -=============================================== - -The algorithm used by the SIMD-accelerated quantization function cannot produce -correct results whenever the fast integer forward DCT is used along with a JPEG -quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization -function in those cases. This causes performance to drop by as much as 40%. -It is therefore strongly advised that you use the slow integer forward DCT -whenever encoding images with a JPEG quality of 98 or higher. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/CMakeLists.txt glmark2-2021.02/src/libjpeg-turbo/simd/CMakeLists.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/CMakeLists.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,80 @@ +if(NOT DEFINED NASM) + set(NASM nasm CACHE FILEPATH "Path to NASM/YASM executable") +endif() + +if(SIMD_X86_64) + set(NAFLAGS -fwin64 -DWIN64 -D__x86_64__) +else() + if(BORLAND) + set(NAFLAGS -fobj -DOBJ32) + else() + set(NAFLAGS -fwin32 -DWIN32) + endif() +endif() +set(NAFLAGS ${NAFLAGS} -I${CMAKE_SOURCE_DIR}/win/ -I${CMAKE_CURRENT_SOURCE_DIR}/) + +# This only works if building from the command line. There is currently no way +# to set a variable's value based on the build type when using the MSVC IDE. +if(CMAKE_BUILD_TYPE STREQUAL "Debug" + OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + set(NAFLAGS ${NAFLAGS} -g) +endif() + +if(SIMD_X86_64) + set(SIMD_BASENAMES jfdctflt-sse-64 jccolor-sse2-64 jcgray-sse2-64 + jchuff-sse2-64 jcsample-sse2-64 jdcolor-sse2-64 jdmerge-sse2-64 + jdsample-sse2-64 jfdctfst-sse2-64 jfdctint-sse2-64 jidctflt-sse2-64 + jidctfst-sse2-64 jidctint-sse2-64 jidctred-sse2-64 jquantf-sse2-64 + jquanti-sse2-64) + message(STATUS "Building x86_64 SIMD extensions") +else() + set(SIMD_BASENAMES jsimdcpu jfdctflt-3dn jidctflt-3dn jquant-3dn jccolor-mmx + jcgray-mmx jcsample-mmx jdcolor-mmx jdmerge-mmx jdsample-mmx jfdctfst-mmx + jfdctint-mmx jidctfst-mmx jidctint-mmx jidctred-mmx jquant-mmx jfdctflt-sse + jidctflt-sse jquant-sse jccolor-sse2 jcgray-sse2 jchuff-sse2 jcsample-sse2 + jdcolor-sse2 jdmerge-sse2 jdsample-sse2 jfdctfst-sse2 jfdctint-sse2 + jidctflt-sse2 jidctfst-sse2 jidctint-sse2 jidctred-sse2 jquantf-sse2 + jquanti-sse2) + message(STATUS "Building i386 SIMD extensions") +endif() + +if(MSVC_IDE) + set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}") +else() + set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR}) +endif() + +file(GLOB INC_FILES *.inc) + +foreach(file ${SIMD_BASENAMES}) + set(DEPFILE "") + set(SIMD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/${file}.asm) + if(${file} MATCHES jccolor) + set(DEPFILE ${file}) + string(REGEX REPLACE "jccolor" "jccolext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jcgray) + set(DEPFILE ${file}) + string(REGEX REPLACE "jcgray" "jcgryext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jdcolor) + set(DEPFILE ${file}) + string(REGEX REPLACE "jdcolor" "jdcolext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + if(${file} MATCHES jdmerge) + set(DEPFILE ${file}) + string(REGEX REPLACE "jdmerge" "jdmrgext" DEPFILE ${DEPFILE}) + set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + endif() + set(SIMD_OBJ ${OBJDIR}/${file}.obj) + add_custom_command(OUTPUT ${SIMD_OBJ} + DEPENDS ${SIMD_SRC} ${DEPFILE} ${INC_FILES} + COMMAND ${NASM} ${NAFLAGS} ${SIMD_SRC} -o${SIMD_OBJ}) + set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ}) +endforeach() + +set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE) +add_custom_target(simd DEPENDS ${SIMD_OBJS}) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,267 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * Copyright (C) 2014, Jay Foad. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jccolor-altivec.c */ + + +void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + JSAMPROW inptr, outptr0, outptr1, outptr2; + int pitch = img_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + rgbg0, rgbg1, rgbg2, rgbg3, y, cb, cr; +#if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 + __vector unsigned char rgb3 = {0}; +#endif +#if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 + __vector unsigned char rgb4 = {0}; +#endif + __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; + __vector unsigned short yl, yh, crl, crh, cbl, cbh; + __vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3; + + /* Constants */ + __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) }, + pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) }, + pw_mf016_mf033 = { __4X2(-F_0_168, -F_0_331) }, + pw_mf008_mf041 = { __4X2(-F_0_081, -F_0_418) }; + __vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }, + pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16, + outptr0 += 16, outptr1 += 16, outptr2 += 16) { + +#if __BIG_ENDIAN__ + /* Load 16 pixels == 48 or 64 bytes */ + offset = (size_t)inptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overread. Since there is no way to + * read a partial AltiVec register, overread would occur on the last + * chunk of the last image row if the right edge is not on a 16-byte + * boundary. It could also occur on other rows if the bytes per row + * is low enough. Since we can't determine whether we're on the last + * image row, we have to assume every row is the last. + */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (bytes > 16) + rgb1 = vec_ld(16, inptr); + if (bytes > 32) + rgb2 = vec_ld(32, inptr); + if (bytes > 48) + rgb3 = vec_ld(48, inptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + rgb4 = vec_ld(64, inptr); +#endif + unaligned_shift_index = vec_lvsl(0, inptr); + rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index); + rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index); + rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = VEC_LD(0, tmpbuf); + rgb1 = VEC_LD(16, tmpbuf); + rgb2 = VEC_LD(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = VEC_LD(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = VEC_LD(0, inptr); + if (num_cols > 16) + rgb1 = VEC_LD(16, inptr); + if (num_cols > 32) + rgb2 = VEC_LD(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = VEC_LD(48, inptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + +#if RGB_PIXELSIZE == 3 + /* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0); + rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1); + rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2); + rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3); +#else + /* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX); + rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX); + rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX); + rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX); +#endif + + /* rg0 = R0 G0 R1 G1 R2 G2 R3 G3 + * bg0 = B0 G0 B1 G1 B2 G2 B3 G3 + * ... + * + * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0); + bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0); + rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1); + bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1); + rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2); + bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2); + rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3); + bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3); + + /* (Original) + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + * + * (This implementation) + * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + */ + + /* Calculate Y values */ + + y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf); + y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf); + y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf); + y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf); + y0 = vec_msums(bg0, pw_f0114_f0250, y0); + y1 = vec_msums(bg1, pw_f0114_f0250, y1); + y2 = vec_msums(bg2, pw_f0114_f0250, y2); + y3 = vec_msums(bg3, pw_f0114_f0250, y3); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); + vec_st(y, 0, outptr0); + + /* Calculate Cb values */ + cb0 = vec_msums(rg0, pw_mf016_mf033, pd_onehalfm1_cj); + cb1 = vec_msums(rg1, pw_mf016_mf033, pd_onehalfm1_cj); + cb2 = vec_msums(rg2, pw_mf016_mf033, pd_onehalfm1_cj); + cb3 = vec_msums(rg3, pw_mf016_mf033, pd_onehalfm1_cj); + cb0 = (__vector int)vec_msum((__vector unsigned short)bg0, pw_f050_f000, + (__vector unsigned int)cb0); + cb1 = (__vector int)vec_msum((__vector unsigned short)bg1, pw_f050_f000, + (__vector unsigned int)cb1); + cb2 = (__vector int)vec_msum((__vector unsigned short)bg2, pw_f050_f000, + (__vector unsigned int)cb2); + cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000, + (__vector unsigned int)cb3); + cbl = vec_perm((__vector unsigned short)cb0, + (__vector unsigned short)cb1, shift_pack_index); + cbh = vec_perm((__vector unsigned short)cb2, + (__vector unsigned short)cb3, shift_pack_index); + cb = vec_pack(cbl, cbh); + vec_st(cb, 0, outptr1); + + /* Calculate Cr values */ + cr0 = vec_msums(bg0, pw_mf008_mf041, pd_onehalfm1_cj); + cr1 = vec_msums(bg1, pw_mf008_mf041, pd_onehalfm1_cj); + cr2 = vec_msums(bg2, pw_mf008_mf041, pd_onehalfm1_cj); + cr3 = vec_msums(bg3, pw_mf008_mf041, pd_onehalfm1_cj); + cr0 = (__vector int)vec_msum((__vector unsigned short)rg0, pw_f050_f000, + (__vector unsigned int)cr0); + cr1 = (__vector int)vec_msum((__vector unsigned short)rg1, pw_f050_f000, + (__vector unsigned int)cr1); + cr2 = (__vector int)vec_msum((__vector unsigned short)rg2, pw_f050_f000, + (__vector unsigned int)cr2); + cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000, + (__vector unsigned int)cr3); + crl = vec_perm((__vector unsigned short)cr0, + (__vector unsigned short)cr1, shift_pack_index); + crh = vec_perm((__vector unsigned short)cr2, + (__vector unsigned short)cr3, shift_pack_index); + cr = vec_pack(crl, crh); + vec_st(cr, 0, outptr2); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,477 @@ +; +; jccolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax,eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx,edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd mmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA,mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_ycc_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmF,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH,mmH + + movq mmC,mmA + punpcklbw mmA,mmH ; mmA=(00 02 04 06) + punpckhbw mmC,mmH ; mmC=(10 12 14 16) + + movq mmB,mmE + punpcklbw mmE,mmH ; mmE=(20 22 24 26) + punpckhbw mmB,mmH ; mmB=(01 03 05 07) + + movq mmF,mmD + punpcklbw mmD,mmH ; mmD=(11 13 15 17) + punpckhbw mmF,mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF,mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmD,mmA + movq mmC,mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB,mmA + punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG,mmD + punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE,mmA + punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH,mmB + punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF,mmF + + movq mmC,mmA + punpcklbw mmA,mmF ; mmA=(00 02 04 06) + punpckhbw mmC,mmF ; mmC=(10 12 14 16) + + movq mmD,mmB + punpcklbw mmB,mmF ; mmB=(01 03 05 07) + punpckhbw mmD,mmF ; mmD=(11 13 15 17) + + movq mmG,mmE + punpcklbw mmE,mmF ; mmE=(20 22 24 26) + punpckhbw mmG,mmF ; mmG=(30 32 34 36) + + punpcklbw mmF,mmH + punpckhbw mmH,mmH + psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movq MMWORD [wk(0)], mm0 ; wk(0)=RE + movq MMWORD [wk(1)], mm1 ; wk(1)=RO + movq MMWORD [wk(2)], mm4 ; wk(2)=BE + movq MMWORD [wk(3)], mm5 ; wk(3)=BO + + movq mm6,mm1 + punpcklwd mm1,mm3 + punpckhwd mm6,mm3 + movq mm7,mm1 + movq mm4,mm6 + pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor mm1,mm1 + pxor mm6,mm6 + punpcklwd mm1,mm5 ; mm1=BOL + punpckhwd mm6,mm5 ; mm6=BOH + psrld mm1,1 ; mm1=BOL*FIX(0.500) + psrld mm6,1 ; mm6=BOH*FIX(0.500) + + movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ] + + paddd mm7,mm1 + paddd mm4,mm6 + paddd mm7,mm5 + paddd mm4,mm5 + psrld mm7,SCALEBITS ; mm7=CbOL + psrld mm4,SCALEBITS ; mm4=CbOH + packssdw mm7,mm4 ; mm7=CbO + + movq mm1, MMWORD [wk(2)] ; mm1=BE + + movq mm6,mm0 + punpcklwd mm0,mm2 + punpckhwd mm6,mm2 + movq mm5,mm0 + movq mm4,mm6 + pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor mm0,mm0 + pxor mm6,mm6 + punpcklwd mm0,mm1 ; mm0=BEL + punpckhwd mm6,mm1 ; mm6=BEH + psrld mm0,1 ; mm0=BEL*FIX(0.500) + psrld mm6,1 ; mm6=BEH*FIX(0.500) + + movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm5,mm0 + paddd mm4,mm6 + paddd mm5,mm1 + paddd mm4,mm1 + psrld mm5,SCALEBITS ; mm5=CbEL + psrld mm4,SCALEBITS ; mm4=CbEH + packssdw mm5,mm4 ; mm5=CbE + + psllw mm7,BYTE_BIT + por mm5,mm7 ; mm5=Cb + movq MMWORD [ebx], mm5 ; Save Cb + + movq mm0, MMWORD [wk(3)] ; mm0=BO + movq mm6, MMWORD [wk(2)] ; mm6=BE + movq mm1, MMWORD [wk(1)] ; mm1=RO + + movq mm4,mm0 + punpcklwd mm0,mm3 + punpckhwd mm4,mm3 + movq mm7,mm0 + movq mm5,mm4 + pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, MMWORD [wk(4)] + paddd mm4, MMWORD [wk(5)] + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL + psrld mm4,SCALEBITS ; mm4=YOH + packssdw mm0,mm4 ; mm0=YO + + pxor mm3,mm3 + pxor mm4,mm4 + punpcklwd mm3,mm1 ; mm3=ROL + punpckhwd mm4,mm1 ; mm4=ROH + psrld mm3,1 ; mm3=ROL*FIX(0.500) + psrld mm4,1 ; mm4=ROH*FIX(0.500) + + movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm7,mm3 + paddd mm5,mm4 + paddd mm7,mm1 + paddd mm5,mm1 + psrld mm7,SCALEBITS ; mm7=CrOL + psrld mm5,SCALEBITS ; mm5=CrOH + packssdw mm7,mm5 ; mm7=CrO + + movq mm3, MMWORD [wk(0)] ; mm3=RE + + movq mm4,mm6 + punpcklwd mm6,mm2 + punpckhwd mm4,mm2 + movq mm1,mm6 + movq mm5,mm4 + pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(6)] + paddd mm4, MMWORD [wk(7)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL + psrld mm4,SCALEBITS ; mm4=YEH + packssdw mm6,mm4 ; mm6=YE + + psllw mm0,BYTE_BIT + por mm6,mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + pxor mm2,mm2 + pxor mm4,mm4 + punpcklwd mm2,mm3 ; mm2=REL + punpckhwd mm4,mm3 ; mm4=REH + psrld mm2,1 ; mm2=REL*FIX(0.500) + psrld mm4,1 ; mm4=REH*FIX(0.500) + + movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ] + + paddd mm1,mm2 + paddd mm5,mm4 + paddd mm1,mm0 + paddd mm5,mm0 + psrld mm1,SCALEBITS ; mm1=CrEL + psrld mm5,SCALEBITS ; mm5=CrEH + packssdw mm1,mm5 ; mm1=CrE + + psllw mm7,BYTE_BIT + por mm1,mm7 ; mm1=Cr + movq MMWORD [edx], mm1 ; Save Cr + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + add ebx, byte SIZEOF_MMWORD ; outptr1 + add edx, byte SIZEOF_MMWORD ; outptr2 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,485 @@ +; +; jccolext.asm - colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +; r10 = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13 = JDIMENSION output_row +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 + + align 16 + + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d + test rcx,rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rdx + push rbx + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + mov rbx, JSAMPROW [rbx] ; outptr1 + mov rdx, JSAMPROW [rdx] ; outptr2 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax,rdx +.column_ld4: + movd xmmA,eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + movdqa xmm7,xmm1 + movdqa xmm4,xmm6 + pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1,xmm1 + pxor xmm6,xmm6 + punpcklwd xmm1,xmm5 ; xmm1=BOL + punpckhwd xmm6,xmm5 ; xmm6=BOH + psrld xmm1,1 ; xmm1=BOL*FIX(0.500) + psrld xmm6,1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm1 + paddd xmm4,xmm6 + paddd xmm7,xmm5 + paddd xmm4,xmm5 + psrld xmm7,SCALEBITS ; xmm7=CbOL + psrld xmm4,SCALEBITS ; xmm4=CbOH + packssdw xmm7,xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + movdqa xmm5,xmm0 + movdqa xmm4,xmm6 + pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0,xmm0 + pxor xmm6,xmm6 + punpcklwd xmm0,xmm1 ; xmm0=BEL + punpckhwd xmm6,xmm1 ; xmm6=BEH + psrld xmm0,1 ; xmm0=BEL*FIX(0.500) + psrld xmm6,1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5,xmm0 + paddd xmm4,xmm6 + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrld xmm5,SCALEBITS ; xmm5=CbEL + psrld xmm4,SCALEBITS ; xmm4=CbEH + packssdw xmm5,xmm4 ; xmm5=CbE + + psllw xmm7,BYTE_BIT + por xmm5,xmm7 ; xmm5=Cb + movdqa XMMWORD [rbx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + movdqa xmm7,xmm0 + movdqa xmm5,xmm4 + pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + pxor xmm3,xmm3 + pxor xmm4,xmm4 + punpcklwd xmm3,xmm1 ; xmm3=ROL + punpckhwd xmm4,xmm1 ; xmm4=ROH + psrld xmm3,1 ; xmm3=ROL*FIX(0.500) + psrld xmm4,1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm3 + paddd xmm5,xmm4 + paddd xmm7,xmm1 + paddd xmm5,xmm1 + psrld xmm7,SCALEBITS ; xmm7=CrOL + psrld xmm5,SCALEBITS ; xmm5=CrOH + packssdw xmm7,xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y + + pxor xmm2,xmm2 + pxor xmm4,xmm4 + punpcklwd xmm2,xmm3 ; xmm2=REL + punpckhwd xmm4,xmm3 ; xmm4=REH + psrld xmm2,1 ; xmm2=REL*FIX(0.500) + psrld xmm4,1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1,xmm2 + paddd xmm5,xmm4 + paddd xmm1,xmm0 + paddd xmm5,xmm0 + psrld xmm1,SCALEBITS ; xmm1=CrEL + psrld xmm5,SCALEBITS ; xmm5=CrEH + packssdw xmm1,xmm5 ; xmm1=CrE + + psllw xmm7,BYTE_BIT + por xmm1,xmm7 ; xmm1=Cr + movdqa XMMWORD [rdx], xmm1 ; Save Cr + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + add rbx, byte SIZEOF_XMMWORD ; outptr1 + add rdx, byte SIZEOF_XMMWORD ; outptr2 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + pop rbx + pop rdx + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,503 @@ +; +; jccolext.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_ycc_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd xmmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + movdqa xmm7,xmm1 + movdqa xmm4,xmm6 + pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1,xmm1 + pxor xmm6,xmm6 + punpcklwd xmm1,xmm5 ; xmm1=BOL + punpckhwd xmm6,xmm5 ; xmm6=BOH + psrld xmm1,1 ; xmm1=BOL*FIX(0.500) + psrld xmm6,1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm1 + paddd xmm4,xmm6 + paddd xmm7,xmm5 + paddd xmm4,xmm5 + psrld xmm7,SCALEBITS ; xmm7=CbOL + psrld xmm4,SCALEBITS ; xmm4=CbOH + packssdw xmm7,xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + movdqa xmm5,xmm0 + movdqa xmm4,xmm6 + pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0,xmm0 + pxor xmm6,xmm6 + punpcklwd xmm0,xmm1 ; xmm0=BEL + punpckhwd xmm6,xmm1 ; xmm6=BEH + psrld xmm0,1 ; xmm0=BEL*FIX(0.500) + psrld xmm6,1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5,xmm0 + paddd xmm4,xmm6 + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrld xmm5,SCALEBITS ; xmm5=CbEL + psrld xmm4,SCALEBITS ; xmm4=CbEH + packssdw xmm5,xmm4 ; xmm5=CbE + + psllw xmm7,BYTE_BIT + por xmm5,xmm7 ; xmm5=Cb + movdqa XMMWORD [ebx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + movdqa xmm7,xmm0 + movdqa xmm5,xmm4 + pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + pxor xmm3,xmm3 + pxor xmm4,xmm4 + punpcklwd xmm3,xmm1 ; xmm3=ROL + punpckhwd xmm4,xmm1 ; xmm4=ROH + psrld xmm3,1 ; xmm3=ROL*FIX(0.500) + psrld xmm4,1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm3 + paddd xmm5,xmm4 + paddd xmm7,xmm1 + paddd xmm5,xmm1 + psrld xmm7,SCALEBITS ; xmm7=CrOL + psrld xmm5,SCALEBITS ; xmm5=CrOH + packssdw xmm7,xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + pxor xmm2,xmm2 + pxor xmm4,xmm4 + punpcklwd xmm2,xmm3 ; xmm2=REL + punpckhwd xmm4,xmm3 ; xmm4=REH + psrld xmm2,1 ; xmm2=REL*FIX(0.500) + psrld xmm4,1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1,xmm2 + paddd xmm5,xmm4 + paddd xmm1,xmm0 + paddd xmm5,xmm0 + psrld xmm1,SCALEBITS ; xmm1=CrEL + psrld xmm5,SCALEBITS ; xmm5=CrEH + packssdw xmm1,xmm5 ; xmm1=CrE + + psllw xmm7,BYTE_BIT + por xmm1,xmm7 ; xmm1=Cr + movdqa XMMWORD [edx], xmm1 ; Save Cr + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + add ebx, byte SIZEOF_XMMWORD ; outptr1 + add edx, byte SIZEOF_XMMWORD ; outptr2 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,104 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* RGB --> YCC CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_081 5329 /* FIX(0.08131) */ +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_168 11059 /* FIX(0.16874) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_331 21709 /* FIX(0.33126) */ +#define F_0_418 27439 /* FIX(0.41869) */ +#define F_0_500 32768 /* FIX(0.50000) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} +#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} +#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} +#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgbx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} +#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} +#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} +#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} +#define jsimd_rgb_ycc_convert_altivec jsimd_extbgrx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extxbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} +#define jsimd_rgb_ycc_convert_altivec jsimd_extxrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,123 @@ +; +; jccolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE + +EXTN(jconst_rgb_ycc_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jccolor.asm - colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jccolor-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jccolor-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jccolor.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2009, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,99 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* RGB --> GRAYSCALE CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} +#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} +#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} +#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_gray_convert_altivec jsimd_extrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} +#define jsimd_rgb_gray_convert_altivec jsimd_extrgbx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} +#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} +#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} +#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} +#define jsimd_rgb_gray_convert_altivec jsimd_extbgrx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extxbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} +#define jsimd_rgb_gray_convert_altivec jsimd_extxrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,116 @@ +; +; jcgray.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2011 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_mmx) PRIVATE + +EXTN(jconst_rgb_gray_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,113 @@ +; +; jcgray.asm - grayscale colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgray-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgray-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,113 @@ +; +; jcgray.asm - grayscale colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,227 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * Copyright (C) 2014, Jay Foad. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jcgray-altivec.c */ + + +void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, + JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + JSAMPROW inptr, outptr; + int pitch = img_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; +#endif + + __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + rgbg0, rgbg1, rgbg2, rgbg3, y; +#if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 + __vector unsigned char rgb3 = {0}; +#endif +#if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 + __vector unsigned char rgb4 = {0}; +#endif + __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; + __vector unsigned short yl, yh; + __vector int y0, y1, y2, y3; + + /* Constants */ + __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) }, + pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr = output_buf[0][output_row]; + output_row++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16, + outptr += 16) { + +#if __BIG_ENDIAN__ + /* Load 16 pixels == 48 or 64 bytes */ + offset = (size_t)inptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overread. Since there is no way to + * read a partial AltiVec register, overread would occur on the last + * chunk of the last image row if the right edge is not on a 16-byte + * boundary. It could also occur on other rows if the bytes per row + * is low enough. Since we can't determine whether we're on the last + * image row, we have to assume every row is the last. + */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (bytes > 16) + rgb1 = vec_ld(16, inptr); + if (bytes > 32) + rgb2 = vec_ld(32, inptr); + if (bytes > 48) + rgb3 = vec_ld(48, inptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + rgb4 = vec_ld(64, inptr); +#endif + unaligned_shift_index = vec_lvsl(0, inptr); + rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index); + rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index); + rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index); +#endif + } + } else { + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16)); + rgb0 = vec_ld(0, tmpbuf); + rgb1 = vec_ld(16, tmpbuf); + rgb2 = vec_ld(32, tmpbuf); +#if RGB_PIXELSIZE == 4 + rgb3 = vec_ld(48, tmpbuf); +#endif + } else { + /* Fast path */ + rgb0 = vec_ld(0, inptr); + if (num_cols > 16) + rgb1 = vec_ld(16, inptr); + if (num_cols > 32) + rgb2 = vec_ld(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = vec_ld(48, inptr); +#endif + } + } +#else + /* Little endian */ + rgb0 = vec_vsx_ld(0, inptr); + if (num_cols > 16) + rgb1 = vec_vsx_ld(16, inptr); + if (num_cols > 32) + rgb2 = vec_vsx_ld(32, inptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + rgb3 = vec_vsx_ld(48, inptr); +#endif +#endif + +#if RGB_PIXELSIZE == 3 + /* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0); + rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1); + rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2); + rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3); +#else + /* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + * + * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3 + * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7 + * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb + * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf + */ + rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX); + rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX); + rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX); + rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX); +#endif + + /* rg0 = R0 G0 R1 G1 R2 G2 R3 G3 + * bg0 = B0 G0 B1 G1 B2 G2 B3 G3 + * ... + * + * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0); + bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0); + rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1); + bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1); + rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2); + bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2); + rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3); + bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3); + + /* (Original) + * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * + * (This implementation) + * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + */ + + /* Calculate Y values */ + + y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf); + y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf); + y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf); + y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf); + y0 = vec_msums(bg0, pw_f0114_f0250, y0); + y1 = vec_msums(bg1, pw_f0114_f0250, y1); + y2 = vec_msums(bg2, pw_f0114_f0250, y2); + y3 = vec_msums(bg3, pw_f0114_f0250, y3); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); + vec_st(y, 0, outptr); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,357 @@ +; +; jcgryext.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2011 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_rgb_gray_convert_mmx) PRIVATE + +EXTN(jsimd_rgb_gray_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax,eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx,edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd mmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA,mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_gray_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmF,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH,mmH + + movq mmC,mmA + punpcklbw mmA,mmH ; mmA=(00 02 04 06) + punpckhbw mmC,mmH ; mmC=(10 12 14 16) + + movq mmB,mmE + punpcklbw mmE,mmH ; mmE=(20 22 24 26) + punpckhbw mmB,mmH ; mmB=(01 03 05 07) + + movq mmF,mmD + punpcklbw mmD,mmH ; mmD=(11 13 15 17) + punpckhbw mmF,mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF,mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmD,mmA + movq mmC,mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB,mmA + punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG,mmD + punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE,mmA + punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH,mmB + punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF,mmF + + movq mmC,mmA + punpcklbw mmA,mmF ; mmA=(00 02 04 06) + punpckhbw mmC,mmF ; mmC=(10 12 14 16) + + movq mmD,mmB + punpcklbw mmB,mmF ; mmB=(01 03 05 07) + punpckhbw mmD,mmF ; mmD=(11 13 15 17) + + movq mmG,mmE + punpcklbw mmE,mmF ; mmE=(20 22 24 26) + punpckhbw mmG,mmF ; mmG=(30 32 34 36) + + punpcklbw mmF,mmH + punpckhbw mmH,mmH + psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movq mm6,mm1 + punpcklwd mm1,mm3 + punpckhwd mm6,mm3 + pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm7, mm6 ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm6,mm0 + punpcklwd mm0,mm2 + punpckhwd mm6,mm2 + pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movq MMWORD [wk(0)], mm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(1)], mm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movq mm0, mm5 ; mm0=BO + movq mm6, mm4 ; mm6=BE + + movq mm4,mm0 + punpcklwd mm0,mm3 + punpckhwd mm4,mm3 + pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, mm1 + paddd mm4, mm7 + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL + psrld mm4,SCALEBITS ; mm4=YOH + packssdw mm0,mm4 ; mm0=YO + + movq mm4,mm6 + punpcklwd mm6,mm2 + punpckhwd mm4,mm2 + pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(0)] + paddd mm4, MMWORD [wk(1)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL + psrld mm4,SCALEBITS ; mm4=YEH + packssdw mm6,mm4 ; mm6=YE + + psllw mm0,BYTE_BIT + por mm6,mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,364 @@ +; +; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +; r10 = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13 = JDIMENSION output_row +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_gray_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d + test rcx,rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax,rdx +.column_ld4: + movd xmmA,eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 + paddd xmm4, xmm7 + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcgryext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcgryext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,383 @@ +; +; jcgryext.asm - grayscale colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; Copyright (C) 2011, D. R. Commander. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define img_width(b) (b)+8 ; JDIMENSION img_width +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE + +EXTN(jsimd_rgb_gray_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd xmmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 + paddd xmm4, xmm7 + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS ; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jchuff-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jchuff-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jchuff-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jchuff-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,361 @@ +; +; jchuff-sse2-64.asm - Huffman entropy encoding (64-bit SSE2) +; +; Copyright 2009-2011, 2014-2016 D. R. Commander. +; Copyright 2015 Matthieu Darbois +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_huff_encode_one_block) PRIVATE + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it. This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 + sub put_bits, 8 ; put_bits -= 8; + mov rdx, put_buffer + mov ecx, put_bits + shr rdx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [buffer], dl ; *buffer++ = c; + add buffer, 1 + cmp dl, 0xFF ; need to stuff a zero byte? + jne %%.EMIT_BYTE_END + mov byte [buffer], 0 ; *buffer++ = 0; + add buffer, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 +%endmacro + +%macro CHECKBUF31 0 + cmp put_bits, 32 ; if (put_bits > 31) { + jl %%.CHECKBUF31_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF31_END: +%endmacro + +%macro CHECKBUF47 0 + cmp put_bits, 48 ; if (put_bits > 47) { + jl %%.CHECKBUF47_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF47_END: +%endmacro + +%macro EMIT_BITS 2 + CHECKBUF47 + mov ecx, %2 + PUT_BITS %1 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) + pxor xmm8, xmm8 ; __m128i neg = _mm_setzero_si128(); + pxor xmm9, xmm9 ; __m128i neg = _mm_setzero_si128(); + pxor xmm10, xmm10 ; __m128i neg = _mm_setzero_si128(); + pxor xmm11, xmm11 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [r12 + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [r12 + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [r12 + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [r12 + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [r12 + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [r12 + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [r12 + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [r12 + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; + pinsrw %34, word [r12 + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [r12 + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [r12 + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [r12 + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [r12 + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [r12 + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [r12 + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; + pinsrw %37, word [r12 + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [r12 + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [r12 + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [r12 + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [r12 + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [r12 + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [r12 + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [r12 + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [r12 + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [r12 + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [r12 + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [r12 + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [r12 + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [r12 + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [r12 + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [r12 + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [r12 + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ebx, 7 ; xmm_shadow[31] = block[jno31]; +%endif + pcmpgtw xmm8, %34 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm9, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm10, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm11, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm8 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm9 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm10 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm11 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm8 ; x1 = _mm_xor_si128(x1, neg); + pxor %35, xmm9 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm10 ; x1 = _mm_xor_si128(x1, neg); + pxor %37, xmm11 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm8, %34 ; neg = _mm_xor_si128(neg, x1); + pxor xmm9, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm10, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm11, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); + movdqa XMMWORD [t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [t2 + %1 * SIZEOF_WORD], xmm8 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [t2 + (%1 + 8) * SIZEOF_WORD], xmm9 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [t2 + (%1 + 16) * SIZEOF_WORD], xmm10 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [t2 + (%1 + 24) * SIZEOF_WORD], xmm11 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients. +; +; GLOBAL(JOCTET*) +; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; + +; r10 = working_state *state +; r11 = JOCTET *buffer +; r12 = JCOEFPTR block +; r13 = int last_dc_val +; r14 = c_derived_tbl *dctbl +; r15 = c_derived_tbl *actbl + +%define t1 rbp-(DCTSIZE2*SIZEOF_WORD) +%define t2 t1-(DCTSIZE2*SIZEOF_WORD) +%define put_buffer r8 +%define put_bits r9d +%define buffer rax + + align 16 + global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE + +EXTN(jsimd_huff_encode_one_block_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [t2] + collect_args +%ifdef WIN64 + movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8 + movaps XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9 + movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10 + movaps XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11 + sub rsp, 4*SIZEOF_XMMWORD +%endif + push rbx + + mov buffer, r11 ; r11 is now sratch + + mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits; + push r10 ; r10 is now scratch + + ; Encode the DC coefficient difference per section F.1.2.1 + movsx edi, word [r12] ; temp = temp2 = block[0] - last_dc_val; + sub edi, r13d ; r13 is not used anymore + mov ebx, edi + + ; This is a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog. + mov esi, edi + sar esi, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor edi, esi ; temp ^= temp3; + sub edi, esi ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add ebx, esi ; temp2 += temp3; + + ; Find the number of bits needed for the magnitude of the coefficient + lea r11, [rel jpeg_nbits_table] + movzx rdi, byte [r11 + rdi] ; nbits = JPEG_NBITS(temp); + ; Emit the Huffman-coded symbol for the number of bits + mov r11d, INT [r14 + rdi * 4] ; code = dctbl->ehufco[nbits]; + movzx esi, byte [r14 + rdi + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS r11, esi ; EMIT_BITS(code, size) + + ; Mask off any extra bits in code + mov esi, 1 + mov ecx, edi + shl esi, cl + dec esi + and ebx, esi ; temp2 &= (((JLONG) 1)<ehufco[0xf0]; + movzx r14d, byte [r15 + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + lea rsi, [t1] +.BLOOP: + bsf r12, r11 ; r = __builtin_ctzl(index); + jz .ELOOP + mov rcx, r12 + lea rsi, [rsi+r12*2] ; k += r; + shr r11, cl ; index >>= r; + movzx rdi, word [rsi] ; temp = t1[k]; + lea rbx, [rel jpeg_nbits_table] + movzx rdi, byte [rbx + rdi] ; nbits = JPEG_NBITS(temp); +.BRLOOP: + cmp r12, 16 ; while (r > 15) { + jl .ERLOOP + EMIT_BITS r13, r14d ; EMIT_BITS(code_0xf0, size_0xf0) + sub r12, 16 ; r -= 16; + jmp .BRLOOP +.ERLOOP: + ; Emit Huffman symbol for run length / number of bits + CHECKBUF31 ; uses rcx, rdx + + shl r12, 4 ; temp3 = (r << 4) + nbits; + add r12, rdi + mov ebx, INT [r15 + r12 * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [r15 + r12 + 1024] ; size = actbl->ehufsi[temp3]; + PUT_BITS rbx + + ;EMIT_CODE(code, size) + + movsx ebx, word [rsi-DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov rcx, rdi + mov rdx, 1 + shl rdx, cl + dec rdx + and rbx, rdx ; temp2 &= (((JLONG) 1)<>= 1; + add rsi, 2 ; ++k; + jmp .BLOOP +.ELOOP: + ; If the last coef(s) were zero, emit an end-of-block code + lea rdi, [t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp rdi, rsi ; if (r > 0) { + je .EFN + mov ebx, INT [r15] ; code = actbl->ehufco[0]; + movzx r12d, byte [r15 + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS rbx, r12d +.EFN: + pop r10 + ; Save put_buffer & put_bits + mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits; + + pop rbx +%ifdef WIN64 + movaps xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD] + movaps xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD] + movaps xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD] + movaps xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD] + add rsp, 4*SIZEOF_XMMWORD +%endif + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jchuff-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jchuff-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jchuff-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jchuff-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,427 @@ +; +; jchuff-sse2.asm - Huffman entropy encoding (SSE2) +; +; Copyright 2009-2011, 2014-2016 D. R. Commander. +; Copyright 2015 Matthieu Darbois +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_huff_encode_one_block) PRIVATE + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it. This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 + sub put_bits, 8 ; put_bits -= 8; + mov edx, put_buffer + mov ecx, put_bits + shr edx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [eax], dl ; *buffer++ = c; + add eax, 1 + cmp dl, 0xFF ; need to stuff a zero byte? + jne %%.EMIT_BYTE_END + mov byte [eax], 0 ; *buffer++ = 0; + add eax, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 +%endmacro + +%macro CHECKBUF15 0 + cmp put_bits, 16 ; if (put_bits > 31) { + jl %%.CHECKBUF15_END + mov eax, POINTER [esp+buffer] + EMIT_BYTE + EMIT_BYTE + mov POINTER [esp+buffer], eax +%%.CHECKBUF15_END: +%endmacro + +%macro EMIT_BITS 1 + PUT_BITS %1 + CHECKBUF15 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) + pxor xmm4, xmm4 ; __m128i neg = _mm_setzero_si128(); + pxor xmm5, xmm5 ; __m128i neg = _mm_setzero_si128(); + pxor xmm6, xmm6 ; __m128i neg = _mm_setzero_si128(); + pxor xmm7, xmm7 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [esi + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [esi + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [esi + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [esi + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [esi + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [esi + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [esi + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [esi + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; + pinsrw %34, word [esi + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [esi + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [esi + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [esi + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [esi + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [esi + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [esi + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; + pinsrw %37, word [esi + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [esi + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [esi + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [esi + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [esi + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [esi + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [esi + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [esi + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [esi + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [esi + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [esi + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [esi + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [esi + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [esi + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [esi + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [esi + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [esi + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ecx, 7 ; xmm_shadow[31] = block[jno31]; +%endif + pcmpgtw xmm4, %34 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm5, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm6, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm7, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm4 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm5 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm6 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm7 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm4 ; x1 = _mm_xor_si128(x1, neg); + pxor %35, xmm5 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm6 ; x1 = _mm_xor_si128(x1, neg); + pxor %37, xmm7 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm4, %34 ; neg = _mm_xor_si128(neg, x1); + pxor xmm5, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm6, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm7, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [esp + t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); + movdqa XMMWORD [esp + t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [esp + t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [esp + t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [esp + t2 + %1 * SIZEOF_WORD], xmm4 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [esp + t2 + (%1 + 8) * SIZEOF_WORD], xmm5 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [esp + t2 + (%1 + 16) * SIZEOF_WORD], xmm6 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [esp + t2 + (%1 + 24) * SIZEOF_WORD], xmm7 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients. +; +; GLOBAL(JOCTET*) +; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; + +; eax + 8 = working_state *state +; eax + 12 = JOCTET *buffer +; eax + 16 = JCOEFPTR block +; eax + 20 = int last_dc_val +; eax + 24 = c_derived_tbl *dctbl +; eax + 28 = c_derived_tbl *actbl + +%define pad 6*SIZEOF_DWORD ; Align to 16 bytes +%define t1 pad +%define t2 t1+(DCTSIZE2*SIZEOF_WORD) +%define block t2+(DCTSIZE2*SIZEOF_WORD) +%define actbl block+SIZEOF_DWORD +%define buffer actbl+SIZEOF_DWORD +%define temp buffer+SIZEOF_DWORD +%define temp2 temp+SIZEOF_DWORD +%define temp3 temp2+SIZEOF_DWORD +%define temp4 temp3+SIZEOF_DWORD +%define temp5 temp4+SIZEOF_DWORD +%define gotptr temp5+SIZEOF_DWORD ; void *gotptr +%define put_buffer ebx +%define put_bits edi + + align 16 + global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE + +EXTN(jsimd_huff_encode_one_block_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + sub esp, temp5+9*SIZEOF_DWORD-pad + push ebx + push ecx +; push edx ; need not be preserved + push esi + push edi + push ebp + + mov esi, POINTER [eax+8] ; (working_state *state) + mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits; + push esi ; esi is now scratch + + get_GOT edx ; get GOT address + movpic POINTER [esp+gotptr], edx ; save GOT address + + mov ecx, POINTER [eax+28] + mov edx, POINTER [eax+16] + mov esi, POINTER [eax+12] + mov POINTER [esp+actbl], ecx + mov POINTER [esp+block], edx + mov POINTER [esp+buffer], esi + + ; Encode the DC coefficient difference per section F.1.2.1 + mov esi, POINTER [esp+block] ; block + movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val; + sub ecx, DWORD [eax+20] + mov esi, ecx + + ; This is a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog. + mov edx, ecx + sar edx, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor ecx, edx ; temp ^= temp3; + sub ecx, edx ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add esi, edx ; temp2 += temp3; + mov DWORD [esp+temp], esi ; backup temp2 in temp + + ; Find the number of bits needed for the magnitude of the coefficient + movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp) + movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], edx ; backup nbits in temp2 + + ; Emit the Huffman-coded symbol for the number of bits + mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore + mov eax, INT [ebp + edx * 4] ; code = dctbl->ehufco[nbits]; + movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS eax ; EMIT_BITS(code, size) + + mov ecx, DWORD [esp+temp2] ; restore nbits + + ; Mask off any extra bits in code + mov eax, 1 + shl eax, cl + dec eax + and eax, DWORD [esp+temp] ; temp2 &= (((JLONG) 1)<>= r; + mov DWORD [esp+temp3], edx +.BRLOOP: + cmp ecx, 16 ; while (r > 15) { + jl .ERLOOP + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP +.ERLOOP: + movsx eax, word [esi] ; temp = t1[k]; + movpic edx, POINTER [esp+gotptr] ; load GOT address (edx) + movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; + + jmp .BLOOP +.ELOOP: + movdqa xmm0, XMMWORD [esp + t1 + 32 * SIZEOF_WORD] ; __m128i tmp0 = _mm_loadu_si128((__m128i *)(t1 + 0)); + movdqa xmm1, XMMWORD [esp + t1 + 40 * SIZEOF_WORD] ; __m128i tmp1 = _mm_loadu_si128((__m128i *)(t1 + 8)); + movdqa xmm2, XMMWORD [esp + t1 + 48 * SIZEOF_WORD] ; __m128i tmp2 = _mm_loadu_si128((__m128i *)(t1 + 16)); + movdqa xmm3, XMMWORD [esp + t1 + 56 * SIZEOF_WORD] ; __m128i tmp3 = _mm_loadu_si128((__m128i *)(t1 + 24)); + pcmpeqw xmm0, xmm7 ; tmp0 = _mm_cmpeq_epi16(tmp0, zero); + pcmpeqw xmm1, xmm7 ; tmp1 = _mm_cmpeq_epi16(tmp1, zero); + pcmpeqw xmm2, xmm7 ; tmp2 = _mm_cmpeq_epi16(tmp2, zero); + pcmpeqw xmm3, xmm7 ; tmp3 = _mm_cmpeq_epi16(tmp3, zero); + packsswb xmm0, xmm1 ; tmp0 = _mm_packs_epi16(tmp0, tmp1); + packsswb xmm2, xmm3 ; tmp2 = _mm_packs_epi16(tmp2, tmp3); + pmovmskb edx, xmm0 ; index = ((uint64_t)_mm_movemask_epi8(tmp0)) << 0; + pmovmskb ecx, xmm2 ; index = ((uint64_t)_mm_movemask_epi8(tmp2)) << 16; + shl ecx, 16 + or edx, ecx + not edx ; index = ~index; + + lea eax, [esp + t1 + (DCTSIZE2/2) * 2] + sub eax, esi + shr eax, 1 + bsf ecx, edx ; r = __builtin_ctzl(index); + jz .ELOOP2 + shr edx, cl ; index >>= r; + add ecx, eax + lea esi, [esi+ecx*2] ; k += r; + mov DWORD [esp+temp3], edx + jmp .BRLOOP2 +.BLOOP2: + bsf ecx, edx ; r = __builtin_ctzl(index); + jz .ELOOP2 + lea esi, [esi+ecx*2] ; k += r; + shr edx, cl ; index >>= r; + mov DWORD [esp+temp3], edx +.BRLOOP2: + cmp ecx, 16 ; while (r > 15) { + jl .ERLOOP2 + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP2 +.ERLOOP2: + movsx eax, word [esi] ; temp = t1[k]; + bsr eax, eax ; nbits = 32 - __builtin_clz(temp); + inc eax + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; + + jmp .BLOOP2 +.ELOOP2: + ; If the last coef(s) were zero, emit an end-of-block code + lea edx, [esp + t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp edx, esi ; if (r > 0) { + je .EFN + mov eax, INT [ebp] ; code = actbl->ehufco[0]; + movzx ecx, byte [ebp + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS eax +.EFN: + mov eax, [esp+buffer] + pop esi + ; Save put_buffer & put_bits + mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits; + + pop ebp + pop edi + pop esi +; pop edx ; need not be preserved + pop ecx + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcolsamp.inc glmark2-2021.02/src/libjpeg-turbo/simd/jcolsamp.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcolsamp.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcolsamp.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,105 @@ +; +; jcolsamp.inc - private declarations for color conversion & up/downsampling +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; [TAB8] + +; -------------------------------------------------------------------------- + +; pseudo-resisters to make ordering of RGB configurable +; +%if RGB_RED == 0 +%define mmA mm0 +%define mmB mm1 +%define xmmA xmm0 +%define xmmB xmm1 +%elif RGB_GREEN == 0 +%define mmA mm2 +%define mmB mm3 +%define xmmA xmm2 +%define xmmB xmm3 +%elif RGB_BLUE == 0 +%define mmA mm4 +%define mmB mm5 +%define xmmA xmm4 +%define xmmB xmm5 +%else +%define mmA mm6 +%define mmB mm7 +%define xmmA xmm6 +%define xmmB xmm7 +%endif + +%if RGB_RED == 1 +%define mmC mm0 +%define mmD mm1 +%define xmmC xmm0 +%define xmmD xmm1 +%elif RGB_GREEN == 1 +%define mmC mm2 +%define mmD mm3 +%define xmmC xmm2 +%define xmmD xmm3 +%elif RGB_BLUE == 1 +%define mmC mm4 +%define mmD mm5 +%define xmmC xmm4 +%define xmmD xmm5 +%else +%define mmC mm6 +%define mmD mm7 +%define xmmC xmm6 +%define xmmD xmm7 +%endif + +%if RGB_RED == 2 +%define mmE mm0 +%define mmF mm1 +%define xmmE xmm0 +%define xmmF xmm1 +%elif RGB_GREEN == 2 +%define mmE mm2 +%define mmF mm3 +%define xmmE xmm2 +%define xmmF xmm3 +%elif RGB_BLUE == 2 +%define mmE mm4 +%define mmF mm5 +%define xmmE xmm4 +%define xmmF xmm5 +%else +%define mmE mm6 +%define mmF mm7 +%define xmmE xmm6 +%define xmmF xmm7 +%endif + +%if RGB_RED == 3 +%define mmG mm0 +%define mmH mm1 +%define xmmG xmm0 +%define xmmH xmm1 +%elif RGB_GREEN == 3 +%define mmG mm2 +%define mmH mm3 +%define xmmG xmm2 +%define xmmH xmm3 +%elif RGB_BLUE == 3 +%define mmG mm4 +%define mmH mm5 +%define xmmG xmm4 +%define xmmH xmm5 +%else +%define mmG mm6 +%define mmH mm7 +%define xmmG xmm6 +%define xmmH xmm7 +%endif + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,158 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* CHROMA DOWNSAMPLING */ + +#include "jsimd_altivec.h" +#include "jcsample.h" + + +void +jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + int outrow, outcol; + JDIMENSION output_cols = width_blocks * DCTSIZE; + JSAMPROW inptr, outptr; + + __vector unsigned char this0, next0, out; + __vector unsigned short this0e, this0o, next0e, next0o, outl, outh; + + /* Constants */ + __vector unsigned short pw_bias = { __4X2(0, 1) }, + pw_one = { __8X(1) }; + __vector unsigned char even_odd_index = + {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15}, + pb_zero = { __16X(0) }; + + expand_right_edge(input_data, max_v_samp_factor, image_width, + output_cols * 2); + + for (outrow = 0; outrow < v_samp_factor; outrow++) { + outptr = output_data[outrow]; + inptr = input_data[outrow]; + + for (outcol = output_cols; outcol > 0; + outcol -= 16, inptr += 32, outptr += 16) { + + this0 = vec_ld(0, inptr); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)VEC_UNPACKHU(this0); + this0o = (__vector unsigned short)VEC_UNPACKLU(this0); + outl = vec_add(this0e, this0o); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_one); + + if (outcol > 8) { + next0 = vec_ld(16, inptr); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)VEC_UNPACKHU(next0); + next0o = (__vector unsigned short)VEC_UNPACKLU(next0); + outh = vec_add(next0e, next0o); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_one); + } else + outh = vec_splat_u16(0); + + out = vec_pack(outl, outh); + vec_st(out, 0, outptr); + } + } +} + + +void +jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + int inrow, outrow, outcol; + JDIMENSION output_cols = width_blocks * DCTSIZE; + JSAMPROW inptr0, inptr1, outptr; + + __vector unsigned char this0, next0, this1, next1, out; + __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o, + next1e, next1o, out0l, out0h, out1l, out1h, outl, outh; + + /* Constants */ + __vector unsigned short pw_bias = { __4X2(1, 2) }, + pw_two = { __8X(2) }; + __vector unsigned char even_odd_index = + { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, + pb_zero = { __16X(0) }; + + expand_right_edge(input_data, max_v_samp_factor, image_width, + output_cols * 2); + + for (inrow = 0, outrow = 0; outrow < v_samp_factor; + inrow += 2, outrow++) { + + inptr0 = input_data[inrow]; + inptr1 = input_data[inrow + 1]; + outptr = output_data[outrow]; + + for (outcol = output_cols; outcol > 0; + outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) { + + this0 = vec_ld(0, inptr0); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)VEC_UNPACKHU(this0); + this0o = (__vector unsigned short)VEC_UNPACKLU(this0); + out0l = vec_add(this0e, this0o); + + this1 = vec_ld(0, inptr1); + this1 = vec_perm(this1, this1, even_odd_index); + this1e = (__vector unsigned short)VEC_UNPACKHU(this1); + this1o = (__vector unsigned short)VEC_UNPACKLU(this1); + out1l = vec_add(this1e, this1o); + + outl = vec_add(out0l, out1l); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_two); + + if (outcol > 8) { + next0 = vec_ld(16, inptr0); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)VEC_UNPACKHU(next0); + next0o = (__vector unsigned short)VEC_UNPACKLU(next0); + out0h = vec_add(next0e, next0o); + + next1 = vec_ld(16, inptr1); + next1 = vec_perm(next1, next1, even_odd_index); + next1e = (__vector unsigned short)VEC_UNPACKHU(next1); + next1o = (__vector unsigned short)VEC_UNPACKLU(next1); + out1h = vec_add(next1e, next1o); + + outh = vec_add(out0h, out1h); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_two); + } else + outh = vec_splat_u16(0); + + out = vec_pack(outl, outh); + vec_st(out, 0, outptr); + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample.h glmark2-2021.02/src/libjpeg-turbo/simd/jcsample.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcsample.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +/* + * jcsample.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +LOCAL(void) +expand_right_edge (JSAMPARRAY image_data, int num_rows, + JDIMENSION input_cols, JDIMENSION output_cols) +{ + register JSAMPROW ptr; + register JSAMPLE pixval; + register int count; + int row; + int numcols = (int) (output_cols - input_cols); + + if (numcols > 0) { + for (row = 0; row < num_rows; row++) { + ptr = image_data[row] + input_cols; + pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + for (count = numcols; count > 0; count--) + *ptr++ = pixval; + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,324 @@ +; +; jcsample.asm - downsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_downsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v1_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00010000 ; bias pattern + movd mm7,edx + pcmpeqw mm6,mm6 + punpckldq mm7,mm7 ; mm7={0, 1, 0, 1} + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm1, MMWORD [esi+1*SIZEOF_MMWORD] + movq mm2,mm0 + movq mm3,mm1 + + pand mm0,mm6 + psrlw mm2,BYTE_BIT + pand mm1,mm6 + psrlw mm3,BYTE_BIT + + paddw mm0,mm2 + paddw mm1,mm3 + paddw mm0,mm7 + paddw mm1,mm7 + psrlw mm0,1 + psrlw mm1,1 + + packuswb mm0,mm1 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 1*SIZEOF_MMWORD ; outptr + sub ecx, byte SIZEOF_MMWORD ; outcol + jnz short .columnloop + + pop esi + pop edi + pop ecx + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_downsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + movd mm7,edx + pcmpeqw mm6,mm6 + punpckldq mm7,mm7 ; mm7={1, 2, 1, 2} + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [edx+0*SIZEOF_MMWORD] + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2, MMWORD [edx+1*SIZEOF_MMWORD] + movq mm3, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm4,mm0 + movq mm5,mm1 + pand mm0,mm6 + psrlw mm4,BYTE_BIT + pand mm1,mm6 + psrlw mm5,BYTE_BIT + paddw mm0,mm4 + paddw mm1,mm5 + + movq mm4,mm2 + movq mm5,mm3 + pand mm2,mm6 + psrlw mm4,BYTE_BIT + pand mm3,mm6 + psrlw mm5,BYTE_BIT + paddw mm2,mm4 + paddw mm3,mm5 + + paddw mm0,mm1 + paddw mm2,mm3 + paddw mm0,mm7 + paddw mm2,mm7 + psrlw mm0,2 + psrlw mm2,2 + + packuswb mm0,mm2 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + + add edx, byte 2*SIZEOF_MMWORD ; inptr0 + add esi, byte 2*SIZEOF_MMWORD ; inptr1 + add edi, byte 1*SIZEOF_MMWORD ; outptr + sub ecx, byte SIZEOF_MMWORD ; outcol + jnz near .columnloop + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,330 @@ +; +; jcsample.asm - downsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +; r10 = JDIMENSION image_width +; r11 = int max_v_samp_factor +; r12 = JDIMENSION v_samp_factor +; r13 = JDIMENSION width_blocks +; r14 = JSAMPARRAY input_data +; r15 = JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_downsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov ecx, r13d + shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) + jz near .return + + mov edx, r10d + + ; -- expand_right_edge + + push rcx + shl rcx,1 ; output_cols * 2 + sub rcx,rdx + jle short .expand_end + + mov rax, r11 + test rax,rax + jle short .expand_end + + cld + mov rsi, r14 ; input_data +.expandloop: + push rax + push rcx + + mov rdi, JSAMPROW [rsi] + add rdi,rdx + mov al, JSAMPLE [rdi-1] + + rep stosb + + pop rcx + pop rax + + add rsi, byte SIZEOF_JSAMPROW + dec rax + jg short .expandloop + +.expand_end: + pop rcx ; output_cols + + ; -- h2v1_downsample + + mov eax, r12d ; rowctr + test eax,eax + jle near .return + + mov rdx, 0x00010000 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov rsi, r14 ; input_data + mov rdi, r15 ; output_data +.rowloop: + push rcx + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + +.columnloop_r8: + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + pxor xmm1,xmm1 + mov rcx, SIZEOF_XMMWORD + jmp short .downsample + +.columnloop: + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm2,xmm0 + movdqa xmm3,xmm1 + + pand xmm0,xmm6 + psrlw xmm2,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm3,BYTE_BIT + + paddw xmm0,xmm2 + paddw xmm1,xmm3 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + psrlw xmm0,1 + psrlw xmm1,1 + + packuswb xmm0,xmm1 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + + sub rcx, byte SIZEOF_XMMWORD ; outcol + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rdi, byte 1*SIZEOF_XMMWORD ; outptr + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + test rcx,rcx + jnz short .columnloop_r8 + + pop rsi + pop rdi + pop rcx + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rax ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +; r10 = JDIMENSION image_width +; r11 = int max_v_samp_factor +; r12 = JDIMENSION v_samp_factor +; r13 = JDIMENSION width_blocks +; r14 = JSAMPARRAY input_data +; r15 = JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_downsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov ecx, r13d + shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) + jz near .return + + mov edx, r10d + + ; -- expand_right_edge + + push rcx + shl rcx,1 ; output_cols * 2 + sub rcx,rdx + jle short .expand_end + + mov rax, r11 + test rax,rax + jle short .expand_end + + cld + mov rsi, r14 ; input_data +.expandloop: + push rax + push rcx + + mov rdi, JSAMPROW [rsi] + add rdi,rdx + mov al, JSAMPLE [rdi-1] + + rep stosb + + pop rcx + pop rax + + add rsi, byte SIZEOF_JSAMPROW + dec rax + jg short .expandloop + +.expand_end: + pop rcx ; output_cols + + ; -- h2v2_downsample + + mov eax, r12d ; rowctr + test rax,rax + jle near .return + + mov rdx, 0x00020001 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov rsi, r14 ; input_data + mov rdi, r15 ; output_data +.rowloop: + push rcx + push rdi + push rsi + + mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1 + mov rdi, JSAMPROW [rdi] ; outptr + + cmp rcx, byte SIZEOF_XMMWORD + jae short .columnloop + +.columnloop_r8: + movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov rcx, SIZEOF_XMMWORD + jmp short .downsample + +.columnloop: + movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + pand xmm0,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm0,xmm4 + paddw xmm1,xmm5 + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + pand xmm2,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm3,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm2,xmm4 + paddw xmm3,xmm5 + + paddw xmm0,xmm1 + paddw xmm2,xmm3 + paddw xmm0,xmm7 + paddw xmm2,xmm7 + psrlw xmm0,2 + psrlw xmm2,2 + + packuswb xmm0,xmm2 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + + sub rcx, byte SIZEOF_XMMWORD ; outcol + add rdx, byte 2*SIZEOF_XMMWORD ; inptr0 + add rsi, byte 2*SIZEOF_XMMWORD ; inptr1 + add rdi, byte 1*SIZEOF_XMMWORD ; outptr + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx,rcx + jnz near .columnloop_r8 + + pop rsi + pop rdi + pop rcx + + add rsi, byte 2*SIZEOF_JSAMPROW ; input_data + add rdi, byte 1*SIZEOF_JSAMPROW ; output_data + dec rax ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jcsample-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jcsample-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,351 @@ +; +; jcsample.asm - downsampling (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_downsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v1_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00010000 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + alignx 16,7 + +.columnloop_r8: + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + pxor xmm1,xmm1 + mov ecx, SIZEOF_XMMWORD + jmp short .downsample + alignx 16,7 + +.columnloop: + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm2,xmm0 + movdqa xmm3,xmm1 + + pand xmm0,xmm6 + psrlw xmm2,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm3,BYTE_BIT + + paddw xmm0,xmm2 + paddw xmm1,xmm3 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + psrlw xmm0,1 + psrlw xmm1,1 + + packuswb xmm0,xmm1 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + + sub ecx, byte SIZEOF_XMMWORD ; outcol + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add edi, byte 1*SIZEOF_XMMWORD ; outptr + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + test ecx,ecx + jnz short .columnloop_r8 + + pop esi + pop edi + pop ecx + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. +; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, JDIMENSION width_blocks, +; JSAMPARRAY input_data, JSAMPARRAY output_data); +; + +%define img_width(b) (b)+8 ; JDIMENSION image_width +%define max_v_samp(b) (b)+12 ; int max_v_samp_factor +%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b)+20 ; JDIMENSION width_blocks +%define input_data(b) (b)+24 ; JSAMPARRAY input_data +%define output_data(b) (b)+28 ; JSAMPARRAY output_data + + align 16 + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_downsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx,1 ; output_cols * 2 + sub ecx,edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax,eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16,7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi,edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax,eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + movd xmm7,edx + pcmpeqw xmm6,xmm6 + pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16,7 +.rowloop: + push ecx + push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + alignx 16,7 + +.columnloop_r8: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + pxor xmm2,xmm2 + pxor xmm3,xmm3 + mov ecx, SIZEOF_XMMWORD + jmp short .downsample + alignx 16,7 + +.columnloop: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + pand xmm0,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm1,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm0,xmm4 + paddw xmm1,xmm5 + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + pand xmm2,xmm6 + psrlw xmm4,BYTE_BIT + pand xmm3,xmm6 + psrlw xmm5,BYTE_BIT + paddw xmm2,xmm4 + paddw xmm3,xmm5 + + paddw xmm0,xmm1 + paddw xmm2,xmm3 + paddw xmm0,xmm7 + paddw xmm2,xmm7 + psrlw xmm0,2 + psrlw xmm2,2 + + packuswb xmm0,xmm2 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + + sub ecx, byte SIZEOF_XMMWORD ; outcol + add edx, byte 2*SIZEOF_XMMWORD ; inptr0 + add esi, byte 2*SIZEOF_XMMWORD ; inptr1 + add edi, byte 1*SIZEOF_XMMWORD ; outptr + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .columnloop_r8 + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,274 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jdcolor-altivec.c */ + + +void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, + JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + JSAMPROW outptr, inptr0, inptr1, inptr2; + int pitch = out_width * RGB_PIXELSIZE, num_cols; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1, rgb2, rgbx0, rgbx1, rgbx2, rgbx3, + y, cb, cr; +#if __BIG_ENDIAN__ + __vector unsigned char edgel, edgeh, edges, out0, out1, out2, out3; +#if RGB_PIXELSIZE == 4 + __vector unsigned char out4; +#endif +#endif +#if RGB_PIXELSIZE == 4 + __vector unsigned char rgb3; +#endif + __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh, + crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w; + __vector int g0, g1, g2, g3; + + /* Constants + * NOTE: The >> 1 is to compensate for the fact that vec_madds() returns 17 + * high-order bits, not 16. + */ + __vector short pw_f0402 = { __8X(F_0_402 >> 1) }, + pw_mf0228 = { __8X(-F_0_228 >> 1) }, + pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) }, + pw_one = { __8X(1) }, pw_255 = { __8X(255) }, + pw_cj = { __8X(CENTERJSAMPLE) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; +#endif + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + + for (num_cols = pitch; num_cols > 0; + num_cols -= RGB_PIXELSIZE * 16, outptr += RGB_PIXELSIZE * 16, + inptr0 += 16, inptr1 += 16, inptr2 += 16) { + + y = vec_ld(0, inptr0); + /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + yl = (__vector signed short)VEC_UNPACKHU(y); + yh = (__vector signed short)VEC_UNPACKLU(y); + + cb = vec_ld(0, inptr1); + cbl = (__vector signed short)VEC_UNPACKHU(cb); + cbh = (__vector signed short)VEC_UNPACKLU(cb); + cbl = vec_sub(cbl, pw_cj); + cbh = vec_sub(cbh, pw_cj); + + cr = vec_ld(0, inptr2); + crl = (__vector signed short)VEC_UNPACKHU(cr); + crh = (__vector signed short)VEC_UNPACKLU(cr); + crl = vec_sub(crl, pw_cj); + crh = vec_sub(crh, pw_cj); + + /* (Original) + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * (This implementation) + * R = Y + 0.40200 * Cr + Cr + * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + * B = Y - 0.22800 * Cb + Cb + Cb + */ + bl = vec_add(cbl, cbl); + bh = vec_add(cbh, cbh); + bl = vec_madds(bl, pw_mf0228, pw_one); + bh = vec_madds(bh, pw_mf0228, pw_one); + bl = vec_sra(bl, (__vector unsigned short)pw_one); + bh = vec_sra(bh, (__vector unsigned short)pw_one); + bl = vec_add(bl, cbl); + bh = vec_add(bh, cbh); + bl = vec_add(bl, cbl); + bh = vec_add(bh, cbh); + bl = vec_add(bl, yl); + bh = vec_add(bh, yh); + + rl = vec_add(crl, crl); + rh = vec_add(crh, crh); + rl = vec_madds(rl, pw_f0402, pw_one); + rh = vec_madds(rh, pw_f0402, pw_one); + rl = vec_sra(rl, (__vector unsigned short)pw_one); + rh = vec_sra(rh, (__vector unsigned short)pw_one); + rl = vec_add(rl, crl); + rh = vec_add(rh, crh); + rl = vec_add(rl, yl); + rh = vec_add(rh, yh); + + g0w = vec_mergeh(cbl, crl); + g1w = vec_mergel(cbl, crl); + g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf); + g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf); + g2w = vec_mergeh(cbh, crh); + g3w = vec_mergel(cbh, crh); + g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf); + g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + gl = vec_perm((__vector short)g0, (__vector short)g1, shift_pack_index); + gh = vec_perm((__vector short)g2, (__vector short)g3, shift_pack_index); + gl = vec_sub(gl, crl); + gh = vec_sub(gh, crh); + gl = vec_add(gl, yl); + gh = vec_add(gh, yh); + + rg0 = vec_mergeh(rl, gl); + bx0 = vec_mergeh(bl, pw_255); + rg1 = vec_mergel(rl, gl); + bx1 = vec_mergel(bl, pw_255); + rg2 = vec_mergeh(rh, gh); + bx2 = vec_mergeh(bh, pw_255); + rg3 = vec_mergel(rh, gh); + bx3 = vec_mergel(bh, pw_255); + + rgbx0 = vec_packsu(rg0, bx0); + rgbx1 = vec_packsu(rg1, bx1); + rgbx2 = vec_packsu(rg2, bx2); + rgbx3 = vec_packsu(rg3, bx3); + +#if RGB_PIXELSIZE == 3 + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + */ + rgb0 = vec_perm(rgbx0, rgbx1, (__vector unsigned char)RGB_INDEX0); + rgb1 = vec_perm(rgbx1, rgbx2, (__vector unsigned char)RGB_INDEX1); + rgb2 = vec_perm(rgbx2, rgbx3, (__vector unsigned char)RGB_INDEX2); +#else + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + */ + rgb0 = vec_perm(rgbx0, rgbx0, (__vector unsigned char)RGB_INDEX); + rgb1 = vec_perm(rgbx1, rgbx1, (__vector unsigned char)RGB_INDEX); + rgb2 = vec_perm(rgbx2, rgbx2, (__vector unsigned char)RGB_INDEX); + rgb3 = vec_perm(rgbx3, rgbx3, (__vector unsigned char)RGB_INDEX); +#endif + +#if __BIG_ENDIAN__ + offset = (size_t)outptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overwrite. Since there is no way to + * write a partial AltiVec register, overwrite would occur on the + * last chunk of the last image row if the right edge is not on a + * 16-byte boundary. It could also occur on other rows if the bytes + * per row is low enough. Since we can't determine whether we're on + * the last image row, we have to assume every row is the last. + */ + vec_st(rgb0, 0, tmpbuf); + vec_st(rgb1, 16, tmpbuf); + vec_st(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + vec_st(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + unaligned_shift_index = vec_lvsl(0, outptr); + edgel = vec_ld(0, outptr); + edgeh = vec_ld(min(num_cols - 1, RGB_PIXELSIZE * 16), outptr); + edges = vec_perm(edgeh, edgel, unaligned_shift_index); + unaligned_shift_index = vec_lvsr(0, outptr); + out0 = vec_perm(edges, rgb0, unaligned_shift_index); + out1 = vec_perm(rgb0, rgb1, unaligned_shift_index); + out2 = vec_perm(rgb1, rgb2, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + out3 = vec_perm(rgb2, rgb3, unaligned_shift_index); + out4 = vec_perm(rgb3, edges, unaligned_shift_index); +#else + out3 = vec_perm(rgb2, edges, unaligned_shift_index); +#endif + vec_st(out0, 0, outptr); + if (bytes > 16) + vec_st(out1, 16, outptr); + if (bytes > 32) + vec_st(out2, 32, outptr); + if (bytes > 48) + vec_st(out3, 48, outptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + vec_st(out4, 64, outptr); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + VEC_ST(rgb0, 0, tmpbuf); + VEC_ST(rgb1, 16, tmpbuf); + VEC_ST(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + VEC_ST(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + VEC_ST(rgb0, 0, outptr); + if (num_cols > 16) + VEC_ST(rgb1, 16, outptr); + if (num_cols > 32) + VEC_ST(rgb2, 32, outptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + VEC_ST(rgb3, 48, outptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,405 @@ +; +; jdcolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +%define out_width(b) (b)+8 ; JDIMENSION out_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define input_row(b) (b)+16 ; JDIMENSION input_row +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16,7 +.columnloop: + + movq mm5, MMWORD [ebx] ; mm5=Cb(01234567) + movq mm1, MMWORD [edx] ; mm1=Cr(01234567) + + pcmpeqw mm4,mm4 + pcmpeqw mm7,mm7 + psrlw mm4,BYTE_BIT + psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..} + + pand mm4,mm5 ; mm4=Cb(0246)=CbE + psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO + pand mm0,mm1 ; mm0=Cr(0246)=CrE + psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO + + paddw mm4,mm7 + paddw mm5,mm7 + paddw mm0,mm7 + paddw mm1,mm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm2,mm4 ; mm2=CbE + movq mm3,mm5 ; mm3=CbO + paddw mm4,mm4 ; mm4=2*CbE + paddw mm5,mm5 ; mm5=2*CbO + movq mm6,mm0 ; mm6=CrE + movq mm7,mm1 ; mm7=CrO + paddw mm0,mm0 ; mm0=2*CrE + paddw mm1,mm1 ; mm1=2*CrO + + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800)) + pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200)) + pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200)) + + paddw mm4,[GOTOFF(eax,PW_ONE)] + paddw mm5,[GOTOFF(eax,PW_ONE)] + psraw mm4,1 ; mm4=(CbE * -FIX(0.22800)) + psraw mm5,1 ; mm5=(CbO * -FIX(0.22800)) + paddw mm0,[GOTOFF(eax,PW_ONE)] + paddw mm1,[GOTOFF(eax,PW_ONE)] + psraw mm0,1 ; mm0=(CrE * FIX(0.40200)) + psraw mm1,1 ; mm1=(CrO * FIX(0.40200)) + + paddw mm4,mm2 + paddw mm5,mm3 + paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E + paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O + paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E + paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O + + movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E + movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O + + movq mm4,mm2 + movq mm5,mm3 + punpcklwd mm2,mm6 + punpckhwd mm4,mm6 + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm3,mm7 + punpckhwd mm5,mm7 + pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm4,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm4,SCALEBITS + paddd mm3,[GOTOFF(eax,PD_ONEHALF)] + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] + psrad mm3,SCALEBITS + psrad mm5,SCALEBITS + + packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movq mm5, MMWORD [esi] ; mm5=Y(01234567) + + pcmpeqw mm4,mm4 + psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..} + pand mm4,mm5 ; mm4=Y(0246)=YE + psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO + + paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) + paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA + movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA,mmC + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmE + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA,DWORD_BIT + movd eax,mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr eax,WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .nextrow + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 + jb short .nextrow + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16,7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,441 @@ +; +; jdcolext.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2009, 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +; r10 = JDIMENSION out_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION input_row +; r13 = JSAMPARRAY output_buf +; r14 = int num_rows + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d ; num_cols + test rcx,rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rdi, r13 + mov eax, r14d + test rax,rax + jle near .return +.rowloop: + push rax + push rdi + push rdx + push rbx + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr0 + mov rbx, JSAMPROW [rbx] ; inptr1 + mov rdx, JSAMPROW [rdx] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr +.columnloop: + + movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [rdx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + pcmpeqw xmm7,xmm7 + psrlw xmm4,BYTE_BIT + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4,xmm7 + paddw xmm5,xmm7 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2,xmm4 ; xmm2=CbE + movdqa xmm3,xmm5 ; xmm3=CbO + paddw xmm4,xmm4 ; xmm4=2*CbE + paddw xmm5,xmm5 ; xmm5=2*CbO + movdqa xmm6,xmm0 ; xmm6=CrE + movdqa xmm7,xmm1 ; xmm7=CrO + paddw xmm0,xmm0 ; xmm0=2*CrE + paddw xmm1,xmm1 ; xmm1=2*CrO + + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] + psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0,[rel PW_ONE] + paddw xmm1,[rel PW_ONE] + psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4,xmm2 + paddw xmm5,xmm3 + paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + punpcklwd xmm2,xmm6 + punpckhwd xmm4,xmm6 + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm4,[rel PW_MF0344_F0285] + punpcklwd xmm3,xmm7 + punpckhwd xmm5,xmm7 + pmaddwd xmm3,[rel PW_MF0344_F0285] + pmaddwd xmm5,[rel PW_MF0344_F0285] + + paddd xmm2,[rel PD_ONEHALF] + paddd xmm4,[rel PD_ONEHALF] + psrad xmm2,SCALEBITS + psrad xmm4,SCALEBITS + paddd xmm3,[rel PD_ONEHALF] + paddd xmm5,[rel PD_ONEHALF] + psrad xmm3,SCALEBITS + psrad xmm5,SCALEBITS + + packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [rsi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.nextrow: + pop rcx + pop rsi + pop rbx + pop rdx + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + add rdi, byte SIZEOF_JSAMPROW ; output_buf + dec rax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,460 @@ +; +; jdcolext.asm - colorspace conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +%define out_width(b) (b)+8 ; JDIMENSION out_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define input_row(b) (b)+16 ; JDIMENSION input_row +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jsimd_ycc_rgb_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16,7 +.columnloop: + + movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + pcmpeqw xmm7,xmm7 + psrlw xmm4,BYTE_BIT + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4,xmm7 + paddw xmm5,xmm7 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2,xmm4 ; xmm2=CbE + movdqa xmm3,xmm5 ; xmm3=CbO + paddw xmm4,xmm4 ; xmm4=2*CbE + paddw xmm5,xmm5 ; xmm5=2*CbO + movdqa xmm6,xmm0 ; xmm6=CrE + movdqa xmm7,xmm1 ; xmm7=CrO + paddw xmm0,xmm0 ; xmm0=2*CrE + paddw xmm1,xmm1 ; xmm1=2*CrO + + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4,[GOTOFF(eax,PW_ONE)] + paddw xmm5,[GOTOFF(eax,PW_ONE)] + psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0,[GOTOFF(eax,PW_ONE)] + paddw xmm1,[GOTOFF(eax,PW_ONE)] + psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4,xmm2 + paddw xmm5,xmm3 + paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4,xmm2 + movdqa xmm5,xmm3 + punpcklwd xmm2,xmm6 + punpckhwd xmm4,xmm6 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm3,xmm7 + punpckhwd xmm5,xmm7 + pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm4,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm4,SCALEBITS + paddd xmm3,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm3,SCALEBITS + psrad xmm5,SCALEBITS + + packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16,7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,96 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* YCC --> RGB CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} +#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} +#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgbx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} +#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} +#define RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgrx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE + +EXTN(jconst_ycc_rgb_convert_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx +%include "jdcolext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdcolor-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdcolor-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdct.inc glmark2-2021.02/src/libjpeg-turbo/simd/jdct.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdct.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdct.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,28 @@ +; +; jdct.inc - private declarations for forward & reverse DCT subsystems +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; [TAB8] + +; Each IDCT routine is responsible for range-limiting its results and +; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could +; be quite far out of range if the input data is corrupt, so a bulletproof +; range-limiting step is required. We use a mask-and-table-lookup method +; to do the combined operations quickly. +; +%define RANGE_MASK (MAXJSAMPLE * 4 + 3) ; 2 bits wider than legal samples + +%define ROW(n,b,s) ((b)+(n)*(s)) +%define COL(n,b,s) ((b)+(n)*(s)*DCTSIZE) + +%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD) +%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD) +%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD) + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,108 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* MERGED YCC --> RGB CONVERSION AND UPSAMPLING */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} +#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} +#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgb_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgbx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgbx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} +#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} +#define RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgrx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgrx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} +#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxrgb_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_mmx) PRIVATE + +EXTN(jconst_merged_upsample_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_sse2) PRIVATE + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 +%include "jdmrgext-sse2-64.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmerge-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmerge-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,126 @@ +; +; jdmerge.asm - merged upsampling/color conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_sse2) PRIVATE + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,323 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jdmerge-altivec.c */ + + +void jsimd_h2v1_merged_upsample_altivec (JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + JSAMPROW outptr, inptr0, inptr1, inptr2; + int pitch = output_width * RGB_PIXELSIZE, num_cols, yloop; +#if __BIG_ENDIAN__ + int offset; +#endif + unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; + + __vector unsigned char rgb0, rgb1, rgb2, rgbx0, rgbx1, rgbx2, rgbx3, + y, cb, cr; +#if __BIG_ENDIAN__ + __vector unsigned char edgel, edgeh, edges, out0, out1, out2, out3; +#if RGB_PIXELSIZE == 4 + __vector unsigned char out4; +#endif +#endif +#if RGB_PIXELSIZE == 4 + __vector unsigned char rgb3; +#endif + __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, ye, yo, cbl, cbh, + crl, crh, r_yl, r_yh, g_yl, g_yh, b_yl, b_yh, g_y0w, g_y1w, g_y2w, g_y3w, + rl, rh, gl, gh, bl, bh, re, ro, ge, go, be, bo; + __vector int g_y0, g_y1, g_y2, g_y3; + + /* Constants + * NOTE: The >> 1 is to compensate for the fact that vec_madds() returns 17 + * high-order bits, not 16. + */ + __vector short pw_f0402 = { __8X(F_0_402 >> 1) }, + pw_mf0228 = { __8X(-F_0_228 >> 1) }, + pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) }, + pw_one = { __8X(1) }, pw_255 = { __8X(255) }, + pw_cj = { __8X(CENTERJSAMPLE) }; + __vector int pd_onehalf = { __4X(ONE_HALF) }; + __vector unsigned char pb_zero = { __16X(0) }, +#if __BIG_ENDIAN__ + shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}, + even_index = {0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30}, + odd_index = {0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31}; +#else + shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}, + even_index = {16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0}, + odd_index = {17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0}; +#endif + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + for (num_cols = pitch; num_cols > 0; inptr1 += 16, inptr2 += 16) { + + cb = vec_ld(0, inptr1); + /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't + * support unsigned vectors. + */ + cbl = (__vector signed short)VEC_UNPACKHU(cb); + cbh = (__vector signed short)VEC_UNPACKLU(cb); + cbl = vec_sub(cbl, pw_cj); + cbh = vec_sub(cbh, pw_cj); + + cr = vec_ld(0, inptr2); + crl = (__vector signed short)VEC_UNPACKHU(cr); + crh = (__vector signed short)VEC_UNPACKLU(cr); + crl = vec_sub(crl, pw_cj); + crh = vec_sub(crh, pw_cj); + + /* (Original) + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * (This implementation) + * R = Y + 0.40200 * Cr + Cr + * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + * B = Y - 0.22800 * Cb + Cb + Cb + */ + b_yl = vec_add(cbl, cbl); + b_yh = vec_add(cbh, cbh); + b_yl = vec_madds(b_yl, pw_mf0228, pw_one); + b_yh = vec_madds(b_yh, pw_mf0228, pw_one); + b_yl = vec_sra(b_yl, (__vector unsigned short)pw_one); + b_yh = vec_sra(b_yh, (__vector unsigned short)pw_one); + b_yl = vec_add(b_yl, cbl); + b_yh = vec_add(b_yh, cbh); + b_yl = vec_add(b_yl, cbl); + b_yh = vec_add(b_yh, cbh); + + r_yl = vec_add(crl, crl); + r_yh = vec_add(crh, crh); + r_yl = vec_madds(r_yl, pw_f0402, pw_one); + r_yh = vec_madds(r_yh, pw_f0402, pw_one); + r_yl = vec_sra(r_yl, (__vector unsigned short)pw_one); + r_yh = vec_sra(r_yh, (__vector unsigned short)pw_one); + r_yl = vec_add(r_yl, crl); + r_yh = vec_add(r_yh, crh); + + g_y0w = vec_mergeh(cbl, crl); + g_y1w = vec_mergel(cbl, crl); + g_y0 = vec_msums(g_y0w, pw_mf0344_f0285, pd_onehalf); + g_y1 = vec_msums(g_y1w, pw_mf0344_f0285, pd_onehalf); + g_y2w = vec_mergeh(cbh, crh); + g_y3w = vec_mergel(cbh, crh); + g_y2 = vec_msums(g_y2w, pw_mf0344_f0285, pd_onehalf); + g_y3 = vec_msums(g_y3w, pw_mf0344_f0285, pd_onehalf); + /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from + * each dword into a new 16-bit vector, which is the equivalent of + * descaling the 32-bit results (right-shifting by 16 bits) and then + * packing them. + */ + g_yl = vec_perm((__vector short)g_y0, (__vector short)g_y1, + shift_pack_index); + g_yh = vec_perm((__vector short)g_y2, (__vector short)g_y3, + shift_pack_index); + g_yl = vec_sub(g_yl, crl); + g_yh = vec_sub(g_yh, crh); + + for (yloop = 0; yloop < 2 && num_cols > 0; yloop++, + num_cols -= RGB_PIXELSIZE * 16, + outptr += RGB_PIXELSIZE * 16, inptr0 += 16) { + + y = vec_ld(0, inptr0); + ye = (__vector signed short)vec_perm(pb_zero, y, even_index); + yo = (__vector signed short)vec_perm(pb_zero, y, odd_index); + + if (yloop == 0) { + be = vec_add(b_yl, ye); + bo = vec_add(b_yl, yo); + re = vec_add(r_yl, ye); + ro = vec_add(r_yl, yo); + ge = vec_add(g_yl, ye); + go = vec_add(g_yl, yo); + } else { + be = vec_add(b_yh, ye); + bo = vec_add(b_yh, yo); + re = vec_add(r_yh, ye); + ro = vec_add(r_yh, yo); + ge = vec_add(g_yh, ye); + go = vec_add(g_yh, yo); + } + + rl = vec_mergeh(re, ro); + rh = vec_mergel(re, ro); + gl = vec_mergeh(ge, go); + gh = vec_mergel(ge, go); + bl = vec_mergeh(be, bo); + bh = vec_mergel(be, bo); + + rg0 = vec_mergeh(rl, gl); + bx0 = vec_mergeh(bl, pw_255); + rg1 = vec_mergel(rl, gl); + bx1 = vec_mergel(bl, pw_255); + rg2 = vec_mergeh(rh, gh); + bx2 = vec_mergeh(bh, pw_255); + rg3 = vec_mergel(rh, gh); + bx3 = vec_mergel(bh, pw_255); + + rgbx0 = vec_packsu(rg0, bx0); + rgbx1 = vec_packsu(rg1, bx1); + rgbx2 = vec_packsu(rg2, bx2); + rgbx3 = vec_packsu(rg3, bx3); + +#if RGB_PIXELSIZE == 3 + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 + * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga + * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf + */ + rgb0 = vec_perm(rgbx0, rgbx1, (__vector unsigned char)RGB_INDEX0); + rgb1 = vec_perm(rgbx1, rgbx2, (__vector unsigned char)RGB_INDEX1); + rgb2 = vec_perm(rgbx2, rgbx3, (__vector unsigned char)RGB_INDEX2); +#else + /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3 + * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7 + * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb + * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf + * + * rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3 + * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7 + * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb + * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf + */ + rgb0 = vec_perm(rgbx0, rgbx0, (__vector unsigned char)RGB_INDEX); + rgb1 = vec_perm(rgbx1, rgbx1, (__vector unsigned char)RGB_INDEX); + rgb2 = vec_perm(rgbx2, rgbx2, (__vector unsigned char)RGB_INDEX); + rgb3 = vec_perm(rgbx3, rgbx3, (__vector unsigned char)RGB_INDEX); +#endif + +#if __BIG_ENDIAN__ + offset = (size_t)outptr & 15; + if (offset) { + __vector unsigned char unaligned_shift_index; + int bytes = num_cols + offset; + + if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) { + /* Slow path to prevent buffer overwrite. Since there is no way to + * write a partial AltiVec register, overwrite would occur on the + * last chunk of the last image row if the right edge is not on a + * 16-byte boundary. It could also occur on other rows if the bytes + * per row is low enough. Since we can't determine whether we're on + * the last image row, we have to assume every row is the last. + */ + vec_st(rgb0, 0, tmpbuf); + vec_st(rgb1, 16, tmpbuf); + vec_st(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + vec_st(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + unaligned_shift_index = vec_lvsl(0, outptr); + edgel = vec_ld(0, outptr); + edgeh = vec_ld(min(num_cols - 1, RGB_PIXELSIZE * 16), outptr); + edges = vec_perm(edgeh, edgel, unaligned_shift_index); + unaligned_shift_index = vec_lvsr(0, outptr); + out0 = vec_perm(edges, rgb0, unaligned_shift_index); + out1 = vec_perm(rgb0, rgb1, unaligned_shift_index); + out2 = vec_perm(rgb1, rgb2, unaligned_shift_index); +#if RGB_PIXELSIZE == 4 + out3 = vec_perm(rgb2, rgb3, unaligned_shift_index); + out4 = vec_perm(rgb3, edges, unaligned_shift_index); +#else + out3 = vec_perm(rgb2, edges, unaligned_shift_index); +#endif + vec_st(out0, 0, outptr); + if (bytes > 16) + vec_st(out1, 16, outptr); + if (bytes > 32) + vec_st(out2, 32, outptr); + if (bytes > 48) + vec_st(out3, 48, outptr); +#if RGB_PIXELSIZE == 4 + if (bytes > 64) + vec_st(out4, 64, outptr); +#endif + } + } else { +#endif /* __BIG_ENDIAN__ */ + if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) { + /* Slow path */ + VEC_ST(rgb0, 0, tmpbuf); + VEC_ST(rgb1, 16, tmpbuf); + VEC_ST(rgb2, 32, tmpbuf); +#if RGB_PIXELSIZE == 4 + VEC_ST(rgb3, 48, tmpbuf); +#endif + memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16)); + } else { + /* Fast path */ + VEC_ST(rgb0, 0, outptr); + if (num_cols > 16) + VEC_ST(rgb1, 16, outptr); + if (num_cols > 32) + VEC_ST(rgb2, 32, outptr); +#if RGB_PIXELSIZE == 4 + if (num_cols > 48) + VEC_ST(rgb3, 48, outptr); +#endif + } +#if __BIG_ENDIAN__ + } +#endif + } + } +} + + +void jsimd_h2v2_merged_upsample_altivec (JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + JSAMPROW inptr, outptr; + + inptr = input_buf[0][in_row_group_ctr]; + outptr = output_buf[0]; + + input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2]; + jsimd_h2v1_merged_upsample_altivec(output_width, input_buf, in_row_group_ctr, + output_buf); + + input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2 + 1]; + output_buf[0] = output_buf[1]; + jsimd_h2v1_merged_upsample_altivec(output_width, input_buf, in_row_group_ctr, + output_buf); + + input_buf[0][in_row_group_ctr] = inptr; + output_buf[0] = outptr; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,464 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_mmx (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [output_width(eax)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movq mm6, MMWORD [ebx] ; mm6=Cb(01234567) + movq mm7, MMWORD [edx] ; mm7=Cr(01234567) + + pxor mm1,mm1 ; mm1=(all 0's) + pcmpeqw mm3,mm3 + psllw mm3,7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80} + + movq mm4,mm6 + punpckhbw mm6,mm1 ; mm6=Cb(4567)=CbH + punpcklbw mm4,mm1 ; mm4=Cb(0123)=CbL + movq mm0,mm7 + punpckhbw mm7,mm1 ; mm7=Cr(4567)=CrH + punpcklbw mm0,mm1 ; mm0=Cr(0123)=CrL + + paddw mm6,mm3 + paddw mm4,mm3 + paddw mm7,mm3 + paddw mm0,mm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm5,mm6 ; mm5=CbH + movq mm2,mm4 ; mm2=CbL + paddw mm6,mm6 ; mm6=2*CbH + paddw mm4,mm4 ; mm4=2*CbL + movq mm1,mm7 ; mm1=CrH + movq mm3,mm0 ; mm3=CrL + paddw mm7,mm7 ; mm7=2*CrH + paddw mm0,mm0 ; mm0=2*CrL + + pmulhw mm6,[GOTOFF(eax,PW_MF0228)] ; mm6=(2*CbH * -FIX(0.22800)) + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbL * -FIX(0.22800)) + pmulhw mm7,[GOTOFF(eax,PW_F0402)] ; mm7=(2*CrH * FIX(0.40200)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrL * FIX(0.40200)) + + paddw mm6,[GOTOFF(eax,PW_ONE)] + paddw mm4,[GOTOFF(eax,PW_ONE)] + psraw mm6,1 ; mm6=(CbH * -FIX(0.22800)) + psraw mm4,1 ; mm4=(CbL * -FIX(0.22800)) + paddw mm7,[GOTOFF(eax,PW_ONE)] + paddw mm0,[GOTOFF(eax,PW_ONE)] + psraw mm7,1 ; mm7=(CrH * FIX(0.40200)) + psraw mm0,1 ; mm0=(CrL * FIX(0.40200)) + + paddw mm6,mm5 + paddw mm4,mm2 + paddw mm6,mm5 ; mm6=(CbH * FIX(1.77200))=(B-Y)H + paddw mm4,mm2 ; mm4=(CbL * FIX(1.77200))=(B-Y)L + paddw mm7,mm1 ; mm7=(CrH * FIX(1.40200))=(R-Y)H + paddw mm0,mm3 ; mm0=(CrL * FIX(1.40200))=(R-Y)L + + movq MMWORD [wk(0)], mm6 ; wk(0)=(B-Y)H + movq MMWORD [wk(1)], mm7 ; wk(1)=(R-Y)H + + movq mm6,mm5 + movq mm7,mm2 + punpcklwd mm5,mm1 + punpckhwd mm6,mm1 + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm2,mm3 + punpckhwd mm7,mm3 + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] + paddd mm6,[GOTOFF(eax,PD_ONEHALF)] + psrad mm5,SCALEBITS + psrad mm6,SCALEBITS + paddd mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm7,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm7,SCALEBITS + + packssdw mm5,mm6 ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw mm2,mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw mm5,mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw mm2,mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H + movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H + movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movq mm7, MMWORD [esi] ; mm7=Y(01234567) + + pcmpeqw mm6,mm6 + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + pand mm6,mm7 ; mm6=Y(0246)=YE + psrlw mm7,BYTE_BIT ; mm7=Y(1357)=YO + + movq mm1,mm0 ; mm1=mm0=(R-Y)(L/H) + movq mm3,mm2 ; mm3=mm2=(G-Y)(L/H) + movq mm5,mm4 ; mm5=mm4=(B-Y)(L/H) + + paddw mm0,mm6 ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm7 ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm6 ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm7 ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4,mm6 ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6) + paddw mm5,mm7 ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA + movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz near .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA,mmC + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmE + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA,DWORD_BIT + movd eax,mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr eax,WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .endcolumn + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 + jb short .endcolumn + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_mmx (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, JDIMENSION [output_width(ebp)] + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + + push edx ; inptr2 + push ebx ; inptr1 + push esi ; inptr00 + mov ebx,esp + + push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; output_width + + call near EXTN(jsimd_h2v1_merged_upsample_mmx) + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi + mov POINTER [ebx-1*SIZEOF_POINTER], edi + + call near EXTN(jsimd_h2v1_merged_upsample_mmx) + + add esp, byte 7*SIZEOF_DWORD + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,538 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2009, 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +; r10 = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov ecx, r10d ; col + test rcx,rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + mov rdi, r13 + mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 + mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 + mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr + + pop rcx ; col + +.columnloop: + + movdqa xmm6, XMMWORD [rbx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [rdx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[rel PW_ONE] + paddw xmm4,[rel PW_ONE] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[rel PW_ONE] + paddw xmm0,[rel PW_ONE] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[rel PW_MF0344_F0285] + pmaddwd xmm6,[rel PW_MF0344_F0285] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm7,[rel PW_MF0344_F0285] + + paddd xmm5,[rel PD_ONEHALF] + paddd xmm6,[rel PD_ONEHALF] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[rel PD_ONEHALF] + paddd xmm7,[rel PD_ONEHALF] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + +.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + +.Yloop_1st: + movdqa xmm7, XMMWORD [rsi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +; r10 = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12 = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + mov eax, r10d + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + mov rdi, r13 + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr00 + mov rbx,rsp + + push rdi + push rcx + push rax + + %ifdef WIN64 + mov r8, rcx + mov r9, rdi + mov rcx, rax + mov rdx, rbx + %else + mov rdx, rcx + mov rcx, rdi + mov rdi, rax + mov rsi, rbx + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax + pop rcx + pop rdi + pop rsi + pop rbx + pop rdx + + add rdi, byte SIZEOF_JSAMPROW ; outptr1 + add rsi, byte SIZEOF_JSAMPROW ; inptr01 + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr00 + mov rbx,rsp + + push rdi + push rcx + push rax + + %ifdef WIN64 + mov r8, rcx + mov r9, rdi + mov rcx, rax + mov rdx, rbx + %else + mov rdx, rcx + mov rcx, rdi + mov rdi, rax + mov rsi, rbx + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax + pop rcx + pop rdi + pop rsi + pop rbx + pop rdx + + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdmrgext-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdmrgext-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,519 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright 2012 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_merged_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [output_width(eax)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[GOTOFF(eax,PW_MF0228)] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[GOTOFF(eax,PW_F0402)] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[GOTOFF(eax,PW_ONE)] + paddw xmm4,[GOTOFF(eax,PW_ONE)] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[GOTOFF(eax,PW_ONE)] + paddw xmm0,[GOTOFF(eax,PW_ONE)] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm6,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm7,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + movd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define output_width(b) (b)+8 ; JDIMENSION output_width +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + + align 16 + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_merged_upsample_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, POINTER [output_width(ebp)] + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + + push edx ; inptr2 + push ebx ; inptr1 + push esi ; inptr00 + mov ebx,esp + + push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; output_width + + call near EXTN(jsimd_h2v1_merged_upsample_sse2) + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi + mov POINTER [ebx-1*SIZEOF_POINTER], edi + + call near EXTN(jsimd_h2v1_merged_upsample_sse2) + + add esp, byte 7*SIZEOF_DWORD + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,392 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* CHROMA UPSAMPLING */ + +#include "jsimd_altivec.h" + + +void +jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr; + int inrow, incol; + + __vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0, + out; + __vector short this0e, this0o, this0l, this0h, last0l, last0h, + next0l, next0h, outle, outhe, outlo, outho; + + /* Constants */ + __vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) }, + last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}, + last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30}, + next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, + next_index_lastcol = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,15}, +#if __BIG_ENDIAN__ + merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; +#else + merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; +#endif + __vector short pw_one = { __8X(1) }, pw_two = { __8X(2) }; + + for (inrow = 0; inrow < max_v_samp_factor; inrow++) { + inptr = input_data[inrow]; + outptr = output_data[inrow]; + + if (downsampled_width & 15) + inptr[downsampled_width] = inptr[downsampled_width - 1]; + + this0 = vec_ld(0, inptr); + p_last0 = vec_perm(this0, this0, last_index_col0); + last0 = this0; + + for (incol = downsampled_width; incol > 0; + incol -= 16, inptr += 16, outptr += 32) { + + if (downsampled_width - incol > 0) { + p_last0 = vec_perm(last0, this0, last_index); + last0 = this0; + } + + if (incol <= 16) + p_next0 = vec_perm(this0, this0, next_index_lastcol); + else { + next0 = vec_ld(16, inptr); + p_next0 = vec_perm(this0, next0, next_index); + } + + this0e = (__vector short)vec_mule(this0, pb_three); + this0o = (__vector short)vec_mulo(this0, pb_three); + this0l = vec_mergeh(this0e, this0o); + this0h = vec_mergel(this0e, this0o); + + last0l = (__vector short)VEC_UNPACKHU(p_last0); + last0h = (__vector short)VEC_UNPACKLU(p_last0); + last0l = vec_add(last0l, pw_one); + + next0l = (__vector short)VEC_UNPACKHU(p_next0); + next0h = (__vector short)VEC_UNPACKLU(p_next0); + next0l = vec_add(next0l, pw_two); + + outle = vec_add(this0l, last0l); + outlo = vec_add(this0l, next0l); + outle = vec_sr(outle, (__vector unsigned short)pw_two); + outlo = vec_sr(outlo, (__vector unsigned short)pw_two); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr); + + if (incol > 8) { + last0h = vec_add(last0h, pw_one); + next0h = vec_add(next0h, pw_two); + + outhe = vec_add(this0h, last0h); + outho = vec_add(this0h, next0h); + outhe = vec_sr(outhe, (__vector unsigned short)pw_two); + outho = vec_sr(outho, (__vector unsigned short)pw_two); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr); + } + + this0 = next0; + } + } +} + + +void +jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; + int inrow, outrow, incol; + + __vector unsigned char this_1, this0, this1, out; + __vector short this_1l, this_1h, this0l, this0h, this1l, this1h, + lastcolsum_1h, lastcolsum1h, + p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h, + thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h, + nextcolsum_1l = {0}, nextcolsum_1h = {0}, + nextcolsum1l = {0}, nextcolsum1h = {0}, + p_nextcolsum_1l, p_nextcolsum_1h, p_nextcolsum1l, p_nextcolsum1h, + tmpl, tmph, outle, outhe, outlo, outho; + + /* Constants */ + __vector unsigned char pb_zero = { __16X(0) }, + last_index_col0 = {0,1,0,1,2,3,4,5,6,7,8,9,10,11,12,13}, + last_index={14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29}, + next_index = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}, + next_index_lastcol = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15}, +#if __BIG_ENDIAN__ + merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; +#else + merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; +#endif + __vector short pw_zero = { __8X(0) }, pw_three = { __8X(3) }, + pw_seven = { __8X(7) }, pw_eight = { __8X(8) }; + __vector unsigned short pw_four = { __8X(4) }; + + for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + + inptr_1 = input_data[inrow - 1]; + inptr0 = input_data[inrow]; + inptr1 = input_data[inrow + 1]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + + if (downsampled_width & 15) { + inptr_1[downsampled_width] = inptr_1[downsampled_width - 1]; + inptr0[downsampled_width] = inptr0[downsampled_width - 1]; + inptr1[downsampled_width] = inptr1[downsampled_width - 1]; + } + + this0 = vec_ld(0, inptr0); + this0l = (__vector short)VEC_UNPACKHU(this0); + this0h = (__vector short)VEC_UNPACKLU(this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(0, inptr_1); + this_1l = (__vector short)VEC_UNPACKHU(this_1); + this_1h = (__vector short)VEC_UNPACKLU(this_1); + thiscolsum_1l = vec_add(this0l, this_1l); + thiscolsum_1h = vec_add(this0h, this_1h); + lastcolsum_1h = thiscolsum_1h; + p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0); + p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); + + this1 = vec_ld(0, inptr1); + this1l = (__vector short)VEC_UNPACKHU(this1); + this1h = (__vector short)VEC_UNPACKLU(this1); + thiscolsum1l = vec_add(this0l, this1l); + thiscolsum1h = vec_add(this0h, this1h); + lastcolsum1h = thiscolsum1h; + p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0); + p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); + + for (incol = downsampled_width; incol > 0; + incol -= 16, inptr_1 += 16, inptr0 += 16, inptr1 += 16, + outptr0 += 32, outptr1 += 32) { + + if (downsampled_width - incol > 0) { + p_lastcolsum_1l = vec_perm(lastcolsum_1h, thiscolsum_1l, last_index); + p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); + p_lastcolsum1l = vec_perm(lastcolsum1h, thiscolsum1l, last_index); + p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); + lastcolsum_1h = thiscolsum_1h; lastcolsum1h = thiscolsum1h; + } + + if (incol <= 16) { + p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); + p_nextcolsum_1h = vec_perm(thiscolsum_1h, thiscolsum_1h, + next_index_lastcol); + p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); + p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h, + next_index_lastcol); + } else { + this0 = vec_ld(16, inptr0); + this0l = (__vector short)VEC_UNPACKHU(this0); + this0h = (__vector short)VEC_UNPACKLU(this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(16, inptr_1); + this_1l = (__vector short)VEC_UNPACKHU(this_1); + this_1h = (__vector short)VEC_UNPACKLU(this_1); + nextcolsum_1l = vec_add(this0l, this_1l); + nextcolsum_1h = vec_add(this0h, this_1h); + p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); + p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index); + + this1 = vec_ld(16, inptr1); + this1l = (__vector short)VEC_UNPACKHU(this1); + this1h = (__vector short)VEC_UNPACKLU(this1); + nextcolsum1l = vec_add(this0l, this1l); + nextcolsum1h = vec_add(this0h, this1h); + p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); + p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index); + } + + /* Process the upper row */ + + tmpl = vec_mladd(thiscolsum_1l, pw_three, pw_zero); + outle = vec_add(tmpl, p_lastcolsum_1l); + outle = vec_add(outle, pw_eight); + outle = vec_sr(outle, pw_four); + + outlo = vec_add(tmpl, p_nextcolsum_1l); + outlo = vec_add(outlo, pw_seven); + outlo = vec_sr(outlo, pw_four); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr0); + + if (incol > 8) { + tmph = vec_mladd(thiscolsum_1h, pw_three, pw_zero); + outhe = vec_add(tmph, p_lastcolsum_1h); + outhe = vec_add(outhe, pw_eight); + outhe = vec_sr(outhe, pw_four); + + outho = vec_add(tmph, p_nextcolsum_1h); + outho = vec_add(outho, pw_seven); + outho = vec_sr(outho, pw_four); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr0); + } + + /* Process the lower row */ + + tmpl = vec_mladd(thiscolsum1l, pw_three, pw_zero); + outle = vec_add(tmpl, p_lastcolsum1l); + outle = vec_add(outle, pw_eight); + outle = vec_sr(outle, pw_four); + + outlo = vec_add(tmpl, p_nextcolsum1l); + outlo = vec_add(outlo, pw_seven); + outlo = vec_sr(outlo, pw_four); + + out = vec_perm((__vector unsigned char)outle, + (__vector unsigned char)outlo, merge_pack_index); + vec_st(out, 0, outptr1); + + if (incol > 8) { + tmph = vec_mladd(thiscolsum1h, pw_three, pw_zero); + outhe = vec_add(tmph, p_lastcolsum1h); + outhe = vec_add(outhe, pw_eight); + outhe = vec_sr(outhe, pw_four); + + outho = vec_add(tmph, p_nextcolsum1h); + outho = vec_add(outho, pw_seven); + outho = vec_sr(outho, pw_four); + + out = vec_perm((__vector unsigned char)outhe, + (__vector unsigned char)outho, merge_pack_index); + vec_st(out, 16, outptr1); + } + + thiscolsum_1l = nextcolsum_1l; thiscolsum_1h = nextcolsum_1h; + thiscolsum1l = nextcolsum1l; thiscolsum1h = nextcolsum1h; + } + } +} + + +/* These are rarely used (mainly just for decompressing YCCK images) */ + +void +jsimd_h2v1_upsample_altivec (int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr; + int inrow, incol; + + __vector unsigned char in, inl, inh; + + for (inrow = 0; inrow < max_v_samp_factor; inrow++) { + inptr = input_data[inrow]; + outptr = output_data[inrow]; + + for (incol = (output_width + 31) & (~31); incol > 0; + incol -= 64, inptr += 32, outptr += 64) { + + in = vec_ld(0, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 0, outptr); + vec_st(inh, 16, outptr); + + if (incol > 32) { + in = vec_ld(16, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 32, outptr); + vec_st(inh, 48, outptr); + } + } + } +} + + +void +jsimd_h2v2_upsample_altivec (int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr0, outptr1; + int inrow, outrow, incol; + + __vector unsigned char in, inl, inh; + + for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + + inptr = input_data[inrow]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + + for (incol = (output_width + 31) & (~31); incol > 0; + incol -= 64, inptr += 32, outptr0 += 64, outptr1 += 64) { + + in = vec_ld(0, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 0, outptr0); + vec_st(inl, 0, outptr1); + + vec_st(inh, 16, outptr0); + vec_st(inh, 16, outptr1); + + if (incol > 32) { + in = vec_ld(16, inptr); + inl = vec_mergeh(in, in); + inh = vec_mergel(in, in); + + vec_st(inl, 32, outptr0); + vec_st(inl, 32, outptr1); + + vec_st(inh, 48, outptr0); + vec_st(inh, 48, outptr1); + } + } + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,737 @@ +; +; jdsample.asm - upsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_mmx) PRIVATE + +EXTN(jconst_fancy_upsample_mmx): + +PW_ONE times 4 dw 1 +PW_TWO times 4 dw 2 +PW_THREE times 4 dw 3 +PW_SEVEN times 4 dw 7 +PW_EIGHT times 4 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_mmx (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_mmx): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_MMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor mm0,mm0 ; mm0=(all 0's) + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb mm6,mm6 + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movq mm6, MMWORD [esi+1*SIZEOF_MMWORD] + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + +.upsample: + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2,mm1 + movq mm3,mm1 ; mm1=( 0 1 2 3 4 5 6 7) + psllq mm2,BYTE_BIT ; mm2=( - 0 1 2 3 4 5 6) + psrlq mm3,BYTE_BIT ; mm3=( 1 2 3 4 5 6 7 -) + + por mm2,mm7 ; mm2=(-1 0 1 2 3 4 5 6) + por mm3,mm6 ; mm3=( 1 2 3 4 5 6 7 8) + + movq mm7,mm1 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT ; mm7=( 7 - - - - - - -) + + movq mm4,mm1 + punpcklbw mm1,mm0 ; mm1=( 0 1 2 3) + punpckhbw mm4,mm0 ; mm4=( 4 5 6 7) + movq mm5,mm2 + punpcklbw mm2,mm0 ; mm2=(-1 0 1 2) + punpckhbw mm5,mm0 ; mm5=( 3 4 5 6) + movq mm6,mm3 + punpcklbw mm3,mm0 ; mm3=( 1 2 3 4) + punpckhbw mm6,mm0 ; mm6=( 5 6 7 8) + + pmullw mm1,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm2,[GOTOFF(ebx,PW_ONE)] + paddw mm5,[GOTOFF(ebx,PW_ONE)] + paddw mm3,[GOTOFF(ebx,PW_TWO)] + paddw mm6,[GOTOFF(ebx,PW_TWO)] + + paddw mm2,mm1 + paddw mm5,mm4 + psrlw mm2,2 ; mm2=OutLE=( 0 2 4 6) + psrlw mm5,2 ; mm5=OutHE=( 8 10 12 14) + paddw mm3,mm1 + paddw mm6,mm4 + psrlw mm3,2 ; mm3=OutLO=( 1 3 5 7) + psrlw mm6,2 ; mm6=OutHO=( 9 11 13 15) + + psllw mm3,BYTE_BIT + psllw mm6,BYTE_BIT + por mm2,mm3 ; mm2=OutL=( 0 1 2 3 4 5 6 7) + por mm5,mm6 ; mm5=OutH=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm5 + + sub eax, byte SIZEOF_MMWORD + add esi, byte 1*SIZEOF_MMWORD ; inptr + add edi, byte 2*SIZEOF_MMWORD ; outptr + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_mmx (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_MMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0] + movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0] + movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][0]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][0]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][0]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][0]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][0]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][0]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-2)*BYTE_BIT + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm6 + + pand mm1,mm7 ; mm1=( 0 - - -) + pand mm2,mm7 ; mm2=( 0 - - -) + + movq MMWORD [wk(0)], mm1 + movq MMWORD [wk(1)], mm2 + + poppic ebx + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb mm1,mm1 + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT + movq mm2,mm1 + + pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) + pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + + jmp short .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1] + movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1] + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][1]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][1]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][1]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][1]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][1]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][1]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+2*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+3*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm6 + + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm1=( - - - 0) + psllq mm2,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm2=( - - - 0) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + +.upsample: + ; -- process the upper row + + movq mm7, MMWORD [edx+0*SIZEOF_MMWORD] ; mm7=Int0L=( 0 1 2 3) + movq mm3, MMWORD [edx+1*SIZEOF_MMWORD] ; mm3=Int0H=( 4 5 6 7) + + movq mm0,mm7 + movq mm4,mm3 + psrlq mm0,2*BYTE_BIT ; mm0=( 1 2 3 -) + psllq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( - - - 4) + movq mm5,mm7 + movq mm6,mm3 + psrlq mm5,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm5=( 3 - - -) + psllq mm6,2*BYTE_BIT ; mm6=( - 4 5 6) + + por mm0,mm4 ; mm0=( 1 2 3 4) + por mm5,mm6 ; mm5=( 3 4 5 6) + + movq mm1,mm7 + movq mm2,mm3 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm2,2*BYTE_BIT ; mm2=( 5 6 7 -) + movq mm4,mm3 + psrlq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) + + por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) + por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8) + + movq MMWORD [wk(0)], mm4 + + pmullw mm7,[GOTOFF(ebx,PW_THREE)] + pmullw mm3,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm5,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_SEVEN)] + paddw mm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm7 + paddw mm5,mm3 + psrlw mm1,4 ; mm1=Out0LE=( 0 2 4 6) + psrlw mm5,4 ; mm5=Out0HE=( 8 10 12 14) + paddw mm0,mm7 + paddw mm2,mm3 + psrlw mm0,4 ; mm0=Out0LO=( 1 3 5 7) + psrlw mm2,4 ; mm2=Out0HO=( 9 11 13 15) + + psllw mm0,BYTE_BIT + psllw mm2,BYTE_BIT + por mm1,mm0 ; mm1=Out0L=( 0 1 2 3 4 5 6 7) + por mm5,mm2 ; mm5=Out0H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 + + ; -- process the lower row + + movq mm6, MMWORD [edi+0*SIZEOF_MMWORD] ; mm6=Int1L=( 0 1 2 3) + movq mm4, MMWORD [edi+1*SIZEOF_MMWORD] ; mm4=Int1H=( 4 5 6 7) + + movq mm7,mm6 + movq mm3,mm4 + psrlq mm7,2*BYTE_BIT ; mm7=( 1 2 3 -) + psllq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( - - - 4) + movq mm0,mm6 + movq mm2,mm4 + psrlq mm0,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm0=( 3 - - -) + psllq mm2,2*BYTE_BIT ; mm2=( - 4 5 6) + + por mm7,mm3 ; mm7=( 1 2 3 4) + por mm0,mm2 ; mm0=( 3 4 5 6) + + movq mm1,mm6 + movq mm5,mm4 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm5,2*BYTE_BIT ; mm5=( 5 6 7 -) + movq mm3,mm4 + psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) + + por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) + por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) + + movq MMWORD [wk(1)], mm3 + + pmullw mm6,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_EIGHT)] + paddw mm7,[GOTOFF(ebx,PW_SEVEN)] + paddw mm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm6 + paddw mm0,mm4 + psrlw mm1,4 ; mm1=Out1LE=( 0 2 4 6) + psrlw mm0,4 ; mm0=Out1HE=( 8 10 12 14) + paddw mm7,mm6 + paddw mm5,mm4 + psrlw mm7,4 ; mm7=Out1LO=( 1 3 5 7) + psrlw mm5,4 ; mm5=Out1HO=( 9 11 13 15) + + psllw mm7,BYTE_BIT + psllw mm5,BYTE_BIT + por mm1,mm7 ; mm1=Out1L=( 0 1 2 3 4 5 6 7) + por mm0,mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm0 + + poppic ebx + + sub eax, byte SIZEOF_MMWORD + add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_MMWORD ; inptr0 + add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_MMWORD ; outptr0 + add edi, byte 2*SIZEOF_MMWORD ; outptr1 + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_mmx (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v1_upsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz short .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 + punpckhbw mm1,mm1 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 4*SIZEOF_MMWORD ; outptr + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_mmx (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE + +EXTN(jsimd_h2v2_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 + punpckhbw mm1,mm1 + + movq MMWORD [ebx+0*SIZEOF_MMWORD], mm0 + movq MMWORD [ebx+1*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [ebx+2*SIZEOF_MMWORD], mm2 + movq MMWORD [ebx+3*SIZEOF_MMWORD], mm3 + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add ebx, byte 4*SIZEOF_MMWORD ; outptr0 + add edi, byte 4*SIZEOF_MMWORD ; outptr1 + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,671 @@ +; +; jdsample.asm - upsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_sse2) PRIVATE + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov eax, r11d ; colctr + test rax,rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + test rax, SIZEOF_XMMWORD-1 + jz short .skip + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor xmm0,xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-1) + pand xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + pcmpeqb xmm6,xmm6 + pslldq xmm6,(SIZEOF_XMMWORD-1) + pand xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD] + jmp short .upsample + +.columnloop: + movdqa xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD] + pslldq xmm6,(SIZEOF_XMMWORD-1) + +.upsample: + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm2,xmm1 + movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7,xmm1 + psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm2,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] + paddw xmm3,[rel PW_TWO] + paddw xmm6,[rel PW_TWO] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3,BYTE_BIT + psllw xmm6,BYTE_BIT + por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5 + + sub rax, byte SIZEOF_XMMWORD + add rsi, byte 1*SIZEOF_XMMWORD ; inptr + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop rsi + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg near .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + push rbx + + mov eax, r11d ; colctr + test rax,rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rcx + push rdi + push rsi + + mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + + test rax, SIZEOF_XMMWORD-1 + jz short .skip + push rdx + mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop rdx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-2) + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + ; -- process the last column block + + pcmpeqb xmm1,xmm1 + pslldq xmm1,(SIZEOF_XMMWORD-2) + movdqa xmm2,xmm1 + + pand xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD] + + movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5,xmm7 + movdqa xmm6,xmm3 + psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm7 + movdqa xmm2,xmm3 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4,xmm3 + psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7,[rel PW_THREE] + pmullw xmm3,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm5,[rel PW_EIGHT] + paddw xmm0,[rel PW_SEVEN] + paddw xmm2,[rel PW_SEVEN] + + paddw xmm1,xmm7 + paddw xmm5,xmm3 + psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0,xmm7 + paddw xmm2,xmm3 + psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0,BYTE_BIT + psllw xmm2,BYTE_BIT + por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0,xmm6 + movdqa xmm2,xmm4 + psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3,xmm4 + psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm0,[rel PW_EIGHT] + paddw xmm7,[rel PW_SEVEN] + paddw xmm5,[rel PW_SEVEN] + + paddw xmm1,xmm6 + paddw xmm0,xmm4 + psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7,xmm6 + paddw xmm5,xmm4 + psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7,BYTE_BIT + psllw xmm5,BYTE_BIT + por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0 + + sub rax, byte SIZEOF_XMMWORD + add rcx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add rbx, byte 1*SIZEOF_XMMWORD ; inptr0 + add rsi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add rdx, byte 2*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test rax,rax + jnz near .columnloop_last + + pop rsi + pop rdi + pop rcx + pop rax + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov edx, r11d + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz short .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + mov rax,rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rdi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg short .rowloop + +.return: + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11 = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_upsample_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + mov edx, r11d + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov rcx, r10 ; rowctr + test rcx,rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + mov rax,rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rbx, byte 4*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jdsample-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jdsample-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,729 @@ +; +; jdsample.asm - upsampling (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_sse2) PRIVATE + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor xmm0,xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-1) + pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD] + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb xmm6,xmm6 + pslldq xmm6,(SIZEOF_XMMWORD-1) + pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD] + pslldq xmm6,(SIZEOF_XMMWORD-1) + +.upsample: + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2,xmm1 + movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7,xmm1 + psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm2,[GOTOFF(ebx,PW_ONE)] + paddw xmm5,[GOTOFF(ebx,PW_ONE)] + paddw xmm3,[GOTOFF(ebx,PW_TWO)] + paddw xmm6,[GOTOFF(ebx,PW_TWO)] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3,BYTE_BIT + psllw xmm6,BYTE_BIT + por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5 + + sub eax, byte SIZEOF_XMMWORD + add esi, byte 1*SIZEOF_XMMWORD ; inptr + add edi, byte 2*SIZEOF_XMMWORD ; outptr + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr + + align 16 + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax,eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-2) + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + poppic ebx + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb xmm1,xmm1 + pslldq xmm1,(SIZEOF_XMMWORD-2) + movdqa xmm2,xmm1 + + pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD] + + movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5,xmm7 + movdqa xmm6,xmm3 + psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm7 + movdqa xmm2,xmm3 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4,xmm3 + psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7,[GOTOFF(ebx,PW_THREE)] + pmullw xmm3,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm5,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm7 + paddw xmm5,xmm3 + psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0,xmm7 + paddw xmm2,xmm3 + psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0,BYTE_BIT + psllw xmm2,BYTE_BIT + por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0,xmm6 + movdqa xmm2,xmm4 + psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3,xmm4 + psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm7,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm6 + paddw xmm0,xmm4 + psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7,xmm6 + paddw xmm5,xmm4 + psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7,BYTE_BIT + psllw xmm5,BYTE_BIT + por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0 + + poppic ebx + + sub eax, byte SIZEOF_XMMWORD + add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 + add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_XMMWORD ; outptr0 + add edi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v1_upsample_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 + and edx, byte -(2*SIZEOF_XMMWORD) + jz short .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add edi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, +; JDIMENSION output_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b)+8 ; int max_v_samp_factor +%define output_width(b) (b)+12 ; JDIMENSION output_width +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr + + align 16 + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE + +EXTN(jsimd_h2v2_upsample_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 + and edx, byte -(2*SIZEOF_XMMWORD) + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax,edx ; colctr + alignx 16,7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1,xmm0 + punpcklbw xmm0,xmm0 + punpckhbw xmm1,xmm1 + + movdqa XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3,xmm2 + punpcklbw xmm2,xmm2 + punpckhbw xmm3,xmm3 + + movdqa XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add ebx, byte 4*SIZEOF_XMMWORD ; outptr0 + add edi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-3dn.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,320 @@ +; +; jfdctflt.asm - floating-point FDCT (3DNow!) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_3dnow) PRIVATE + +EXTN(jconst_fdct_float_3dnow): + +PD_0_382 times 2 dd 0.382683432365089771728460 +PD_0_707 times 2 dd 0.707106781186547524400844 +PD_0_541 times 2 dd 0.541196100146196984399723 +PD_1_306 times 2 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_3dnow (FAST_FLOAT *data) +; + +%define data(b) (b)+8 ; FAST_FLOAT *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_3dnow) PRIVATE + +EXTN(jsimd_fdct_float_3dnow): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17) + + movq mm4,mm0 ; transpose coefficients + punpckldq mm0,mm1 ; mm0=(00 10)=data0 + punpckhdq mm4,mm1 ; mm4=(01 11)=data1 + movq mm5,mm2 ; transpose coefficients + punpckldq mm2,mm3 ; mm2=(06 16)=data6 + punpckhdq mm5,mm3 ; mm5=(07 17)=data7 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm2 ; mm4=data1-data6=tmp6 + pfsub mm0,mm5 ; mm0=data0-data7=tmp7 + pfadd mm6,mm2 ; mm6=data1+data6=tmp1 + pfadd mm7,mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4,mm1 ; transpose coefficients + punpckldq mm1,mm3 ; mm1=(02 12)=data2 + punpckhdq mm4,mm3 ; mm4=(03 13)=data3 + movq mm0,mm2 ; transpose coefficients + punpckldq mm2,mm5 ; mm2=(04 14)=data4 + punpckhdq mm0,mm5 ; mm0=(05 15)=data5 + + movq mm3,mm4 + movq mm5,mm1 + pfadd mm4,mm2 ; mm4=data3+data4=tmp3 + pfadd mm1,mm0 ; mm1=data2+data5=tmp2 + pfsub mm3,mm2 ; mm3=data3-data4=tmp4 + pfsub mm5,mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2,mm7 + movq mm0,mm6 + pfsub mm7,mm4 ; mm7=tmp13 + pfsub mm6,mm1 ; mm6=tmp12 + pfadd mm2,mm4 ; mm2=tmp10 + pfadd mm0,mm1 ; mm0=tmp11 + + pfadd mm6,mm7 + pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4,mm2 + movq mm1,mm7 + pfsub mm2,mm0 ; mm2=data4 + pfsub mm7,mm6 ; mm7=data6 + pfadd mm4,mm0 ; mm4=data0 + pfadd mm1,mm6 ; mm1=data2 + + movq MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3,mm5 ; mm3=tmp10 + pfadd mm5,mm0 ; mm5=tmp11 + pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2,mm3 ; mm2=tmp10 + pfsub mm3,mm0 + pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2,mm3 ; mm2=z2 + pfadd mm0,mm3 ; mm0=z4 + + movq mm7,mm6 + pfsub mm6,mm5 ; mm6=z13 + pfadd mm7,mm5 ; mm7=z11 + + movq mm4,mm6 + movq mm1,mm7 + pfsub mm6,mm2 ; mm6=data3 + pfsub mm7,mm0 ; mm7=data7 + pfadd mm4,mm2 ; mm4=data5 + pfadd mm1,mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71) + + movq mm4,mm0 ; transpose coefficients + punpckldq mm0,mm1 ; mm0=(00 01)=data0 + punpckhdq mm4,mm1 ; mm4=(10 11)=data1 + movq mm5,mm2 ; transpose coefficients + punpckldq mm2,mm3 ; mm2=(60 61)=data6 + punpckhdq mm5,mm3 ; mm5=(70 71)=data7 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm2 ; mm4=data1-data6=tmp6 + pfsub mm0,mm5 ; mm0=data0-data7=tmp7 + pfadd mm6,mm2 ; mm6=data1+data6=tmp1 + pfadd mm7,mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4,mm1 ; transpose coefficients + punpckldq mm1,mm3 ; mm1=(20 21)=data2 + punpckhdq mm4,mm3 ; mm4=(30 31)=data3 + movq mm0,mm2 ; transpose coefficients + punpckldq mm2,mm5 ; mm2=(40 41)=data4 + punpckhdq mm0,mm5 ; mm0=(50 51)=data5 + + movq mm3,mm4 + movq mm5,mm1 + pfadd mm4,mm2 ; mm4=data3+data4=tmp3 + pfadd mm1,mm0 ; mm1=data2+data5=tmp2 + pfsub mm3,mm2 ; mm3=data3-data4=tmp4 + pfsub mm5,mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2,mm7 + movq mm0,mm6 + pfsub mm7,mm4 ; mm7=tmp13 + pfsub mm6,mm1 ; mm6=tmp12 + pfadd mm2,mm4 ; mm2=tmp10 + pfadd mm0,mm1 ; mm0=tmp11 + + pfadd mm6,mm7 + pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4,mm2 + movq mm1,mm7 + pfsub mm2,mm0 ; mm2=data4 + pfsub mm7,mm6 ; mm7=data6 + pfadd mm4,mm0 ; mm4=data0 + pfadd mm1,mm6 ; mm1=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3,mm5 ; mm3=tmp10 + pfadd mm5,mm0 ; mm5=tmp11 + pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2,mm3 ; mm2=tmp10 + pfsub mm3,mm0 + pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2,mm3 ; mm2=z2 + pfadd mm0,mm3 ; mm0=z4 + + movq mm7,mm6 + pfsub mm6,mm5 ; mm6=z13 + pfadd mm7,mm5 ; mm7=z11 + + movq mm4,mm6 + movq mm1,mm7 + pfsub mm6,mm2 ; mm6=data3 + pfsub mm7,mm0 ; mm7=data7 + pfadd mm4,mm2 ; mm4=data5 + pfadd mm1,mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + + femms ; empty MMX/3DNow! state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-sse-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,358 @@ +; +; jfdctflt.asm - floating-point FDCT (64-bit SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_sse) PRIVATE + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_sse (FAST_FLOAT *data) +; + +; r10 = FAST_FLOAT *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_sse) PRIVATE + +EXTN(jsimd_fdct_float_sse): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[rel PD_0_707] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT + dec rcx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[rel PD_0_707] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, byte 4*SIZEOF_FAST_FLOAT + dec rcx + jnz near .columnloop + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-sse.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctflt-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctflt-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,370 @@ +; +; jfdctflt.asm - floating-point FDCT (SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_sse) PRIVATE + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_sse (FAST_FLOAT *data) +; + +%define data(b) (b)+8 ; FAST_FLOAT *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_float_sse) PRIVATE + +EXTN(jsimd_fdct_float_sse): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, byte 4*SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,156 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* FAST INTEGER FORWARD DCT + * + * This is similar to the SSE2 implementation, except that we left-shift the + * constants by 1 less bit (the -1 in CONST_SHIFT.) This is because + * vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of: + * the elements in arg3 + the most significant 17 bits of + * (the elements in arg1 * the elements in arg2). + */ + +#include "jsimd_altivec.h" + + +#define F_0_382 98 /* FIX(0.382683433) */ +#define F_0_541 139 /* FIX(0.541196100) */ +#define F_0_707 181 /* FIX(0.707106781) */ +#define F_1_306 334 /* FIX(1.306562965) */ + +#define CONST_BITS 8 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) + + +#define DO_FDCT() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out4 = vec_sub(tmp10, tmp11); \ + \ + z1 = vec_add(tmp12, tmp13); \ + z1 = vec_sl(z1, pre_multiply_scale_bits); \ + z1 = vec_madds(z1, pw_0707, pw_zero); \ + \ + out2 = vec_add(tmp13, z1); \ + out6 = vec_sub(tmp13, z1); \ + \ + /* Odd part */ \ + \ + tmp10 = vec_add(tmp4, tmp5); \ + tmp11 = vec_add(tmp5, tmp6); \ + tmp12 = vec_add(tmp6, tmp7); \ + \ + tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + z5 = vec_sub(tmp10, tmp12); \ + z5 = vec_madds(z5, pw_0382, pw_zero); \ + \ + z2 = vec_madds(tmp10, pw_0541, z5); \ + z4 = vec_madds(tmp12, pw_1306, z5); \ + \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + z3 = vec_madds(tmp11, pw_0707, pw_zero); \ + \ + z11 = vec_add(tmp7, z3); \ + z13 = vec_sub(tmp7, z3); \ + \ + out5 = vec_add(z13, z2); \ + out3 = vec_sub(z13, z2); \ + out1 = vec_add(z11, z4); \ + out7 = vec_sub(z11, z4); \ +} + + +void +jsimd_fdct_ifast_altivec (DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + z1, z2, z3, z4, z5, z11, z13, + out0, out1, out2, out3, out4, out5, out6, out7; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_0382 = { __8X(F_0_382 << CONST_SHIFT) }, + pw_0541 = { __8X(F_0_541 << CONST_SHIFT) }, + pw_0707 = { __8X(F_0_707 << CONST_SHIFT) }, + pw_1306 = { __8X(F_1_306 << CONST_SHIFT) }; + __vector unsigned short + pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) }; + + /* Pass 1: process rows */ + + row0 = vec_ld(0, data); + row1 = vec_ld(16, data); + row2 = vec_ld(32, data); + row3 = vec_ld(48, data); + row4 = vec_ld(64, data); + row5 = vec_ld(80, data); + row6 = vec_ld(96, data); + row7 = vec_ld(112, data); + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_FDCT(); + + /* Pass 2: process columns */ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_FDCT(); + + vec_st(out0, 0, data); + vec_st(out1, 16, data); + vec_st(out2, 32, data); + vec_st(out3, 48, data); + vec_st(out4, 64, data); + vec_st(out5, 80, data); + vec_st(out6, 96, data); + vec_st(out7, 112, data); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,397 @@ +; +; jfdctfst.asm - fast integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_mmx) PRIVATE + +EXTN(jconst_fdct_ifast_mmx): + +PW_F0707 times 4 dw F_0_707 << CONST_SHIFT +PW_F0382 times 4 dw F_0_382 << CONST_SHIFT +PW_F0541 times 4 dw F_0_541 << CONST_SHIFT +PW_F1306 times 4 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_mmx (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_mmx) PRIVATE + +EXTN(jsimd_fdct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,392 @@ +; +; jfdctfst.asm - fast integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_sse2) PRIVATE + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2 (DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE + +EXTN(jsimd_fdct_ifast_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + psubw xmm3,xmm1 ; xmm3=tmp13 + psubw xmm6,xmm7 ; xmm6=tmp12 + paddw xmm4,xmm1 ; xmm4=tmp10 + paddw xmm0,xmm7 ; xmm0=tmp11 + + paddw xmm6,xmm3 + psllw xmm6,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6,[rel PW_F0707] ; xmm6=z1 + + movdqa xmm1,xmm4 + movdqa xmm7,xmm3 + psubw xmm4,xmm0 ; xmm4=data4 + psubw xmm3,xmm6 ; xmm3=data6 + paddw xmm1,xmm0 ; xmm1=data0 + paddw xmm7,xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm5,xmm0 ; xmm5=tmp11 + paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F0707] ; xmm5=z3 + + movdqa xmm4,xmm2 ; xmm4=tmp10 + psubw xmm2,xmm0 + pmulhw xmm2,[rel PW_F0382] ; xmm2=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm2 ; xmm4=z2 + paddw xmm0,xmm2 ; xmm0=z4 + + movdqa xmm3,xmm6 + psubw xmm6,xmm5 ; xmm6=z13 + paddw xmm3,xmm5 ; xmm3=z11 + + movdqa xmm2,xmm6 + movdqa xmm5,xmm3 + psubw xmm6,xmm4 ; xmm6=data3 + psubw xmm3,xmm0 ; xmm3=data7 + paddw xmm2,xmm4 ; xmm2=data5 + paddw xmm5,xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2,xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5,xmm6 + movdqa xmm3,xmm1 + psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7,xmm6 + movdqa xmm0,xmm2 + paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm1,xmm5 + psubw xmm3,xmm6 ; xmm3=tmp13 + psubw xmm5,xmm2 ; xmm5=tmp12 + paddw xmm4,xmm6 ; xmm4=tmp10 + paddw xmm1,xmm2 ; xmm1=tmp11 + + paddw xmm5,xmm3 + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F0707] ; xmm5=z1 + + movdqa xmm6,xmm4 + movdqa xmm2,xmm3 + psubw xmm4,xmm1 ; xmm4=data4 + psubw xmm3,xmm5 ; xmm3=data6 + paddw xmm6,xmm1 ; xmm6=data0 + paddw xmm2,xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7,xmm0 ; xmm7=tmp10 + paddw xmm0,xmm1 ; xmm0=tmp11 + paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7,PRE_MULTIPLY_SCALE_BITS + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0,[rel PW_F0707] ; xmm0=z3 + + movdqa xmm4,xmm7 ; xmm4=tmp10 + psubw xmm7,xmm1 + pmulhw xmm7,[rel PW_F0382] ; xmm7=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm7 ; xmm4=z2 + paddw xmm1,xmm7 ; xmm1=z4 + + movdqa xmm3,xmm5 + psubw xmm5,xmm0 ; xmm5=z13 + paddw xmm3,xmm0 ; xmm3=z11 + + movdqa xmm6,xmm5 + movdqa xmm2,xmm3 + psubw xmm5,xmm4 ; xmm5=data3 + psubw xmm3,xmm1 ; xmm3=data7 + paddw xmm6,xmm4 ; xmm6=data5 + paddw xmm2,xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctfst-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctfst-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,404 @@ +; +; jfdctfst.asm - fast integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_sse2) PRIVATE + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2 (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE + +EXTN(jsimd_fdct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + psubw xmm3,xmm1 ; xmm3=tmp13 + psubw xmm6,xmm7 ; xmm6=tmp12 + paddw xmm4,xmm1 ; xmm4=tmp10 + paddw xmm0,xmm7 ; xmm0=tmp11 + + paddw xmm6,xmm3 + psllw xmm6,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1 + + movdqa xmm1,xmm4 + movdqa xmm7,xmm3 + psubw xmm4,xmm0 ; xmm4=data4 + psubw xmm3,xmm6 ; xmm3=data6 + paddw xmm1,xmm0 ; xmm1=data0 + paddw xmm7,xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm5,xmm0 ; xmm5=tmp11 + paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3 + + movdqa xmm4,xmm2 ; xmm4=tmp10 + psubw xmm2,xmm0 + pmulhw xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm2 ; xmm4=z2 + paddw xmm0,xmm2 ; xmm0=z4 + + movdqa xmm3,xmm6 + psubw xmm6,xmm5 ; xmm6=z13 + paddw xmm3,xmm5 ; xmm3=z11 + + movdqa xmm2,xmm6 + movdqa xmm5,xmm3 + psubw xmm6,xmm4 ; xmm6=data3 + psubw xmm3,xmm0 ; xmm3=data7 + paddw xmm2,xmm4 ; xmm2=data5 + paddw xmm5,xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. + +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2,xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5,xmm6 + movdqa xmm3,xmm1 + psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7,xmm6 + movdqa xmm0,xmm2 + paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm1,xmm5 + psubw xmm3,xmm6 ; xmm3=tmp13 + psubw xmm5,xmm2 ; xmm5=tmp12 + paddw xmm4,xmm6 ; xmm4=tmp10 + paddw xmm1,xmm2 ; xmm1=tmp11 + + paddw xmm5,xmm3 + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1 + + movdqa xmm6,xmm4 + movdqa xmm2,xmm3 + psubw xmm4,xmm1 ; xmm4=data4 + psubw xmm3,xmm5 ; xmm3=data6 + paddw xmm6,xmm1 ; xmm6=data0 + paddw xmm2,xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7,xmm0 ; xmm7=tmp10 + paddw xmm0,xmm1 ; xmm0=tmp11 + paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7,PRE_MULTIPLY_SCALE_BITS + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3 + + movdqa xmm4,xmm7 ; xmm4=tmp10 + psubw xmm7,xmm1 + pmulhw xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm7 ; xmm4=z2 + paddw xmm1,xmm7 ; xmm1=z4 + + movdqa xmm3,xmm5 + psubw xmm5,xmm0 ; xmm5=z13 + paddw xmm3,xmm0 ; xmm3=z11 + + movdqa xmm6,xmm5 + movdqa xmm2,xmm3 + psubw xmm5,xmm4 ; xmm5=data3 + psubw xmm3,xmm1 ; xmm3=data7 + paddw xmm6,xmm4 ; xmm6=data5 + paddw xmm2,xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,262 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER FORWARD DCT */ + +#include "jsimd_altivec.h" + + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS) + + +#define DO_FDCT_COMMON(PASS) \ +{ \ + /* (Original) \ + * z1 = (tmp12 + tmp13) * 0.541196100; \ + * data2 = z1 + tmp13 * 0.765366865; \ + * data6 = z1 + tmp12 * -1.847759065; \ + * \ + * (This implementation) \ + * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ + * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ + */ \ + \ + tmp1312l = vec_mergeh(tmp13, tmp12); \ + tmp1312h = vec_mergel(tmp13, tmp12); \ + \ + out2l = vec_msums(tmp1312l, pw_f130_f054, pd_descale_p##PASS); \ + out2h = vec_msums(tmp1312h, pw_f130_f054, pd_descale_p##PASS); \ + out6l = vec_msums(tmp1312l, pw_f054_mf130, pd_descale_p##PASS); \ + out6h = vec_msums(tmp1312h, pw_f054_mf130, pd_descale_p##PASS); \ + \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ + \ + out2 = vec_pack(out2l, out2h); \ + out6 = vec_pack(out6l, out6h); \ + \ + /* Odd part */ \ + \ + z3 = vec_add(tmp4, tmp6); \ + z4 = vec_add(tmp5, tmp7); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ + \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_descale_p##PASS); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_descale_p##PASS); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \ + \ + /* (Original) \ + * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ + * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ + * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \ + * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \ + * \ + * (This implementation) \ + * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ + * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ + * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ + * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ + * data7 = tmp4 + z3; data5 = tmp5 + z4; \ + * data3 = tmp6 + z3; data1 = tmp7 + z4; \ + */ \ + \ + tmp47l = vec_mergeh(tmp4, tmp7); \ + tmp47h = vec_mergel(tmp4, tmp7); \ + \ + out7l = vec_msums(tmp47l, pw_mf060_mf089, z3l); \ + out7h = vec_msums(tmp47h, pw_mf060_mf089, z3h); \ + out1l = vec_msums(tmp47l, pw_mf089_f060, z4l); \ + out1h = vec_msums(tmp47h, pw_mf089_f060, z4h); \ + \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + \ + out7 = vec_pack(out7l, out7h); \ + out1 = vec_pack(out1l, out1h); \ + \ + tmp56l = vec_mergeh(tmp5, tmp6); \ + tmp56h = vec_mergel(tmp5, tmp6); \ + \ + out5l = vec_msums(tmp56l, pw_mf050_mf256, z4l); \ + out5h = vec_msums(tmp56h, pw_mf050_mf256, z4h); \ + out3l = vec_msums(tmp56l, pw_mf256_f050, z3l); \ + out3h = vec_msums(tmp56h, pw_mf256_f050, z3h); \ + \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + \ + out5 = vec_pack(out5l, out5h); \ + out3 = vec_pack(out3l, out3h); \ +} + +#define DO_FDCT_PASS1() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_sl(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_sl(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(1); \ +} + +#define DO_FDCT_PASS2() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_add(out0, pw_descale_p2x); \ + out0 = vec_sra(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_add(out4, pw_descale_p2x); \ + out4 = vec_sra(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(2); \ +} + + +void +jsimd_fdct_islow_altivec (DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h, + z3, z4, z34l, z34h, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector int z3l, z3h, z4l, z4h, + out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h, + out7l, out7h; + + /* Constants */ + __vector short + pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, + pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, + pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, + pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, + pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, + pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, + pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, + pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }, + pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) }; + __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; + __vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, + pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; + __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, + descale_p2 = { __4X(DESCALE_P2) }; + + /* Pass 1: process rows */ + + row0 = vec_ld(0, data); + row1 = vec_ld(16, data); + row2 = vec_ld(32, data); + row3 = vec_ld(48, data); + row4 = vec_ld(64, data); + row5 = vec_ld(80, data); + row6 = vec_ld(96, data); + row7 = vec_ld(112, data); + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_FDCT_PASS1(); + + /* Pass 2: process columns */ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_FDCT_PASS2(); + + vec_st(out0, 0, data); + vec_st(out1, 16, data); + vec_st(out2, 32, data); + vec_st(out3, 48, data); + vec_st(out4, 64, data); + vec_st(out5, 80, data); + vec_st(out6, 96, data); + vec_st(out7, 112, data); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,622 @@ +; +; jfdctint.asm - accurate integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_mmx) PRIVATE + +EXTN(jconst_fdct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_mmx (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_fdct_islow_mmx) PRIVATE + +EXTN(jsimd_fdct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + psllw mm5,PASS1_BITS ; mm5=data0 + psllw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm1,DESCALE_P1 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm2,DESCALE_P1 + psrad mm7,DESCALE_P1 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm3,DESCALE_P1 + psrad mm5,DESCALE_P1 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + paddw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw mm5,PASS1_BITS ; mm5=data0 + psraw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm0,DESCALE_P2 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm6,DESCALE_P2 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm2,DESCALE_P2 + psrad mm7,DESCALE_P2 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm3,DESCALE_P2 + psrad mm5,DESCALE_P2 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,622 @@ +; +; jfdctint.asm - accurate integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_sse2) PRIVATE + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_sse2 (DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 16 + global EXTN(jsimd_fdct_islow_sse2) PRIVATE + +EXTN(jsimd_fdct_islow_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + paddw xmm3,xmm1 ; xmm3=tmp10 + paddw xmm6,xmm7 ; xmm6=tmp11 + psubw xmm4,xmm1 ; xmm4=tmp13 + psubw xmm0,xmm7 ; xmm0=tmp12 + + movdqa xmm1,xmm3 + paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3,PASS1_BITS ; xmm3=data0 + psllw xmm1,PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7,xmm4 ; xmm4=tmp13 + movdqa xmm6,xmm4 + punpcklwd xmm7,xmm0 ; xmm0=tmp12 + punpckhwd xmm6,xmm0 + movdqa xmm4,xmm7 + movdqa xmm0,xmm6 + pmaddwd xmm7,[rel PW_F130_F054] ; xmm7=data2L + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=data2H + pmaddwd xmm4,[rel PW_F054_MF130] ; xmm4=data6L + pmaddwd xmm0,[rel PW_F054_MF130] ; xmm0=data6H + + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm6,[rel PD_DESCALE_P1] + psrad xmm7,DESCALE_P1 + psrad xmm6,DESCALE_P1 + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm0,[rel PD_DESCALE_P1] + psrad xmm4,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm7,xmm6 ; xmm7=data2 + packssdw xmm4,xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6,xmm2 ; xmm2=tmp4 + movdqa xmm0,xmm5 ; xmm5=tmp5 + paddw xmm6,xmm3 ; xmm6=z3 + paddw xmm0,xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7,xmm6 + movdqa xmm4,xmm6 + punpcklwd xmm7,xmm0 + punpckhwd xmm4,xmm0 + movdqa xmm6,xmm7 + movdqa xmm0,xmm4 + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3L + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3H + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4L + pmaddwd xmm0,[rel PW_F117_F078] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7,xmm2 + movdqa xmm4,xmm2 + punpcklwd xmm7,xmm1 + punpckhwd xmm4,xmm1 + movdqa xmm2,xmm7 + movdqa xmm1,xmm4 + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp4L + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4H + pmaddwd xmm2,[rel PW_MF089_F060] ; xmm2=tmp7L + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2,xmm6 ; xmm2=data1L + paddd xmm1,xmm0 ; xmm1=data1H + + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm4,[rel PD_DESCALE_P1] + psrad xmm7,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm2,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] + psrad xmm2,DESCALE_P1 + psrad xmm1,DESCALE_P1 + + packssdw xmm7,xmm4 ; xmm7=data7 + packssdw xmm2,xmm1 ; xmm2=data1 + + movdqa xmm4,xmm5 + movdqa xmm1,xmm5 + punpcklwd xmm4,xmm3 + punpckhwd xmm1,xmm3 + movdqa xmm5,xmm4 + movdqa xmm3,xmm1 + pmaddwd xmm4,[rel PW_MF050_MF256] ; xmm4=tmp5L + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5H + pmaddwd xmm5,[rel PW_MF256_F050] ; xmm5=tmp6L + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6H + + paddd xmm4,xmm6 ; xmm4=data5L + paddd xmm1,xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] + psrad xmm4,DESCALE_P1 + psrad xmm1,DESCALE_P1 + paddd xmm5,[rel PD_DESCALE_P1] + paddd xmm3,[rel PD_DESCALE_P1] + psrad xmm5,DESCALE_P1 + psrad xmm3,DESCALE_P1 + + packssdw xmm4,xmm1 ; xmm4=data5 + packssdw xmm5,xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2,xmm5 + movdqa xmm7,xmm6 + psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0,xmm5 + movdqa xmm3,xmm4 + paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1,xmm7 + movdqa xmm6,xmm2 + paddw xmm7,xmm5 ; xmm7=tmp10 + paddw xmm2,xmm4 ; xmm2=tmp11 + psubw xmm1,xmm5 ; xmm1=tmp13 + psubw xmm6,xmm4 ; xmm6=tmp12 + + movdqa xmm5,xmm7 + paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7,[rel PW_DESCALE_P2X] + paddw xmm5,[rel PW_DESCALE_P2X] + psraw xmm7,PASS1_BITS ; xmm7=data0 + psraw xmm5,PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4,xmm1 ; xmm1=tmp13 + movdqa xmm2,xmm1 + punpcklwd xmm4,xmm6 ; xmm6=tmp12 + punpckhwd xmm2,xmm6 + movdqa xmm1,xmm4 + movdqa xmm6,xmm2 + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=data2L + pmaddwd xmm2,[rel PW_F130_F054] ; xmm2=data2H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=data6L + pmaddwd xmm6,[rel PW_F054_MF130] ; xmm6=data6H + + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm2,[rel PD_DESCALE_P2] + psrad xmm4,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm6,[rel PD_DESCALE_P2] + psrad xmm1,DESCALE_P2 + psrad xmm6,DESCALE_P2 + + packssdw xmm4,xmm2 ; xmm4=data2 + packssdw xmm1,xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2,xmm0 ; xmm0=tmp4 + movdqa xmm6,xmm3 ; xmm3=tmp5 + paddw xmm2,xmm7 ; xmm2=z3 + paddw xmm6,xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4,xmm2 + movdqa xmm1,xmm2 + punpcklwd xmm4,xmm6 + punpckhwd xmm1,xmm6 + movdqa xmm2,xmm4 + movdqa xmm6,xmm1 + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3L + pmaddwd xmm1,[rel PW_MF078_F117] ; xmm1=z3H + pmaddwd xmm2,[rel PW_F117_F078] ; xmm2=z4L + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4,xmm0 + movdqa xmm1,xmm0 + punpcklwd xmm4,xmm5 + punpckhwd xmm1,xmm5 + movdqa xmm0,xmm4 + movdqa xmm5,xmm1 + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4L + pmaddwd xmm1,[rel PW_MF060_MF089] ; xmm1=tmp4H + pmaddwd xmm0,[rel PW_MF089_F060] ; xmm0=tmp7L + pmaddwd xmm5,[rel PW_MF089_F060] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0,xmm2 ; xmm0=data1L + paddd xmm5,xmm6 ; xmm5=data1H + + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm1,[rel PD_DESCALE_P2] + psrad xmm4,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm0,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] + psrad xmm0,DESCALE_P2 + psrad xmm5,DESCALE_P2 + + packssdw xmm4,xmm1 ; xmm4=data7 + packssdw xmm0,xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1,xmm3 + movdqa xmm5,xmm3 + punpcklwd xmm1,xmm7 + punpckhwd xmm5,xmm7 + movdqa xmm3,xmm1 + movdqa xmm7,xmm5 + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5L + pmaddwd xmm5,[rel PW_MF050_MF256] ; xmm5=tmp5H + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6L + pmaddwd xmm7,[rel PW_MF256_F050] ; xmm7=tmp6H + + paddd xmm1,xmm2 ; xmm1=data5L + paddd xmm5,xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] + psrad xmm1,DESCALE_P2 + psrad xmm5,DESCALE_P2 + paddd xmm3,[rel PD_DESCALE_P2] + paddd xmm7,[rel PD_DESCALE_P2] + psrad xmm3,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm1,xmm5 ; xmm1=data5 + packssdw xmm3,xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jfdctint-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jfdctint-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,634 @@ +; +; jfdctint.asm - accurate integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_sse2) PRIVATE + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_sse2 (DCTELEM *data) +; + +%define data(b) (b)+8 ; DCTELEM *data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 16 + global EXTN(jsimd_fdct_islow_sse2) PRIVATE + +EXTN(jsimd_fdct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + paddw xmm3,xmm1 ; xmm3=tmp10 + paddw xmm6,xmm7 ; xmm6=tmp11 + psubw xmm4,xmm1 ; xmm4=tmp13 + psubw xmm0,xmm7 ; xmm0=tmp12 + + movdqa xmm1,xmm3 + paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3,PASS1_BITS ; xmm3=data0 + psllw xmm1,PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7,xmm4 ; xmm4=tmp13 + movdqa xmm6,xmm4 + punpcklwd xmm7,xmm0 ; xmm0=tmp12 + punpckhwd xmm6,xmm0 + movdqa xmm4,xmm7 + movdqa xmm0,xmm6 + pmaddwd xmm7,[GOTOFF(ebx,PW_F130_F054)] ; xmm7=data2L + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=data2H + pmaddwd xmm4,[GOTOFF(ebx,PW_F054_MF130)] ; xmm4=data6L + pmaddwd xmm0,[GOTOFF(ebx,PW_F054_MF130)] ; xmm0=data6H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm6,DESCALE_P1 + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm7,xmm6 ; xmm7=data2 + packssdw xmm4,xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6,xmm2 ; xmm2=tmp4 + movdqa xmm0,xmm5 ; xmm5=tmp5 + paddw xmm6,xmm3 ; xmm6=z3 + paddw xmm0,xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7,xmm6 + movdqa xmm4,xmm6 + punpcklwd xmm7,xmm0 + punpckhwd xmm4,xmm0 + movdqa xmm6,xmm7 + movdqa xmm0,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3H + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4L + pmaddwd xmm0,[GOTOFF(ebx,PW_F117_F078)] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7,xmm2 + movdqa xmm4,xmm2 + punpcklwd xmm7,xmm1 + punpckhwd xmm4,xmm1 + movdqa xmm2,xmm7 + movdqa xmm1,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp4L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF089_F060)] ; xmm2=tmp7L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2,xmm6 ; xmm2=data1L + paddd xmm1,xmm0 ; xmm1=data1H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm2,DESCALE_P1 + psrad xmm1,DESCALE_P1 + + packssdw xmm7,xmm4 ; xmm7=data7 + packssdw xmm2,xmm1 ; xmm2=data1 + + movdqa xmm4,xmm5 + movdqa xmm1,xmm5 + punpcklwd xmm4,xmm3 + punpckhwd xmm1,xmm3 + movdqa xmm5,xmm4 + movdqa xmm3,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm4=tmp5L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5H + pmaddwd xmm5,[GOTOFF(ebx,PW_MF256_F050)] ; xmm5=tmp6L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6H + + paddd xmm4,xmm6 ; xmm4=data5L + paddd xmm1,xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm1,DESCALE_P1 + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm5,DESCALE_P1 + psrad xmm3,DESCALE_P1 + + packssdw xmm4,xmm1 ; xmm4=data5 + packssdw xmm5,xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. + +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2,xmm5 + movdqa xmm7,xmm6 + psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0,xmm5 + movdqa xmm3,xmm4 + paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1,xmm7 + movdqa xmm6,xmm2 + paddw xmm7,xmm5 ; xmm7=tmp10 + paddw xmm2,xmm4 ; xmm2=tmp11 + psubw xmm1,xmm5 ; xmm1=tmp13 + psubw xmm6,xmm4 ; xmm6=tmp12 + + movdqa xmm5,xmm7 + paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw xmm7,PASS1_BITS ; xmm7=data0 + psraw xmm5,PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4,xmm1 ; xmm1=tmp13 + movdqa xmm2,xmm1 + punpcklwd xmm4,xmm6 ; xmm6=tmp12 + punpckhwd xmm2,xmm6 + movdqa xmm1,xmm4 + movdqa xmm6,xmm2 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=data2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F130_F054)] ; xmm2=data2H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=data6L + pmaddwd xmm6,[GOTOFF(ebx,PW_F054_MF130)] ; xmm6=data6H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm6,DESCALE_P2 + + packssdw xmm4,xmm2 ; xmm4=data2 + packssdw xmm1,xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2,xmm0 ; xmm0=tmp4 + movdqa xmm6,xmm3 ; xmm3=tmp5 + paddw xmm2,xmm7 ; xmm2=z3 + paddw xmm6,xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4,xmm2 + movdqa xmm1,xmm2 + punpcklwd xmm4,xmm6 + punpckhwd xmm1,xmm6 + movdqa xmm2,xmm4 + movdqa xmm6,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF078_F117)] ; xmm1=z3H + pmaddwd xmm2,[GOTOFF(ebx,PW_F117_F078)] ; xmm2=z4L + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4,xmm0 + movdqa xmm1,xmm0 + punpcklwd xmm4,xmm5 + punpckhwd xmm1,xmm5 + movdqa xmm0,xmm4 + movdqa xmm5,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm1=tmp4H + pmaddwd xmm0,[GOTOFF(ebx,PW_MF089_F060)] ; xmm0=tmp7L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF089_F060)] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0,xmm2 ; xmm0=data1L + paddd xmm5,xmm6 ; xmm5=data1H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm0,DESCALE_P2 + psrad xmm5,DESCALE_P2 + + packssdw xmm4,xmm1 ; xmm4=data7 + packssdw xmm0,xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1,xmm3 + movdqa xmm5,xmm3 + punpcklwd xmm1,xmm7 + punpckhwd xmm5,xmm7 + movdqa xmm3,xmm1 + movdqa xmm7,xmm5 + pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm5=tmp5H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF256_F050)] ; xmm7=tmp6H + + paddd xmm1,xmm2 ; xmm1=data5L + paddd xmm5,xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm5,DESCALE_P2 + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm3,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm1,xmm5 ; xmm1=data5 + packssdw xmm3,xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-3dn.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,452 @@ +; +; jidctflt.asm - floating-point IDCT (3DNow! & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_3dnow) PRIVATE + +EXTN(jconst_idct_float_3dnow): + +PD_1_414 times 2 dd 1.414213562373095048801689 +PD_1_847 times 2 dd 1.847759065022573512256366 +PD_1_082 times 2 dd 1.082392200292393968799446 +PD_2_613 times 2 dd 2.613125929752753055713286 +PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_3dnow (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_3dnow) PRIVATE + +EXTN(jsimd_idct_float_3dnow): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + pushpic ebx ; save GOT address + mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + or eax,ebx + poppic ebx ; restore GOT address + jnz short .columnDCT + + ; -- AC terms all zero + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + psrad mm0,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm1,mm0 + punpckldq mm0,mm0 + punpckhdq mm1,mm1 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + punpcklwd mm1,mm1 + psrad mm0,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + pi2fd mm1,mm1 + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm5,mm5 + punpcklwd mm1,mm1 + psrad mm5,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm5,mm5 + pi2fd mm1,mm1 + + pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 01) + pfadd mm7,mm3 ; mm7=data1=(10 11) + pfsub mm5,mm1 ; mm5=data7=(70 71) + pfsub mm0,mm3 ; mm0=data6=(60 61) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,mm6 ; transpose coefficients + punpckldq mm6,mm7 ; mm6=(00 10) + punpckhdq mm1,mm7 ; mm1=(01 11) + movq mm3,mm0 ; transpose coefficients + punpckldq mm0,mm5 ; mm0=(60 70) + punpckhdq mm3,mm5 ; mm3=(61 71) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3 + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm5, MMWORD [wk(1)] ; mm5=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm6,mm7 + movq mm1,mm5 + pfadd mm7,mm2 ; mm7=data2=(20 21) + pfadd mm5,mm4 ; mm5=data4=(40 41) + pfsub mm6,mm2 ; mm6=data5=(50 51) + pfsub mm1,mm4 ; mm1=data3=(30 31) + + movq mm0,mm7 ; transpose coefficients + punpckldq mm7,mm1 ; mm7=(20 30) + punpckhdq mm0,mm1 ; mm0=(21 31) + movq mm3,mm5 ; transpose coefficients + punpckldq mm5,mm6 ; mm5=(40 50) + punpckhdq mm3,mm6 ; mm3=(41 51) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3 + +.nextcolumn: + add esi, byte 2*SIZEOF_JCOEF ; coef_block + add edx, byte 2*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 10) + pfadd mm7,mm3 ; mm7=data1=(01 11) + pfsub mm5,mm1 ; mm5=data7=(07 17) + pfsub mm0,mm3 ; mm0=data6=(06 16) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm1=[PD_RNDINT_MAGIC] + pcmpeqd mm3,mm3 + psrld mm3,WORD_BIT ; mm3={0xFFFF 0x0000 0xFFFF 0x0000} + + pfadd mm6,mm1 ; mm6=roundint(data0/8)=(00 ** 10 **) + pfadd mm7,mm1 ; mm7=roundint(data1/8)=(01 ** 11 **) + pfadd mm0,mm1 ; mm0=roundint(data6/8)=(06 ** 16 **) + pfadd mm5,mm1 ; mm5=roundint(data7/8)=(07 ** 17 **) + + pand mm6,mm3 ; mm6=(00 -- 10 --) + pslld mm7,WORD_BIT ; mm7=(-- 01 -- 11) + pand mm0,mm3 ; mm0=(06 -- 16 --) + pslld mm5,WORD_BIT ; mm5=(-- 07 -- 17) + por mm6,mm7 ; mm6=(00 01 10 11) + por mm0,mm5 ; mm0=(06 07 16 17) + + movq mm1, MMWORD [wk(0)] ; mm1=tmp2 + movq mm3, MMWORD [wk(1)] ; mm3=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm7,mm1 + movq mm5,mm3 + pfadd mm1,mm2 ; mm1=data2=(02 12) + pfadd mm3,mm4 ; mm3=data4=(04 14) + pfsub mm7,mm2 ; mm7=data5=(05 15) + pfsub mm5,mm4 ; mm5=data3=(03 13) + + movq mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm2=[PD_RNDINT_MAGIC] + pcmpeqd mm4,mm4 + psrld mm4,WORD_BIT ; mm4={0xFFFF 0x0000 0xFFFF 0x0000} + + pfadd mm3,mm2 ; mm3=roundint(data4/8)=(04 ** 14 **) + pfadd mm7,mm2 ; mm7=roundint(data5/8)=(05 ** 15 **) + pfadd mm1,mm2 ; mm1=roundint(data2/8)=(02 ** 12 **) + pfadd mm5,mm2 ; mm5=roundint(data3/8)=(03 ** 13 **) + + pand mm3,mm4 ; mm3=(04 -- 14 --) + pslld mm7,WORD_BIT ; mm7=(-- 05 -- 15) + pand mm1,mm4 ; mm1=(02 -- 12 --) + pslld mm5,WORD_BIT ; mm5=(-- 03 -- 13) + por mm3,mm7 ; mm3=(04 05 14 15) + por mm1,mm5 ; mm1=(02 03 12 13) + + movq mm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm2=[PB_CENTERJSAMP] + + packsswb mm6,mm3 ; mm6=(00 01 10 11 04 05 14 15) + packsswb mm1,mm0 ; mm1=(02 03 12 13 06 07 16 17) + paddb mm6,mm2 + paddb mm1,mm2 + + movq mm4,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm1 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm4,mm1 ; mm4=(04 05 06 07 14 15 16 17) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm4 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm4 ; mm7=(10 11 12 13 14 15 16 17) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + + poppic ebx ; restore GOT address + + add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 2*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,483 @@ +; +; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse2) PRIVATE + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse2) PRIVATE + +EXTN(jsimd_idct_float_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [workspace] + collect_args + push rbx + + ; ---- Pass 1: process columns from input, store into work array. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + lea rdi, [workspace] ; FAST_FLOAT *wsptr + mov rcx, DCTSIZE/4 ; ctr +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + movq xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movq xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + movq xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm2 + por xmm3,xmm4 + por xmm5,xmm6 + por xmm1,xmm3 + por xmm5,xmm7 + por xmm1,xmm5 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[rel PD_1_414] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + + punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add rsi, byte 4*SIZEOF_JCOEF ; coef_block + add rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add rdi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec rcx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + lea rsi, [workspace] ; FAST_FLOAT *wsptr + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + mov rcx, DCTSIZE/4 ; ctr +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[rel PD_1_414] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC] + pcmpeqd xmm3,xmm3 + psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm7,xmm1 + movaps xmm5,xmm3 + addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2,[rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC] + pcmpeqd xmm4,xmm4 + psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) + + pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] + + packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6,xmm2 + paddb xmm1,xmm2 + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7 + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3 + + add rsi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add rdi, byte 4*SIZEOF_JSAMPROW + dec rcx ; ctr + jnz near .rowloop + + pop rbx + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,498 @@ +; +; jidctflt.asm - floating-point IDCT (SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse2) PRIVATE + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse2) PRIVATE + +EXTN(jsimd_idct_float_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + movq xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm2 + por xmm3,xmm4 + por xmm5,xmm6 + por xmm1,xmm3 + por xmm5,xmm7 + por xmm1,xmm5 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm1=[PD_RNDINT_MAGIC] + pcmpeqd xmm3,xmm3 + psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm7,xmm1 + movaps xmm5,xmm3 + addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm2=[PD_RNDINT_MAGIC] + pcmpeqd xmm4,xmm4 + psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) + + pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6,xmm2 + paddb xmm1,xmm2 + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7 + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctflt-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctflt-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,572 @@ +; +; jidctflt.asm - floating-point IDCT (SSE & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse) PRIVATE + +EXTN(jconst_idct_float_sse): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_0_125 times 4 dd 0.125 ; 1/8 +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_float_sse (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_float_sse) PRIVATE + +EXTN(jsimd_idct_float_sse): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm1,mm0 ; mm1=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm3,mm1 ; xmm3=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + movlhps xmm0,xmm3 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm4,mm0 ; mm4=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm5,mm1 ; mm5=(** 22 ** 23) + punpcklwd mm1,mm1 ; mm1=(20 20 21 21) + + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm4,mm4 ; xmm4=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in2H=(22 23) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in2L=(20 21) + cvtpi2ps xmm5,mm5 ; xmm5=(22 23 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(20 21 ** **) + + punpckhwd mm6,mm2 ; mm6=(** 42 ** 43) + punpcklwd mm2,mm2 ; mm2=(40 40 41 41) + punpckhwd mm7,mm3 ; mm7=(** 62 ** 63) + punpcklwd mm3,mm3 ; mm3=(60 60 61 61) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in4H=(42 43) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in4L=(40 41) + cvtpi2ps xmm6,mm6 ; xmm6=(42 43 ** **) + cvtpi2ps xmm2,mm2 ; xmm2=(40 41 ** **) + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in6H=(62 63) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in6L=(60 61) + cvtpi2ps xmm7,mm7 ; xmm7=(62 63 ** **) + cvtpi2ps xmm3,mm3 ; xmm3=(60 61 ** **) + + movlhps xmm0,xmm4 ; xmm0=in0=(00 01 02 03) + movlhps xmm1,xmm5 ; xmm1=in2=(20 21 22 23) + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm2,xmm6 ; xmm2=in4=(40 41 42 43) + movlhps xmm3,xmm7 ; xmm3=in6=(60 61 62 63) + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm6,mm4 ; mm6=(** 12 ** 13) + punpcklwd mm4,mm4 ; mm4=(10 10 11 11) + punpckhwd mm2,mm0 ; mm2=(** 32 ** 33) + punpcklwd mm0,mm0 ; mm0=(30 30 31 31) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in1H=(12 13) + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in1L=(10 11) + cvtpi2ps xmm4,mm6 ; xmm4=(12 13 ** **) + cvtpi2ps xmm2,mm4 ; xmm2=(10 11 ** **) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in3H=(32 33) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in3L=(30 31) + cvtpi2ps xmm0,mm2 ; xmm0=(32 33 ** **) + cvtpi2ps xmm3,mm0 ; xmm3=(30 31 ** **) + + punpckhwd mm7,mm5 ; mm7=(** 52 ** 53) + punpcklwd mm5,mm5 ; mm5=(50 50 51 51) + punpckhwd mm3,mm1 ; mm3=(** 72 ** 73) + punpcklwd mm1,mm1 ; mm1=(70 70 71 71) + + movlhps xmm2,xmm4 ; xmm2=in1=(10 11 12 13) + movlhps xmm3,xmm0 ; xmm3=in3=(30 31 32 33) + + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in5H=(52 53) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in5L=(50 51) + cvtpi2ps xmm4,mm7 ; xmm4=(52 53 ** **) + cvtpi2ps xmm5,mm5 ; xmm5=(50 51 ** **) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in7H=(72 73) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in7L=(70 71) + cvtpi2ps xmm0,mm3 ; xmm0=(72 73 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(70 71 ** **) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm5,xmm4 ; xmm5=in5=(50 51 52 53) + movlhps xmm1,xmm0 ; xmm1=in7=(70 71 72 73) + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_0_125)] ; xmm1=[PD_0_125] + + mulps xmm6,xmm1 ; descale(1/8) + mulps xmm7,xmm1 ; descale(1/8) + mulps xmm5,xmm1 ; descale(1/8) + mulps xmm0,xmm1 ; descale(1/8) + + movhlps xmm3,xmm6 + movhlps xmm1,xmm7 + cvtps2pi mm0,xmm6 ; round to int32, mm0=data0L=(00 10) + cvtps2pi mm1,xmm7 ; round to int32, mm1=data1L=(01 11) + cvtps2pi mm2,xmm3 ; round to int32, mm2=data0H=(20 30) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data1H=(21 31) + packssdw mm0,mm2 ; mm0=data0=(00 10 20 30) + packssdw mm1,mm3 ; mm1=data1=(01 11 21 31) + + movhlps xmm6,xmm5 + movhlps xmm7,xmm0 + cvtps2pi mm4,xmm5 ; round to int32, mm4=data7L=(07 17) + cvtps2pi mm5,xmm0 ; round to int32, mm5=data6L=(06 16) + cvtps2pi mm6,xmm6 ; round to int32, mm6=data7H=(27 37) + cvtps2pi mm7,xmm7 ; round to int32, mm7=data6H=(26 36) + packssdw mm4,mm6 ; mm4=data7=(07 17 27 37) + packssdw mm5,mm7 ; mm5=data6=(06 16 26 36) + + packsswb mm0,mm5 ; mm0=(00 10 20 30 06 16 26 36) + packsswb mm1,mm4 ; mm1=(01 11 21 31 07 17 27 37) + + movaps xmm3, XMMWORD [wk(0)] ; xmm3=tmp2 + movaps xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movaps xmm6,[GOTOFF(ebx,PD_0_125)] ; xmm6=[PD_0_125] + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm5,xmm3 + movaps xmm0,xmm1 + addps xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + addps xmm1,xmm4 ; xmm1=data4=(04 14 24 34) + subps xmm5,xmm2 ; xmm5=data5=(05 15 25 35) + subps xmm0,xmm4 ; xmm0=data3=(03 13 23 33) + + mulps xmm3,xmm6 ; descale(1/8) + mulps xmm1,xmm6 ; descale(1/8) + mulps xmm5,xmm6 ; descale(1/8) + mulps xmm0,xmm6 ; descale(1/8) + + movhlps xmm7,xmm3 + movhlps xmm2,xmm1 + cvtps2pi mm2,xmm3 ; round to int32, mm2=data2L=(02 12) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data4L=(04 14) + cvtps2pi mm6,xmm7 ; round to int32, mm6=data2H=(22 32) + cvtps2pi mm7,xmm2 ; round to int32, mm7=data4H=(24 34) + packssdw mm2,mm6 ; mm2=data2=(02 12 22 32) + packssdw mm3,mm7 ; mm3=data4=(04 14 24 34) + + movhlps xmm4,xmm5 + movhlps xmm6,xmm0 + cvtps2pi mm5,xmm5 ; round to int32, mm5=data5L=(05 15) + cvtps2pi mm4,xmm0 ; round to int32, mm4=data3L=(03 13) + cvtps2pi mm6,xmm4 ; round to int32, mm6=data5H=(25 35) + cvtps2pi mm7,xmm6 ; round to int32, mm7=data3H=(23 33) + packssdw mm5,mm6 ; mm5=data5=(05 15 25 35) + packssdw mm4,mm7 ; mm4=data3=(03 13 23 33) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm2,mm3 ; mm2=(02 12 22 32 04 14 24 34) + packsswb mm4,mm5 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm6 + paddb mm1,mm6 + paddb mm2,mm6 + paddb mm4,mm6 + + movq mm7,mm0 ; transpose coefficients(phase 1) + punpcklbw mm0,mm1 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm1 ; mm7=(06 07 16 17 26 27 36 37) + movq mm3,mm2 ; transpose coefficients(phase 1) + punpcklbw mm2,mm4 ; mm2=(02 03 12 13 22 23 32 33) + punpckhbw mm3,mm4 ; mm3=(04 05 14 15 24 25 34 35) + + movq mm5,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm2 ; mm5=(20 21 22 23 30 31 32 33) + movq mm6,mm3 ; transpose coefficients(phase 2) + punpcklwd mm3,mm7 ; mm3=(04 05 06 07 14 15 16 17) + punpckhwd mm6,mm7 ; mm6=(24 25 26 27 34 35 36 37) + + movq mm1,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm3 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm1,mm3 ; mm1=(10 11 12 13 14 15 16 17) + movq mm4,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm6 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm6 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,257 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* FAST INTEGER INVERSE DCT + * + * This is similar to the SSE2 implementation, except that we left-shift the + * constants by 1 less bit (the -1 in CONST_SHIFT.) This is because + * vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of: + * the elements in arg3 + the most significant 17 bits of + * (the elements in arg1 * the elements in arg2). + */ + +#include "jsimd_altivec.h" + + +#define F_1_082 277 /* FIX(1.082392200) */ +#define F_1_414 362 /* FIX(1.414213562) */ +#define F_1_847 473 /* FIX(1.847759065) */ +#define F_2_613 669 /* FIX(2.613125930) */ +#define F_1_613 (F_2_613 - 256) /* FIX(2.613125930) - FIX(1) */ + +#define CONST_BITS 8 +#define PASS1_BITS 2 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) + + +#define DO_IDCT(in) \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(in##0, in##4); \ + tmp11 = vec_sub(in##0, in##4); \ + tmp13 = vec_add(in##2, in##6); \ + \ + tmp12 = vec_sub(in##2, in##6); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \ + tmp12 = vec_sub(tmp12, tmp13); \ + \ + tmp0 = vec_add(tmp10, tmp13); \ + tmp3 = vec_sub(tmp10, tmp13); \ + tmp1 = vec_add(tmp11, tmp12); \ + tmp2 = vec_sub(tmp11, tmp12); \ + \ + /* Odd part */ \ + \ + z13 = vec_add(in##5, in##3); \ + z10 = vec_sub(in##5, in##3); \ + z10s = vec_sl(z10, pre_multiply_scale_bits); \ + z11 = vec_add(in##1, in##7); \ + z12s = vec_sub(in##1, in##7); \ + z12s = vec_sl(z12s, pre_multiply_scale_bits); \ + \ + tmp11 = vec_sub(z11, z13); \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \ + \ + tmp7 = vec_add(z11, z13); \ + \ + /* To avoid overflow... \ + * \ + * (Original) \ + * tmp12 = -2.613125930 * z10 + z5; \ + * \ + * (This implementation) \ + * tmp12 = (-1.613125930 - 1) * z10 + z5; \ + * = -1.613125930 * z10 - z10 + z5; \ + */ \ + \ + z5 = vec_add(z10s, z12s); \ + z5 = vec_madds(z5, pw_F1847, pw_zero); \ + \ + tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \ + tmp10 = vec_sub(tmp10, z5); \ + tmp12 = vec_madds(z10s, pw_MF1613, z5); \ + tmp12 = vec_sub(tmp12, z10); \ + \ + tmp6 = vec_sub(tmp12, tmp7); \ + tmp5 = vec_sub(tmp11, tmp6); \ + tmp4 = vec_add(tmp10, tmp5); \ + \ + out0 = vec_add(tmp0, tmp7); \ + out1 = vec_add(tmp1, tmp6); \ + out2 = vec_add(tmp2, tmp5); \ + out3 = vec_sub(tmp3, tmp4); \ + out4 = vec_add(tmp3, tmp4); \ + out5 = vec_sub(tmp2, tmp5); \ + out6 = vec_sub(tmp1, tmp6); \ + out7 = vec_sub(tmp0, tmp7); \ +} + + +void +jsimd_idct_ifast_altivec (void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + short *dct_table = (short *)dct_table_; + int *outptr; + + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + z5, z10, z10s, z11, z12s, z13, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector signed char outb; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) }, + pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) }, + pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) }, + pw_F1082 = { __8X(F_1_082 << CONST_SHIFT) }; + __vector unsigned short + pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) }, + pass1_bits3 = { __8X(PASS1_BITS + 3) }; + __vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) }; + + /* Pass 1: process columns */ + + col0 = vec_ld(0, coef_block); + col1 = vec_ld(16, coef_block); + col2 = vec_ld(32, coef_block); + col3 = vec_ld(48, coef_block); + col4 = vec_ld(64, coef_block); + col5 = vec_ld(80, coef_block); + col6 = vec_ld(96, coef_block); + col7 = vec_ld(112, coef_block); + + tmp1 = vec_or(col1, col2); + tmp2 = vec_or(col3, col4); + tmp1 = vec_or(tmp1, tmp2); + tmp3 = vec_or(col5, col6); + tmp3 = vec_or(tmp3, col7); + tmp1 = vec_or(tmp1, tmp3); + + quant0 = vec_ld(0, dct_table); + col0 = vec_mladd(col0, quant0, pw_zero); + + if (vec_all_eq(tmp1, pw_zero)) { + /* AC terms all zero */ + + row0 = vec_splat(col0, 0); + row1 = vec_splat(col0, 1); + row2 = vec_splat(col0, 2); + row3 = vec_splat(col0, 3); + row4 = vec_splat(col0, 4); + row5 = vec_splat(col0, 5); + row6 = vec_splat(col0, 6); + row7 = vec_splat(col0, 7); + + } else { + + quant1 = vec_ld(16, dct_table); + quant2 = vec_ld(32, dct_table); + quant3 = vec_ld(48, dct_table); + quant4 = vec_ld(64, dct_table); + quant5 = vec_ld(80, dct_table); + quant6 = vec_ld(96, dct_table); + quant7 = vec_ld(112, dct_table); + + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); + + DO_IDCT(col); + + TRANSPOSE(out, row); + } + + /* Pass 2: process rows */ + + DO_IDCT(row); + + out0 = vec_sra(out0, pass1_bits3); + out1 = vec_sra(out1, pass1_bits3); + out2 = vec_sra(out2, pass1_bits3); + out3 = vec_sra(out3, pass1_bits3); + out4 = vec_sra(out4, pass1_bits3); + out5 = vec_sra(out5, pass1_bits3); + out6 = vec_sra(out6, pass1_bits3); + out7 = vec_sra(out7, pass1_bits3); + + TRANSPOSE(out, col); + + outb = vec_packs(col0, col0); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[0] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col1, col1); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[1] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col2, col2); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[2] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col3, col3); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[3] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col4, col4); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[4] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col5, col5); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[5] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col6, col6); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[6] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col7, col7); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[7] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,500 @@ +; +; jidctfst.asm - fast integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_mmx) PRIVATE + +EXTN(jconst_idct_ifast_mmx): + +PW_F1414 times 4 dw F_1_414 << CONST_SHIFT +PW_F1847 times 4 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 4 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_ifast_mmx) PRIVATE + +EXTN(jsimd_idct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 01 02 03) + paddw mm7,mm0 ; mm7=data1=(10 11 12 13) + psubw mm1,mm3 ; mm1=data7=(70 71 72 73) + psubw mm5,mm0 ; mm5=data6=(60 61 62 63) + psubw mm4,mm0 ; mm4=tmp5 + + movq mm3,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm3,mm7 ; mm3=(02 12 03 13) + movq mm0,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm1 ; mm5=(60 70 61 71) + punpckhwd mm0,mm1 ; mm0=(62 72 63 73) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm1, MMWORD [wk(1)] ; mm1=tmp3 + + movq MMWORD [wk(0)], mm5 ; wk(0)=(60 70 61 71) + movq MMWORD [wk(1)], mm0 ; wk(1)=(62 72 63 73) + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm7 + movq mm0,mm1 + paddw mm7,mm4 ; mm7=data2=(20 21 22 23) + paddw mm1,mm2 ; mm1=data4=(40 41 42 43) + psubw mm5,mm4 ; mm5=data5=(50 51 52 53) + psubw mm0,mm2 ; mm0=data3=(30 31 32 33) + + movq mm4,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm0 ; mm7=(20 30 21 31) + punpckhwd mm4,mm0 ; mm4=(22 32 23 33) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm5 ; mm1=(40 50 41 51) + punpckhwd mm2,mm5 ; mm2=(42 52 43 53) + + movq mm0,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(00 10 20 30) + punpckhdq mm0,mm7 ; mm0=(01 11 21 31) + movq mm5,mm3 ; transpose coefficients(phase 2) + punpckldq mm3,mm4 ; mm3=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=(60 70 61 71) + movq mm4, MMWORD [wk(1)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm7 ; mm1=(40 50 60 70) + punpckhdq mm6,mm7 ; mm6=(41 51 61 71) + movq mm0,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(42 52 62 72) + punpckhdq mm0,mm4 ; mm0=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_IFAST_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 10 20 30) + paddw mm7,mm0 ; mm7=data1=(01 11 21 31) + psraw mm6,(PASS1_BITS+3) ; descale + psraw mm7,(PASS1_BITS+3) ; descale + psubw mm1,mm3 ; mm1=data7=(07 17 27 37) + psubw mm5,mm0 ; mm5=data6=(06 16 26 36) + psraw mm1,(PASS1_BITS+3) ; descale + psraw mm5,(PASS1_BITS+3) ; descale + psubw mm4,mm0 ; mm4=tmp5 + + packsswb mm6,mm5 ; mm6=(00 10 20 30 06 16 26 36) + packsswb mm7,mm1 ; mm7=(01 11 21 31 07 17 27 37) + + movq mm3, MMWORD [wk(0)] ; mm3=tmp2 + movq mm0, MMWORD [wk(1)] ; mm0=tmp3 + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm3 + movq mm1,mm0 + paddw mm3,mm4 ; mm3=data2=(02 12 22 32) + paddw mm0,mm2 ; mm0=data4=(04 14 24 34) + psraw mm3,(PASS1_BITS+3) ; descale + psraw mm0,(PASS1_BITS+3) ; descale + psubw mm5,mm4 ; mm5=data5=(05 15 25 35) + psubw mm1,mm2 ; mm1=data3=(03 13 23 33) + psraw mm5,(PASS1_BITS+3) ; descale + psraw mm1,(PASS1_BITS+3) ; descale + + movq mm4,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm4=[PB_CENTERJSAMP] + + packsswb mm3,mm0 ; mm3=(02 12 22 32 04 14 24 34) + packsswb mm1,mm5 ; mm1=(03 13 23 33 05 15 25 35) + + paddb mm6,mm4 + paddb mm7,mm4 + paddb mm3,mm4 + paddb mm1,mm4 + + movq mm2,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm7 ; mm6=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm7 ; mm2=(06 07 16 17 26 27 36 37) + movq mm0,mm3 ; transpose coefficients(phase 1) + punpcklbw mm3,mm1 ; mm3=(02 03 12 13 22 23 32 33) + punpckhbw mm0,mm1 ; mm0=(04 05 14 15 24 25 34 35) + + movq mm5,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm3 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm3 ; mm5=(20 21 22 23 30 31 32 33) + movq mm4,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(04 05 06 07 14 15 16 17) + punpckhwd mm4,mm2 ; mm4=(24 25 26 27 34 35 36 37) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm0 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm0 ; mm7=(10 11 12 13 14 15 16 17) + movq mm1,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm4 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm1,mm4 ; mm1=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,492 @@ +; +; jidctfst.asm - fast integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_sse2) PRIVATE + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = jpeg_component_info *compptr +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_ifast_sse2) PRIVATE + +EXTN(jsimd_idct_ifast_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + psubw xmm0,xmm2 ; xmm0=tmp11 + psubw xmm1,xmm3 + paddw xmm4,xmm2 ; xmm4=tmp10 + paddw xmm5,xmm3 ; xmm5=tmp13 + + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1,[rel PW_F1414] + psubw xmm1,xmm5 ; xmm1=tmp12 + + movdqa xmm6,xmm4 + movdqa xmm7,xmm0 + psubw xmm4,xmm5 ; xmm4=tmp3 + psubw xmm0,xmm1 ; xmm0=tmp2 + paddw xmm6,xmm5 ; xmm6=tmp0 + paddw xmm7,xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm2 + movdqa xmm0,xmm5 + psubw xmm2,xmm1 ; xmm2=z12 + psubw xmm5,xmm3 ; xmm5=z10 + paddw xmm4,xmm1 ; xmm4=z11 + paddw xmm0,xmm3 ; xmm0=z13 + + movdqa xmm1,xmm5 ; xmm1=z10(unscaled) + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3,xmm4 + psubw xmm4,xmm0 + paddw xmm3,xmm0 ; xmm3=tmp7 + + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4,[rel PW_F1414] ; xmm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0,xmm5 + paddw xmm5,xmm2 + pmulhw xmm5,[rel PW_F1847] ; xmm5=z5 + pmulhw xmm0,[rel PW_MF1613] + pmulhw xmm2,[rel PW_F1082] + psubw xmm0,xmm1 + psubw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm0,xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0,xmm3 ; xmm0=tmp6 + movdqa xmm1,xmm6 + movdqa xmm5,xmm7 + paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4,xmm0 ; xmm4=tmp5 + + movdqa xmm3,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2,xmm4 ; xmm2=tmp4 + movdqa xmm5,xmm7 + movdqa xmm0,xmm1 + paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0,xmm3 ; transpose coefficients(phase 2) + punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7,xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7,xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2,xmm6 + movdqa xmm0,xmm5 + psubw xmm6,xmm1 ; xmm6=tmp11 + psubw xmm5,xmm3 + paddw xmm2,xmm1 ; xmm2=tmp10 + paddw xmm0,xmm3 ; xmm0=tmp13 + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[rel PW_F1414] + psubw xmm5,xmm0 ; xmm5=tmp12 + + movdqa xmm1,xmm2 + movdqa xmm3,xmm6 + psubw xmm2,xmm0 ; xmm2=tmp3 + psubw xmm6,xmm5 ; xmm6=tmp2 + paddw xmm1,xmm0 ; xmm1=tmp0 + paddw xmm3,xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + psubw xmm0,xmm7 ; xmm0=z12 + psubw xmm4,xmm5 ; xmm4=z10 + paddw xmm2,xmm7 ; xmm2=z11 + paddw xmm6,xmm5 ; xmm6=z13 + + movdqa xmm7,xmm4 ; xmm7=z10(unscaled) + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5,xmm2 + psubw xmm2,xmm6 + paddw xmm5,xmm6 ; xmm5=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2,[rel PW_F1414] ; xmm2=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6,xmm4 + paddw xmm4,xmm0 + pmulhw xmm4,[rel PW_F1847] ; xmm4=z5 + pmulhw xmm6,[rel PW_MF1613] + pmulhw xmm0,[rel PW_F1082] + psubw xmm6,xmm7 + psubw xmm0,xmm4 ; xmm0=tmp10 + paddw xmm6,xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6,xmm5 ; xmm6=tmp6 + movdqa xmm7,xmm1 + movdqa xmm4,xmm3 + paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1,(PASS1_BITS+3) ; descale + psraw xmm3,(PASS1_BITS+3) ; descale + psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7,(PASS1_BITS+3) ; descale + psraw xmm4,(PASS1_BITS+3) ; descale + psubw xmm2,xmm6 ; xmm2=tmp5 + + packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0,xmm2 ; xmm0=tmp4 + movdqa xmm4,xmm5 + movdqa xmm7,xmm6 + paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5,(PASS1_BITS+3) ; descale + psraw xmm6,(PASS1_BITS+3) ; descale + psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4,(PASS1_BITS+3) ; descale + psraw xmm7,(PASS1_BITS+3) ; descale + + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] + + packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1,xmm2 + paddb xmm3,xmm2 + paddb xmm5,xmm2 + paddb xmm7,xmm2 + + movdqa xmm0,xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctfst-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctfst-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,502 @@ +; +; jidctfst.asm - fast integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_sse2) PRIVATE + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_ifast_sse2) PRIVATE + +EXTN(jsimd_idct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + psubw xmm0,xmm2 ; xmm0=tmp11 + psubw xmm1,xmm3 + paddw xmm4,xmm2 ; xmm4=tmp10 + paddw xmm5,xmm3 ; xmm5=tmp13 + + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1,[GOTOFF(ebx,PW_F1414)] + psubw xmm1,xmm5 ; xmm1=tmp12 + + movdqa xmm6,xmm4 + movdqa xmm7,xmm0 + psubw xmm4,xmm5 ; xmm4=tmp3 + psubw xmm0,xmm1 ; xmm0=tmp2 + paddw xmm6,xmm5 ; xmm6=tmp0 + paddw xmm7,xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm2 + movdqa xmm0,xmm5 + psubw xmm2,xmm1 ; xmm2=z12 + psubw xmm5,xmm3 ; xmm5=z10 + paddw xmm4,xmm1 ; xmm4=z11 + paddw xmm0,xmm3 ; xmm0=z13 + + movdqa xmm1,xmm5 ; xmm1=z10(unscaled) + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3,xmm4 + psubw xmm4,xmm0 + paddw xmm3,xmm0 ; xmm3=tmp7 + + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4,[GOTOFF(ebx,PW_F1414)] ; xmm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0,xmm5 + paddw xmm5,xmm2 + pmulhw xmm5,[GOTOFF(ebx,PW_F1847)] ; xmm5=z5 + pmulhw xmm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm2,[GOTOFF(ebx,PW_F1082)] + psubw xmm0,xmm1 + psubw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm0,xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0,xmm3 ; xmm0=tmp6 + movdqa xmm1,xmm6 + movdqa xmm5,xmm7 + paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4,xmm0 ; xmm4=tmp5 + + movdqa xmm3,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2,xmm4 ; xmm2=tmp4 + movdqa xmm5,xmm7 + movdqa xmm0,xmm1 + paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0,xmm3 ; transpose coefficients(phase 2) + punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7,xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7,xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2,xmm6 + movdqa xmm0,xmm5 + psubw xmm6,xmm1 ; xmm6=tmp11 + psubw xmm5,xmm3 + paddw xmm2,xmm1 ; xmm2=tmp10 + paddw xmm0,xmm3 ; xmm0=tmp13 + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F1414)] + psubw xmm5,xmm0 ; xmm5=tmp12 + + movdqa xmm1,xmm2 + movdqa xmm3,xmm6 + psubw xmm2,xmm0 ; xmm2=tmp3 + psubw xmm6,xmm5 ; xmm6=tmp2 + paddw xmm1,xmm0 ; xmm1=tmp0 + paddw xmm3,xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + psubw xmm0,xmm7 ; xmm0=z12 + psubw xmm4,xmm5 ; xmm4=z10 + paddw xmm2,xmm7 ; xmm2=z11 + paddw xmm6,xmm5 ; xmm6=z13 + + movdqa xmm7,xmm4 ; xmm7=z10(unscaled) + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5,xmm2 + psubw xmm2,xmm6 + paddw xmm5,xmm6 ; xmm5=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2,[GOTOFF(ebx,PW_F1414)] ; xmm2=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6,xmm4 + paddw xmm4,xmm0 + pmulhw xmm4,[GOTOFF(ebx,PW_F1847)] ; xmm4=z5 + pmulhw xmm6,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm0,[GOTOFF(ebx,PW_F1082)] + psubw xmm6,xmm7 + psubw xmm0,xmm4 ; xmm0=tmp10 + paddw xmm6,xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6,xmm5 ; xmm6=tmp6 + movdqa xmm7,xmm1 + movdqa xmm4,xmm3 + paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1,(PASS1_BITS+3) ; descale + psraw xmm3,(PASS1_BITS+3) ; descale + psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7,(PASS1_BITS+3) ; descale + psraw xmm4,(PASS1_BITS+3) ; descale + psubw xmm2,xmm6 ; xmm2=tmp5 + + packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0,xmm2 ; xmm0=tmp4 + movdqa xmm4,xmm5 + movdqa xmm7,xmm6 + paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5,(PASS1_BITS+3) ; descale + psraw xmm6,(PASS1_BITS+3) ; descale + psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4,(PASS1_BITS+3) ; descale + psraw xmm7,(PASS1_BITS+3) ; descale + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1,xmm2 + paddb xmm3,xmm2 + paddb xmm5,xmm2 + paddb xmm7,xmm2 + + movdqa xmm0,xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,359 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER INVERSE DCT */ + +#include "jsimd_altivec.h" + + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + + +#define DO_IDCT(in, PASS) \ +{ \ + /* Even part \ + * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ + \ + in##26l = vec_mergeh(in##2, in##6); \ + in##26h = vec_mergel(in##2, in##6); \ + \ + tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \ + tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \ + tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \ + tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \ + \ + tmp0 = vec_add(in##0, in##4); \ + tmp1 = vec_sub(in##0, in##4); \ + \ + tmp0l = vec_unpackh(tmp0); \ + tmp0h = vec_unpackl(tmp0); \ + tmp0l = vec_sl(tmp0l, const_bits); \ + tmp0h = vec_sl(tmp0h, const_bits); \ + tmp0l = vec_add(tmp0l, pd_descale_p##PASS); \ + tmp0h = vec_add(tmp0h, pd_descale_p##PASS); \ + \ + tmp10l = vec_add(tmp0l, tmp3l); \ + tmp10h = vec_add(tmp0h, tmp3h); \ + tmp13l = vec_sub(tmp0l, tmp3l); \ + tmp13h = vec_sub(tmp0h, tmp3h); \ + \ + tmp1l = vec_unpackh(tmp1); \ + tmp1h = vec_unpackl(tmp1); \ + tmp1l = vec_sl(tmp1l, const_bits); \ + tmp1h = vec_sl(tmp1h, const_bits); \ + tmp1l = vec_add(tmp1l, pd_descale_p##PASS); \ + tmp1h = vec_add(tmp1h, pd_descale_p##PASS); \ + \ + tmp11l = vec_add(tmp1l, tmp2l); \ + tmp11h = vec_add(tmp1h, tmp2h); \ + tmp12l = vec_sub(tmp1l, tmp2l); \ + tmp12h = vec_sub(tmp1h, tmp2h); \ + \ + /* Odd part */ \ + \ + z3 = vec_add(in##3, in##7); \ + z4 = vec_add(in##1, in##5); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ + \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \ + \ + /* (Original) \ + * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ + * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ + * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * tmp0 += z1 + z3; tmp1 += z2 + z4; \ + * tmp2 += z2 + z3; tmp3 += z1 + z4; \ + * \ + * (This implementation) \ + * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ + * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ + * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ + * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ + * tmp0 += z3; tmp1 += z4; \ + * tmp2 += z3; tmp3 += z4; \ + */ \ + \ + in##71l = vec_mergeh(in##7, in##1); \ + in##71h = vec_mergel(in##7, in##1); \ + \ + tmp0l = vec_msums(in##71l, pw_mf060_mf089, z3l); \ + tmp0h = vec_msums(in##71h, pw_mf060_mf089, z3h); \ + tmp3l = vec_msums(in##71l, pw_mf089_f060, z4l); \ + tmp3h = vec_msums(in##71h, pw_mf089_f060, z4h); \ + \ + in##53l = vec_mergeh(in##5, in##3); \ + in##53h = vec_mergel(in##5, in##3); \ + \ + tmp1l = vec_msums(in##53l, pw_mf050_mf256, z4l); \ + tmp1h = vec_msums(in##53h, pw_mf050_mf256, z4h); \ + tmp2l = vec_msums(in##53l, pw_mf256_f050, z3l); \ + tmp2h = vec_msums(in##53h, pw_mf256_f050, z3h); \ + \ + /* Final output stage */ \ + \ + out0l = vec_add(tmp10l, tmp3l); \ + out0h = vec_add(tmp10h, tmp3h); \ + out7l = vec_sub(tmp10l, tmp3l); \ + out7h = vec_sub(tmp10h, tmp3h); \ + \ + out0l = vec_sra(out0l, descale_p##PASS); \ + out0h = vec_sra(out0h, descale_p##PASS); \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ + \ + out0 = vec_pack(out0l, out0h); \ + out7 = vec_pack(out7l, out7h); \ + \ + out1l = vec_add(tmp11l, tmp2l); \ + out1h = vec_add(tmp11h, tmp2h); \ + out6l = vec_sub(tmp11l, tmp2l); \ + out6h = vec_sub(tmp11h, tmp2h); \ + \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ + \ + out1 = vec_pack(out1l, out1h); \ + out6 = vec_pack(out6l, out6h); \ + \ + out2l = vec_add(tmp12l, tmp1l); \ + out2h = vec_add(tmp12h, tmp1h); \ + out5l = vec_sub(tmp12l, tmp1l); \ + out5h = vec_sub(tmp12h, tmp1h); \ + \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ + \ + out2 = vec_pack(out2l, out2h); \ + out5 = vec_pack(out5l, out5h); \ + \ + out3l = vec_add(tmp13l, tmp0l); \ + out3h = vec_add(tmp13h, tmp0h); \ + out4l = vec_sub(tmp13l, tmp0l); \ + out4h = vec_sub(tmp13h, tmp0h); \ + \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + out4l = vec_sra(out4l, descale_p##PASS); \ + out4h = vec_sra(out4h, descale_p##PASS); \ + \ + out3 = vec_pack(out3l, out3h); \ + out4 = vec_pack(out4l, out4h); \ +} + + +void +jsimd_idct_islow_altivec (void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + short *dct_table = (short *)dct_table_; + int *outptr; + + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, + tmp0, tmp1, tmp2, tmp3, z3, z4, + z34l, z34h, col71l, col71h, col26l, col26h, col53l, col53h, + row71l, row71h, row26l, row26h, row53l, row53h, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector int tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h, + tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h, + z3l, z3h, z4l, z4h, + out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h, + out5l, out5h, out6l, out6h, out7l, out7h; + __vector signed char outb; + + /* Constants */ + __vector short pw_zero = { __8X(0) }, + pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, + pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, + pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, + pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, + pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, + pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, + pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, + pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }; + __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; + __vector int pd_zero = { __4X(0) }, + pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, + pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; + __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, + descale_p2 = { __4X(DESCALE_P2) }, + const_bits = { __4X(CONST_BITS) }; + __vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) }; + + /* Pass 1: process columns */ + + col0 = vec_ld(0, coef_block); + col1 = vec_ld(16, coef_block); + col2 = vec_ld(32, coef_block); + col3 = vec_ld(48, coef_block); + col4 = vec_ld(64, coef_block); + col5 = vec_ld(80, coef_block); + col6 = vec_ld(96, coef_block); + col7 = vec_ld(112, coef_block); + + tmp1 = vec_or(col1, col2); + tmp2 = vec_or(col3, col4); + tmp1 = vec_or(tmp1, tmp2); + tmp3 = vec_or(col5, col6); + tmp3 = vec_or(tmp3, col7); + tmp1 = vec_or(tmp1, tmp3); + + quant0 = vec_ld(0, dct_table); + col0 = vec_mladd(col0, quant0, pw_zero); + + if (vec_all_eq(tmp1, pw_zero)) { + /* AC terms all zero */ + + col0 = vec_sl(col0, pass1_bits); + + row0 = vec_splat(col0, 0); + row1 = vec_splat(col0, 1); + row2 = vec_splat(col0, 2); + row3 = vec_splat(col0, 3); + row4 = vec_splat(col0, 4); + row5 = vec_splat(col0, 5); + row6 = vec_splat(col0, 6); + row7 = vec_splat(col0, 7); + + } else { + + quant1 = vec_ld(16, dct_table); + quant2 = vec_ld(32, dct_table); + quant3 = vec_ld(48, dct_table); + quant4 = vec_ld(64, dct_table); + quant5 = vec_ld(80, dct_table); + quant6 = vec_ld(96, dct_table); + quant7 = vec_ld(112, dct_table); + + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); + + DO_IDCT(col, 1); + + TRANSPOSE(out, row); + } + + /* Pass 2: process rows */ + + DO_IDCT(row, 2); + + TRANSPOSE(out, col); + + outb = vec_packs(col0, col0); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[0] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col1, col1); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[1] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col2, col2); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[2] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col3, col3); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[3] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col4, col4); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[4] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col5, col5); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[5] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col6, col6); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[6] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); + + outb = vec_packs(col7, col7); + outb = vec_add(outb, pb_centerjsamp); + outptr = (int *)(output_buf[7] + output_col); + vec_ste((__vector int)outb, 0, outptr); + vec_ste((__vector int)outb, 4, outptr); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,852 @@ +; +; jidctint.asm - accurate integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_mmx) PRIVATE + +EXTN(jconst_idct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 12 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_islow_mmx) PRIVATE + +EXTN(jsimd_idct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7 + paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1] + + paddd mm5,mm3 + paddd mm7,mm3 + psrad mm5,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm5,mm7 ; mm5=data0=(00 01 02 03) + packssdw mm2,mm0 ; mm2=data7=(70 71 72 73) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P1 + psrad mm3,DESCALE_P1 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm4,mm3 ; mm4=data1=(10 11 12 13) + packssdw mm7,mm0 ; mm7=data6=(60 61 62 63) + + movq mm6,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm4 ; mm5=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm1,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm2 ; mm7=(60 70 61 71) + punpckhwd mm1,mm2 ; mm1=(62 72 63 73) + + movq mm3, MMWORD [wk(6)] ; mm3=tmp12L + movq mm0, MMWORD [wk(7)] ; mm0=tmp12H + movq mm4, MMWORD [wk(10)] ; mm4=tmp1L + movq mm2, MMWORD [wk(11)] ; mm2=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 01 11) + movq MMWORD [wk(1)], mm6 ; wk(1)=(02 12 03 13) + movq MMWORD [wk(4)], mm7 ; wk(4)=(60 70 61 71) + movq MMWORD [wk(5)], mm1 ; wk(5)=(62 72 63 73) + + movq mm5,mm3 + movq mm6,mm0 + paddd mm3,mm4 ; mm3=data2L + paddd mm0,mm2 ; mm0=data2H + psubd mm5,mm4 ; mm5=data5L + psubd mm6,mm2 ; mm6=data5H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1] + + paddd mm3,mm7 + paddd mm0,mm7 + psrad mm3,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm5,mm7 + paddd mm6,mm7 + psrad mm5,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm3,mm0 ; mm3=data2=(20 21 22 23) + packssdw mm5,mm6 ; mm5=data5=(50 51 52 53) + + movq mm1, MMWORD [wk(2)] ; mm1=tmp13L + movq mm4, MMWORD [wk(3)] ; mm4=tmp13H + movq mm2, MMWORD [wk(8)] ; mm2=tmp0L + movq mm7, MMWORD [wk(9)] ; mm7=tmp0H + + movq mm0,mm1 + movq mm6,mm4 + paddd mm1,mm2 ; mm1=data3L + paddd mm4,mm7 ; mm4=data3H + psubd mm0,mm2 ; mm0=data4L + psubd mm6,mm7 ; mm6=data4H + + movq mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1] + + paddd mm1,mm2 + paddd mm4,mm2 + psrad mm1,DESCALE_P1 + psrad mm4,DESCALE_P1 + paddd mm0,mm2 + paddd mm6,mm2 + psrad mm0,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm1,mm4 ; mm1=data3=(30 31 32 33) + packssdw mm0,mm6 ; mm0=data4=(40 41 42 43) + + movq mm7, MMWORD [wk(0)] ; mm7=(00 10 01 11) + movq mm2, MMWORD [wk(1)] ; mm2=(02 12 03 13) + + movq mm4,mm3 ; transpose coefficients(phase 1) + punpcklwd mm3,mm1 ; mm3=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm6,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm5 ; mm0=(40 50 41 51) + punpckhwd mm6,mm5 ; mm6=(42 52 43 53) + + movq mm1,mm7 ; transpose coefficients(phase 2) + punpckldq mm7,mm3 ; mm7=(00 10 20 30) + punpckhdq mm1,mm3 ; mm1=(01 11 21 31) + movq mm5,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm3, MMWORD [wk(4)] ; mm3=(60 70 61 71) + movq mm4, MMWORD [wk(5)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpckldq mm0,mm3 ; mm0=(40 50 60 70) + punpckhdq mm7,mm3 ; mm7=(41 51 61 71) + movq mm1,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm4 ; mm6=(42 52 62 72) + punpckhdq mm1,mm4 ; mm1=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7 + paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2] + + paddd mm5,mm3 + paddd mm7,mm3 + psrad mm5,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P2 + psrad mm0,DESCALE_P2 + + packssdw mm5,mm7 ; mm5=data0=(00 10 20 30) + packssdw mm2,mm0 ; mm2=data7=(07 17 27 37) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P2 + psrad mm0,DESCALE_P2 + + packssdw mm4,mm3 ; mm4=data1=(01 11 21 31) + packssdw mm7,mm0 ; mm7=data6=(06 16 26 36) + + packsswb mm5,mm7 ; mm5=(00 10 20 30 06 16 26 36) + packsswb mm4,mm2 ; mm4=(01 11 21 31 07 17 27 37) + + movq mm6, MMWORD [wk(6)] ; mm6=tmp12L + movq mm1, MMWORD [wk(7)] ; mm1=tmp12H + movq mm3, MMWORD [wk(10)] ; mm3=tmp1L + movq mm0, MMWORD [wk(11)] ; mm0=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 20 30 06 16 26 36) + movq MMWORD [wk(1)], mm4 ; wk(1)=(01 11 21 31 07 17 27 37) + + movq mm7,mm6 + movq mm2,mm1 + paddd mm6,mm3 ; mm6=data2L + paddd mm1,mm0 ; mm1=data2H + psubd mm7,mm3 ; mm7=data5L + psubd mm2,mm0 ; mm2=data5H + + movq mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2] + + paddd mm6,mm5 + paddd mm1,mm5 + psrad mm6,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm7,mm5 + paddd mm2,mm5 + psrad mm7,DESCALE_P2 + psrad mm2,DESCALE_P2 + + packssdw mm6,mm1 ; mm6=data2=(02 12 22 32) + packssdw mm7,mm2 ; mm7=data5=(05 15 25 35) + + movq mm4, MMWORD [wk(2)] ; mm4=tmp13L + movq mm3, MMWORD [wk(3)] ; mm3=tmp13H + movq mm0, MMWORD [wk(8)] ; mm0=tmp0L + movq mm5, MMWORD [wk(9)] ; mm5=tmp0H + + movq mm1,mm4 + movq mm2,mm3 + paddd mm4,mm0 ; mm4=data3L + paddd mm3,mm5 ; mm3=data3H + psubd mm1,mm0 ; mm1=data4L + psubd mm2,mm5 ; mm2=data4H + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2] + + paddd mm4,mm0 + paddd mm3,mm0 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm1,mm0 + paddd mm2,mm0 + psrad mm1,DESCALE_P2 + psrad mm2,DESCALE_P2 + + movq mm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm5=[PB_CENTERJSAMP] + + packssdw mm4,mm3 ; mm4=data3=(03 13 23 33) + packssdw mm1,mm2 ; mm1=data4=(04 14 24 34) + + movq mm0, MMWORD [wk(0)] ; mm0=(00 10 20 30 06 16 26 36) + movq mm3, MMWORD [wk(1)] ; mm3=(01 11 21 31 07 17 27 37) + + packsswb mm6,mm1 ; mm6=(02 12 22 32 04 14 24 34) + packsswb mm4,mm7 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm5 + paddb mm3,mm5 + paddb mm6,mm5 + paddb mm4,mm5 + + movq mm2,mm0 ; transpose coefficients(phase 1) + punpcklbw mm0,mm3 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm3 ; mm2=(06 07 16 17 26 27 36 37) + movq mm1,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm4 ; mm6=(02 03 12 13 22 23 32 33) + punpckhbw mm1,mm4 ; mm1=(04 05 14 15 24 25 34 35) + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm6 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm7,mm6 ; mm7=(20 21 22 23 30 31 32 33) + movq mm5,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm2 ; mm1=(04 05 06 07 14 15 16 17) + punpckhwd mm5,mm2 ; mm5=(24 25 26 27 34 35 36 37) + + movq mm3,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm1 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm3,mm1 ; mm3=(10 11 12 13 14 15 16 17) + movq mm4,mm7 ; transpose coefficients(phase 3) + punpckldq mm7,mm5 ; mm7=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm5 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,848 @@ +; +; jidctint.asm - accurate integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_sse2) PRIVATE + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = jpeg_component_info *compptr +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 16 + global EXTN(jsimd_idct_islow_sse2) PRIVATE + +EXTN(jsimd_idct_islow_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5,PASS1_BITS + + movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm4,xmm3 ; xmm3=in6=z3 + punpckhwd xmm5,xmm3 + movdqa xmm1,xmm4 + movdqa xmm3,xmm5 + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm3,[rel PW_F054_MF130] ; xmm3=tmp2H + + movdqa xmm6,xmm0 + paddw xmm0,xmm2 ; xmm0=in0+in4 + psubw xmm6,xmm2 ; xmm6=in0-in4 + + pxor xmm7,xmm7 + pxor xmm2,xmm2 + punpcklwd xmm7,xmm0 ; xmm7=tmp0L + punpckhwd xmm2,xmm0 ; xmm2=tmp0H + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0,xmm7 + paddd xmm7,xmm4 ; xmm7=tmp10L + psubd xmm0,xmm4 ; xmm0=tmp13L + movdqa xmm4,xmm2 + paddd xmm2,xmm5 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm7,xmm7 + punpcklwd xmm5,xmm6 ; xmm5=tmp1L + punpckhwd xmm7,xmm6 ; xmm7=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm2,xmm1 ; xmm2=tmp12L + movdqa xmm0,xmm7 + paddd xmm7,xmm3 ; xmm7=tmp11H + psubd xmm0,xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5,xmm6 + movdqa xmm7,xmm4 + paddw xmm5,xmm3 ; xmm5=z3 + paddw xmm7,xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2,xmm5 + movdqa xmm0,xmm5 + punpcklwd xmm2,xmm7 + punpckhwd xmm0,xmm7 + movdqa xmm5,xmm2 + movdqa xmm7,xmm0 + pmaddwd xmm2,[rel PW_MF078_F117] ; xmm2=z3L + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm7,[rel PW_F117_F078] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2,xmm3 + movdqa xmm0,xmm3 + punpcklwd xmm2,xmm4 + punpckhwd xmm0,xmm4 + movdqa xmm3,xmm2 + movdqa xmm4,xmm0 + pmaddwd xmm2,[rel PW_MF060_MF089] ; xmm2=tmp0L + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0H + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3L + pmaddwd xmm4,[rel PW_MF089_F060] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3,xmm5 ; xmm3=tmp3L + paddd xmm4,xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2,xmm1 + movdqa xmm0,xmm1 + punpcklwd xmm2,xmm6 + punpckhwd xmm0,xmm6 + movdqa xmm1,xmm2 + movdqa xmm6,xmm0 + pmaddwd xmm2,[rel PW_MF050_MF256] ; xmm2=tmp1L + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1H + pmaddwd xmm1,[rel PW_MF256_F050] ; xmm1=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm2,xmm5 ; xmm2=tmp1L + paddd xmm0,xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2,xmm5 + movdqa xmm0,xmm7 + paddd xmm5,xmm3 ; xmm5=data0L + paddd xmm7,xmm4 ; xmm7=data0H + psubd xmm2,xmm3 ; xmm2=data7L + psubd xmm0,xmm4 ; xmm0=data7H + + movdqa xmm3,[rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1] + + paddd xmm5,xmm3 + paddd xmm7,xmm3 + psrad xmm5,DESCALE_P1 + psrad xmm7,DESCALE_P1 + paddd xmm2,xmm3 + paddd xmm0,xmm3 + psrad xmm2,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7,xmm4 + movdqa xmm0,xmm3 + paddd xmm4,xmm1 ; xmm4=data1L + paddd xmm3,xmm6 ; xmm3=data1H + psubd xmm7,xmm1 ; xmm7=data6L + psubd xmm0,xmm6 ; xmm0=data6H + + movdqa xmm1,[rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1] + + paddd xmm4,xmm1 + paddd xmm3,xmm1 + psrad xmm4,DESCALE_P1 + psrad xmm3,DESCALE_P1 + paddd xmm7,xmm1 + paddd xmm0,xmm1 + psrad xmm7,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5,xmm3 + movdqa xmm6,xmm0 + paddd xmm3,xmm4 ; xmm3=data2L + paddd xmm0,xmm2 ; xmm0=data2H + psubd xmm5,xmm4 ; xmm5=data5L + psubd xmm6,xmm2 ; xmm6=data5H + + movdqa xmm7,[rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1] + + paddd xmm3,xmm7 + paddd xmm0,xmm7 + psrad xmm3,DESCALE_P1 + psrad xmm0,DESCALE_P1 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + psrad xmm5,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0,xmm1 + movdqa xmm6,xmm4 + paddd xmm1,xmm2 ; xmm1=data3L + paddd xmm4,xmm7 ; xmm4=data3H + psubd xmm0,xmm2 ; xmm0=data4L + psubd xmm6,xmm7 ; xmm6=data4H + + movdqa xmm2,[rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1] + + paddd xmm1,xmm2 + paddd xmm4,xmm2 + psrad xmm1,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm0,xmm2 + paddd xmm6,xmm2 + psrad xmm0,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4,xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm6,xmm2 ; xmm2=in6=z3 + punpckhwd xmm5,xmm2 + movdqa xmm1,xmm6 + movdqa xmm2,xmm5 + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm2,[rel PW_F054_MF130] ; xmm2=tmp2H + + movdqa xmm3,xmm7 + paddw xmm7,xmm0 ; xmm7=in0+in4 + psubw xmm3,xmm0 ; xmm3=in0-in4 + + pxor xmm4,xmm4 + pxor xmm0,xmm0 + punpcklwd xmm4,xmm7 ; xmm4=tmp0L + punpckhwd xmm0,xmm7 ; xmm0=tmp0H + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7,xmm4 + paddd xmm4,xmm6 ; xmm4=tmp10L + psubd xmm7,xmm6 ; xmm7=tmp13L + movdqa xmm6,xmm0 + paddd xmm0,xmm5 ; xmm0=tmp10H + psubd xmm6,xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm4,xmm4 + punpcklwd xmm5,xmm3 ; xmm5=tmp1L + punpckhwd xmm4,xmm3 ; xmm4=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm0,xmm1 ; xmm0=tmp12L + movdqa xmm7,xmm4 + paddd xmm4,xmm2 ; xmm4=tmp11H + psubd xmm7,xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5,xmm6 + movdqa xmm4,xmm3 + paddw xmm5,xmm1 ; xmm5=z3 + paddw xmm4,xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0,xmm5 + movdqa xmm7,xmm5 + punpcklwd xmm0,xmm4 + punpckhwd xmm7,xmm4 + movdqa xmm5,xmm0 + movdqa xmm4,xmm7 + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3L + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm4,[rel PW_F117_F078] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0,xmm1 + movdqa xmm7,xmm1 + punpcklwd xmm0,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm1,xmm0 + movdqa xmm3,xmm7 + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0L + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp0H + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp3L + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1,xmm5 ; xmm1=tmp3L + paddd xmm3,xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm0,xmm6 + punpckhwd xmm7,xmm6 + movdqa xmm2,xmm0 + movdqa xmm6,xmm7 + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1L + pmaddwd xmm7,[rel PW_MF050_MF256] ; xmm7=tmp1H + pmaddwd xmm2,[rel PW_MF256_F050] ; xmm2=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm0,xmm5 ; xmm0=tmp1L + paddd xmm7,xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0,xmm5 + movdqa xmm7,xmm4 + paddd xmm5,xmm1 ; xmm5=data0L + paddd xmm4,xmm3 ; xmm4=data0H + psubd xmm0,xmm1 ; xmm0=data7L + psubd xmm7,xmm3 ; xmm7=data7H + + movdqa xmm1,[rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2] + + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrad xmm5,DESCALE_P2 + psrad xmm4,DESCALE_P2 + paddd xmm0,xmm1 + paddd xmm7,xmm1 + psrad xmm0,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4,xmm3 + movdqa xmm7,xmm1 + paddd xmm3,xmm2 ; xmm3=data1L + paddd xmm1,xmm6 ; xmm1=data1H + psubd xmm4,xmm2 ; xmm4=data6L + psubd xmm7,xmm6 ; xmm7=data6H + + movdqa xmm2,[rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2] + + paddd xmm3,xmm2 + paddd xmm1,xmm2 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm4,xmm2 + paddd xmm7,xmm2 + psrad xmm4,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4,xmm6 + movdqa xmm0,xmm2 + paddd xmm6,xmm1 ; xmm6=data2L + paddd xmm2,xmm7 ; xmm2=data2H + psubd xmm4,xmm1 ; xmm4=data5L + psubd xmm0,xmm7 ; xmm0=data5H + + movdqa xmm5,[rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2] + + paddd xmm6,xmm5 + paddd xmm2,xmm5 + psrad xmm6,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm4,xmm5 + paddd xmm0,xmm5 + psrad xmm4,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2,xmm3 + movdqa xmm0,xmm1 + paddd xmm3,xmm7 ; xmm3=data3L + paddd xmm1,xmm5 ; xmm1=data3H + psubd xmm2,xmm7 ; xmm2=data4L + psubd xmm0,xmm5 ; xmm0=data4H + + movdqa xmm7,[rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2] + + paddd xmm3,xmm7 + paddd xmm1,xmm7 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm2,xmm7 + paddd xmm0,xmm7 + psrad xmm2,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + movdqa xmm5,[rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP] + + packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7,xmm5 + paddb xmm1,xmm5 + paddb xmm6,xmm5 + paddb xmm3,xmm5 + + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctint-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctint-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,859 @@ +; +; jidctint.asm - accurate integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_sse2) PRIVATE + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; jpeg_component_info *compptr +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 16 + global EXTN(jsimd_idct_islow_sse2) PRIVATE + +EXTN(jsimd_idct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5,PASS1_BITS + + movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm4,xmm3 ; xmm3=in6=z3 + punpckhwd xmm5,xmm3 + movdqa xmm1,xmm4 + movdqa xmm3,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F054_MF130)] ; xmm3=tmp2H + + movdqa xmm6,xmm0 + paddw xmm0,xmm2 ; xmm0=in0+in4 + psubw xmm6,xmm2 ; xmm6=in0-in4 + + pxor xmm7,xmm7 + pxor xmm2,xmm2 + punpcklwd xmm7,xmm0 ; xmm7=tmp0L + punpckhwd xmm2,xmm0 ; xmm2=tmp0H + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0,xmm7 + paddd xmm7,xmm4 ; xmm7=tmp10L + psubd xmm0,xmm4 ; xmm0=tmp13L + movdqa xmm4,xmm2 + paddd xmm2,xmm5 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm7,xmm7 + punpcklwd xmm5,xmm6 ; xmm5=tmp1L + punpckhwd xmm7,xmm6 ; xmm7=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm2,xmm1 ; xmm2=tmp12L + movdqa xmm0,xmm7 + paddd xmm7,xmm3 ; xmm7=tmp11H + psubd xmm0,xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5,xmm6 + movdqa xmm7,xmm4 + paddw xmm5,xmm3 ; xmm5=z3 + paddw xmm7,xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2,xmm5 + movdqa xmm0,xmm5 + punpcklwd xmm2,xmm7 + punpckhwd xmm0,xmm7 + movdqa xmm5,xmm2 + movdqa xmm7,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF078_F117)] ; xmm2=z3L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm7,[GOTOFF(ebx,PW_F117_F078)] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2,xmm3 + movdqa xmm0,xmm3 + punpcklwd xmm2,xmm4 + punpckhwd xmm0,xmm4 + movdqa xmm3,xmm2 + movdqa xmm4,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm2=tmp0L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF089_F060)] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3,xmm5 ; xmm3=tmp3L + paddd xmm4,xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2,xmm1 + movdqa xmm0,xmm1 + punpcklwd xmm2,xmm6 + punpckhwd xmm0,xmm6 + movdqa xmm1,xmm2 + movdqa xmm6,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm2=tmp1L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF256_F050)] ; xmm1=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm2,xmm5 ; xmm2=tmp1L + paddd xmm0,xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2,xmm5 + movdqa xmm0,xmm7 + paddd xmm5,xmm3 ; xmm5=data0L + paddd xmm7,xmm4 ; xmm7=data0H + psubd xmm2,xmm3 ; xmm2=data7L + psubd xmm0,xmm4 ; xmm0=data7H + + movdqa xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm3=[PD_DESCALE_P1] + + paddd xmm5,xmm3 + paddd xmm7,xmm3 + psrad xmm5,DESCALE_P1 + psrad xmm7,DESCALE_P1 + paddd xmm2,xmm3 + paddd xmm0,xmm3 + psrad xmm2,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7,xmm4 + movdqa xmm0,xmm3 + paddd xmm4,xmm1 ; xmm4=data1L + paddd xmm3,xmm6 ; xmm3=data1H + psubd xmm7,xmm1 ; xmm7=data6L + psubd xmm0,xmm6 ; xmm0=data6H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm1=[PD_DESCALE_P1] + + paddd xmm4,xmm1 + paddd xmm3,xmm1 + psrad xmm4,DESCALE_P1 + psrad xmm3,DESCALE_P1 + paddd xmm7,xmm1 + paddd xmm0,xmm1 + psrad xmm7,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5,xmm3 + movdqa xmm6,xmm0 + paddd xmm3,xmm4 ; xmm3=data2L + paddd xmm0,xmm2 ; xmm0=data2H + psubd xmm5,xmm4 ; xmm5=data5L + psubd xmm6,xmm2 ; xmm6=data5H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm7=[PD_DESCALE_P1] + + paddd xmm3,xmm7 + paddd xmm0,xmm7 + psrad xmm3,DESCALE_P1 + psrad xmm0,DESCALE_P1 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + psrad xmm5,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0,xmm1 + movdqa xmm6,xmm4 + paddd xmm1,xmm2 ; xmm1=data3L + paddd xmm4,xmm7 ; xmm4=data3H + psubd xmm0,xmm2 ; xmm0=data4L + psubd xmm6,xmm7 ; xmm6=data4H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm2=[PD_DESCALE_P1] + + paddd xmm1,xmm2 + paddd xmm4,xmm2 + psrad xmm1,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm0,xmm2 + paddd xmm6,xmm2 + psrad xmm0,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4,xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm6,xmm2 ; xmm2=in6=z3 + punpckhwd xmm5,xmm2 + movdqa xmm1,xmm6 + movdqa xmm2,xmm5 + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F054_MF130)] ; xmm2=tmp2H + + movdqa xmm3,xmm7 + paddw xmm7,xmm0 ; xmm7=in0+in4 + psubw xmm3,xmm0 ; xmm3=in0-in4 + + pxor xmm4,xmm4 + pxor xmm0,xmm0 + punpcklwd xmm4,xmm7 ; xmm4=tmp0L + punpckhwd xmm0,xmm7 ; xmm0=tmp0H + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7,xmm4 + paddd xmm4,xmm6 ; xmm4=tmp10L + psubd xmm7,xmm6 ; xmm7=tmp13L + movdqa xmm6,xmm0 + paddd xmm0,xmm5 ; xmm0=tmp10H + psubd xmm6,xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm4,xmm4 + punpcklwd xmm5,xmm3 ; xmm5=tmp1L + punpckhwd xmm4,xmm3 ; xmm4=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm0,xmm1 ; xmm0=tmp12L + movdqa xmm7,xmm4 + paddd xmm4,xmm2 ; xmm4=tmp11H + psubd xmm7,xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5,xmm6 + movdqa xmm4,xmm3 + paddw xmm5,xmm1 ; xmm5=z3 + paddw xmm4,xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0,xmm5 + movdqa xmm7,xmm5 + punpcklwd xmm0,xmm4 + punpckhwd xmm7,xmm4 + movdqa xmm5,xmm0 + movdqa xmm4,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm4,[GOTOFF(ebx,PW_F117_F078)] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0,xmm1 + movdqa xmm7,xmm1 + punpcklwd xmm0,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm1,xmm0 + movdqa xmm3,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp0H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp3L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1,xmm5 ; xmm1=tmp3L + paddd xmm3,xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm0,xmm6 + punpckhwd xmm7,xmm6 + movdqa xmm2,xmm0 + movdqa xmm6,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm7=tmp1H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF256_F050)] ; xmm2=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm0,xmm5 ; xmm0=tmp1L + paddd xmm7,xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0,xmm5 + movdqa xmm7,xmm4 + paddd xmm5,xmm1 ; xmm5=data0L + paddd xmm4,xmm3 ; xmm4=data0H + psubd xmm0,xmm1 ; xmm0=data7L + psubd xmm7,xmm3 ; xmm7=data7H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm1=[PD_DESCALE_P2] + + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrad xmm5,DESCALE_P2 + psrad xmm4,DESCALE_P2 + paddd xmm0,xmm1 + paddd xmm7,xmm1 + psrad xmm0,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4,xmm3 + movdqa xmm7,xmm1 + paddd xmm3,xmm2 ; xmm3=data1L + paddd xmm1,xmm6 ; xmm1=data1H + psubd xmm4,xmm2 ; xmm4=data6L + psubd xmm7,xmm6 ; xmm7=data6H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm2=[PD_DESCALE_P2] + + paddd xmm3,xmm2 + paddd xmm1,xmm2 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm4,xmm2 + paddd xmm7,xmm2 + psrad xmm4,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4,xmm6 + movdqa xmm0,xmm2 + paddd xmm6,xmm1 ; xmm6=data2L + paddd xmm2,xmm7 ; xmm2=data2H + psubd xmm4,xmm1 ; xmm4=data5L + psubd xmm0,xmm7 ; xmm0=data5H + + movdqa xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm5=[PD_DESCALE_P2] + + paddd xmm6,xmm5 + paddd xmm2,xmm5 + psrad xmm6,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm4,xmm5 + paddd xmm0,xmm5 + psrad xmm4,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2,xmm3 + movdqa xmm0,xmm1 + paddd xmm3,xmm7 ; xmm3=data3L + paddd xmm1,xmm5 ; xmm1=data3H + psubd xmm2,xmm7 ; xmm2=data4L + psubd xmm0,xmm5 ; xmm0=data4H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm7=[PD_DESCALE_P2] + + paddd xmm3,xmm7 + paddd xmm1,xmm7 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm2,xmm7 + paddd xmm0,xmm7 + psrad xmm2,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + movdqa xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm5=[PB_CENTERJSAMP] + + packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7,xmm5 + paddb xmm1,xmm5 + paddb xmm6,xmm5 + paddb xmm3,xmm5 + + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,706 @@ +; +; jidctred.asm - reduced-size IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_mmx) PRIVATE + +EXTN(jconst_idct_red_mmx): + +PW_F184_MF076 times 2 dw F_1_847,-F_0_765 +PW_F256_F089 times 2 dw F_2_562, F_0_899 +PW_F106_MF217 times 2 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 2 dw F_1_451,-F_0_211 +PW_F362_MF127 times 2 dw F_3_624,-F_1_272 +PW_F085_MF072 times 2 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jsimd_idct_4x4_mmx) PRIVATE + +EXTN(jsimd_idct_4x4_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm0,mm1 + packsswb mm0,mm0 + movd eax,mm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor mm1,mm1 + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ; mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm6=[PD_DESCALE_P1_4] + + paddd mm1,mm6 + paddd mm2,mm6 + psrad mm1,DESCALE_P1_4 + psrad mm2,DESCALE_P1_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm1,mm2 ; mm1=data0=(00 01 02 03) + packssdw mm5,mm3 ; mm5=data3=(30 31 32 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm7=[PD_DESCALE_P1_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P1_4 + psrad mm0,DESCALE_P1_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm4,mm0 ; mm4=data1=(10 11 12 13) + packssdw mm2,mm3 ; mm2=data2=(20 21 22 23) + + movq mm6,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm4 ; mm1=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm7,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm5 ; mm2=(20 30 21 31) + punpckhwd mm7,mm5 ; mm7=(22 32 23 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm2 ; mm1=(00 10 20 30) + punpckhdq mm0,mm2 ; mm0=(01 11 21 31) + movq mm3,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(02 12 22 32) + punpckhdq mm3,mm7 ; mm3=(03 13 23 33) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + pxor mm1,mm1 + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ; mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm6=[PD_DESCALE_P2_4] + + paddd mm1,mm6 + paddd mm2,mm6 + psrad mm1,DESCALE_P2_4 + psrad mm2,DESCALE_P2_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm1,mm2 ; mm1=data0=(00 10 20 30) + packssdw mm5,mm3 ; mm5=data3=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm7=[PD_DESCALE_P2_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P2_4 + psrad mm0,DESCALE_P2_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm4,mm0 ; mm4=data1=(01 11 21 31) + packssdw mm2,mm3 ; mm2=data2=(02 12 22 32) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm1,mm2 ; mm1=(00 10 20 30 02 12 22 32) + packsswb mm4,mm5 ; mm4=(01 11 21 31 03 13 23 33) + paddb mm1,mm6 + paddb mm4,mm6 + + movq mm7,mm1 ; transpose coefficients(phase 1) + punpcklbw mm1,mm4 ; mm1=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm4 ; mm7=(02 03 12 13 22 23 32 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm7 ; mm1=(00 01 02 03 10 11 12 13) + punpckhwd mm0,mm7 ; mm0=(20 21 22 23 30 31 32 33) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + + psrlq mm1,4*BYTE_BIT + psrlq mm0,4*BYTE_BIT + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_mmx (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_mmx) PRIVATE + +EXTN(jsimd_idct_2x2_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + + mov edx, POINTER [dct_table(ebp)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm0=(10 11 ** 13), mm1=(30 31 ** 33) + ; mm2=(50 51 ** 53), mm3=(70 71 ** 73) + + pcmpeqd mm7,mm7 + pslld mm7,WORD_BIT ; mm7={0x0000 0xFFFF 0x0000 0xFFFF} + + movq mm4,mm0 ; mm4=(10 11 ** 13) + movq mm5,mm2 ; mm5=(50 51 ** 53) + punpcklwd mm4,mm1 ; mm4=(10 30 11 31) + punpcklwd mm5,mm3 ; mm5=(50 70 51 71) + pmaddwd mm4,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] + + psrld mm0,WORD_BIT ; mm0=(11 -- 13 --) + pand mm1,mm7 ; mm1=(-- 31 -- 33) + psrld mm2,WORD_BIT ; mm2=(51 -- 53 --) + pand mm3,mm7 ; mm3=(-- 71 -- 73) + por mm0,mm1 ; mm0=(11 31 13 33) + por mm2,mm3 ; mm2=(51 71 53 73) + pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm4,mm5 ; mm4=tmp0[col0 col1] + + movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)] + pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)] + pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm6=(** 15 ** 17), mm1=(** 35 ** 37) + ; mm3=(** 55 ** 57), mm5=(** 75 ** 77) + + psrld mm6,WORD_BIT ; mm6=(15 -- 17 --) + pand mm1,mm7 ; mm1=(-- 35 -- 37) + psrld mm3,WORD_BIT ; mm3=(55 -- 57 --) + pand mm5,mm7 ; mm5=(-- 75 -- 77) + por mm6,mm1 ; mm6=(15 35 17 37) + por mm3,mm5 ; mm3=(55 75 57 77) + pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm0,mm2 ; mm0=tmp0[col1 col3] + paddd mm6,mm3 ; mm6=tmp0[col5 col7] + + ; -- Even part + + movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm1=(00 01 ** 03), mm5=(** 05 ** 07) + + movq mm2,mm1 ; mm2=(00 01 ** 03) + pslld mm1,WORD_BIT ; mm1=(-- 00 -- **) + psrad mm1,(WORD_BIT-CONST_BITS-2) ; mm1=tmp10[col0 ****] + + pand mm2,mm7 ; mm2=(-- 01 -- 03) + pand mm5,mm7 ; mm5=(-- 05 -- 07) + psrad mm2,(WORD_BIT-CONST_BITS-2) ; mm2=tmp10[col1 col3] + psrad mm5,(WORD_BIT-CONST_BITS-2) ; mm5=tmp10[col5 col7] + + ; -- Final output stage + + movq mm3,mm1 + paddd mm1,mm4 ; mm1=data0[col0 ****]=(A0 **) + psubd mm3,mm4 ; mm3=data1[col0 ****]=(B0 **) + punpckldq mm1,mm3 ; mm1=(A0 B0) + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; mm7=[PD_DESCALE_P1_2] + + movq mm4,mm2 + movq mm3,mm5 + paddd mm2,mm0 ; mm2=data0[col1 col3]=(A1 A3) + paddd mm5,mm6 ; mm5=data0[col5 col7]=(A5 A7) + psubd mm4,mm0 ; mm4=data1[col1 col3]=(B1 B3) + psubd mm3,mm6 ; mm3=data1[col5 col7]=(B5 B7) + + paddd mm1,mm7 + psrad mm1,DESCALE_P1_2 + + paddd mm2,mm7 + paddd mm5,mm7 + psrad mm2,DESCALE_P1_2 + psrad mm5,DESCALE_P1_2 + paddd mm4,mm7 + paddd mm3,mm7 + psrad mm4,DESCALE_P1_2 + psrad mm3,DESCALE_P1_2 + + ; ---- Pass 2: process rows, store into output array. + + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw mm2,mm4 ; mm2=(A1 A3 B1 B3) + packssdw mm5,mm3 ; mm5=(A5 A7 B5 B7) + pmaddwd mm2,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm2,mm5 ; mm2=tmp0[row0 row1] + + ; -- Even part + + pslld mm1,(CONST_BITS+2) ; mm1=tmp10[row0 row1] + + ; -- Final output stage + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)] ; mm0=[PD_DESCALE_P2_2] + + movq mm6,mm1 + paddd mm1,mm2 ; mm1=data0[row0 row1]=(C0 C1) + psubd mm6,mm2 ; mm6=data1[row0 row1]=(D0 D1) + + paddd mm1,mm0 + paddd mm6,mm0 + psrad mm1,DESCALE_P2_2 + psrad mm6,DESCALE_P2_2 + + movq mm7,mm1 ; transpose coefficients + punpckldq mm1,mm6 ; mm1=(C0 D0) + punpckhdq mm7,mm6 ; mm7=(C1 D1) + + packssdw mm1,mm7 ; mm1=(C0 D0 C1 D1) + packsswb mm1,mm1 ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1) + paddb mm1,[GOTOFF(ebx,PB_CENTERJSAMP)] + + movd ecx,mm1 + movd ebx,mm1 ; ebx=(C0 D0 C1 D1) + shr ecx,2*BYTE_BIT ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,576 @@ +; +; jidctred.asm - reduced-size IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_sse2) PRIVATE + +EXTN(jconst_idct_red_sse2): + +PW_F184_MF076 times 4 dw F_1_847,-F_0_765 +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 4 dw F_1_451,-F_0_211 +PW_F362_MF127 times 4 dw F_3_624,-F_1_272 +PW_F085_MF072 times 4 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + +%define original_rbp rbp+0 +%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_4x4_sse2) PRIVATE + +EXTN(jsimd_idct_4x4_sse2): + push rbp + mov rax,rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp],rax + mov rbp,rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm0,xmm1 + packsswb xmm0,xmm0 + packsswb xmm0,xmm0 + movd eax,xmm0 + test rax,rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0,PASS1_BITS + + movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) + + jmp near .column_end +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm0 + punpcklwd xmm4,xmm1 + punpckhwd xmm5,xmm1 + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + pmaddwd xmm4,[rel PW_F256_F089] ; xmm4=(tmp2L) + pmaddwd xmm5,[rel PW_F256_F089] ; xmm5=(tmp2H) + pmaddwd xmm0,[rel PW_F106_MF217] ; xmm0=(tmp0L) + pmaddwd xmm1,[rel PW_F106_MF217] ; xmm1=(tmp0H) + + movdqa xmm6,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm6,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2L) + pmaddwd xmm7,[rel PW_MF060_MF050] ; xmm7=(tmp2H) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0L) + pmaddwd xmm3,[rel PW_F145_MF021] ; xmm3=(tmp0H) + + paddd xmm6,xmm4 ; xmm6=tmp2L + paddd xmm7,xmm5 ; xmm7=tmp2H + paddd xmm2,xmm0 ; xmm2=tmp0L + paddd xmm3,xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1,xmm1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm4 ; xmm1=tmp0L + punpckhwd xmm2,xmm4 ; xmm2=tmp0H + psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3,xmm5 ; xmm5=in2=z2 + punpcklwd xmm5,xmm0 ; xmm0=in6=z3 + punpckhwd xmm3,xmm0 + pmaddwd xmm5,[rel PW_F184_MF076] ; xmm5=tmp2L + pmaddwd xmm3,[rel PW_F184_MF076] ; xmm3=tmp2H + + movdqa xmm4,xmm1 + movdqa xmm0,xmm2 + paddd xmm1,xmm5 ; xmm1=tmp10L + paddd xmm2,xmm3 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp12L + psubd xmm0,xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5,xmm1 + movdqa xmm3,xmm2 + paddd xmm1,xmm6 ; xmm1=data0L + paddd xmm2,xmm7 ; xmm2=data0H + psubd xmm5,xmm6 ; xmm5=data3L + psubd xmm3,xmm7 ; xmm3=data3H + + movdqa xmm6,[rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4] + + paddd xmm1,xmm6 + paddd xmm2,xmm6 + psrad xmm1,DESCALE_P1_4 + psrad xmm2,DESCALE_P1_4 + paddd xmm5,xmm6 + paddd xmm3,xmm6 + psrad xmm5,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2,xmm4 + movdqa xmm3,xmm0 + paddd xmm4,xmm7 ; xmm4=data1L + paddd xmm0,xmm6 ; xmm0=data1H + psubd xmm2,xmm7 ; xmm2=data2L + psubd xmm3,xmm6 ; xmm3=data2H + + movdqa xmm7,[rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4] + + paddd xmm4,xmm7 + paddd xmm0,xmm7 + psrad xmm4,DESCALE_P1_4 + psrad xmm0,DESCALE_P1_4 + paddd xmm2,xmm7 + paddd xmm3,xmm7 + psrad xmm2,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + pxor xmm4,xmm4 + punpcklwd xmm4,xmm1 ; xmm4=tmp0 + psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1,xmm0 + punpckhwd xmm6,xmm3 + movdqa xmm5,xmm1 + movdqa xmm2,xmm6 + pmaddwd xmm1,[rel PW_F256_F089] ; xmm1=(tmp2) + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2) + pmaddwd xmm5,[rel PW_F106_MF217] ; xmm5=(tmp0) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0) + + paddd xmm6,xmm1 ; xmm6=tmp2 + paddd xmm2,xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0,xmm3 + pmaddwd xmm0,[rel PW_F184_MF076] ; xmm0=tmp2 + + movdqa xmm7,xmm4 + paddd xmm4,xmm0 ; xmm4=tmp10 + psubd xmm7,xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1,[rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4] + + movdqa xmm5,xmm4 + movdqa xmm3,xmm7 + paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4,xmm1 + paddd xmm7,xmm1 + psrad xmm4,DESCALE_P2_4 + psrad xmm7,DESCALE_P2_4 + paddd xmm5,xmm1 + paddd xmm3,xmm1 + psrad xmm5,DESCALE_P2_4 + psrad xmm3,DESCALE_P2_4 + + packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0,xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4,[rel PB_CENTERJSAMP] + + pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) + pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 + mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + uncollect_args + mov rsp,rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13 = JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_sse2) PRIVATE + +EXTN(jsimd_idct_2x2_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7,xmm7 + pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4,[rel PW_F362_MF127] + pmaddwd xmm5,[rel PW_F085_MF072] + + psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0,[rel PW_F362_MF127] + pmaddwd xmm2,[rel PW_F085_MF072] + + paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + + ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3,xmm6 + movdqa xmm5,xmm1 + paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2,[rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2] + + punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7,xmm1 + punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6,xmm2 + psrad xmm6,DESCALE_P1_2 + + paddd xmm1,xmm2 + paddd xmm7,xmm2 + psrad xmm1,DESCALE_P1_2 + psrad xmm7,DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1,[rel PW_F362_MF127] + pmaddwd xmm7,[rel PW_F085_MF072] + + paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4,xmm6 + paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6,[rel PD_DESCALE_P2_2] + psrad xmm6,DESCALE_P2_2 + + packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6,[rel PB_CENTERJSAMP] + + pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) + pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx + mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx + + pop rbx + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jidctred-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jidctred-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,594 @@ +; +; jidctred.asm - reduced-size IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_sse2) PRIVATE + +EXTN(jconst_idct_red_sse2): + +PW_F184_MF076 times 4 dw F_1_847,-F_0_765 +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 4 dw F_1_451,-F_0_211 +PW_F362_MF127 times 4 dw F_3_624,-F_1_272 +PW_F085_MF072 times 4 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jsimd_idct_4x4_sse2) PRIVATE + +EXTN(jsimd_idct_4x4_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm0,xmm1 + packsswb xmm0,xmm0 + packsswb xmm0,xmm0 + movd eax,xmm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0,PASS1_BITS + + movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) + + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm0 + punpcklwd xmm4,xmm1 + punpckhwd xmm5,xmm1 + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F256_F089)] ; xmm4=(tmp2L) + pmaddwd xmm5,[GOTOFF(ebx,PW_F256_F089)] ; xmm5=(tmp2H) + pmaddwd xmm0,[GOTOFF(ebx,PW_F106_MF217)] ; xmm0=(tmp0L) + pmaddwd xmm1,[GOTOFF(ebx,PW_F106_MF217)] ; xmm1=(tmp0H) + + movdqa xmm6,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm6,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2L) + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm7=(tmp2H) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0L) + pmaddwd xmm3,[GOTOFF(ebx,PW_F145_MF021)] ; xmm3=(tmp0H) + + paddd xmm6,xmm4 ; xmm6=tmp2L + paddd xmm7,xmm5 ; xmm7=tmp2H + paddd xmm2,xmm0 ; xmm2=tmp0L + paddd xmm3,xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1,xmm1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm4 ; xmm1=tmp0L + punpckhwd xmm2,xmm4 ; xmm2=tmp0H + psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3,xmm5 ; xmm5=in2=z2 + punpcklwd xmm5,xmm0 ; xmm0=in6=z3 + punpckhwd xmm3,xmm0 + pmaddwd xmm5,[GOTOFF(ebx,PW_F184_MF076)] ; xmm5=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F184_MF076)] ; xmm3=tmp2H + + movdqa xmm4,xmm1 + movdqa xmm0,xmm2 + paddd xmm1,xmm5 ; xmm1=tmp10L + paddd xmm2,xmm3 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp12L + psubd xmm0,xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5,xmm1 + movdqa xmm3,xmm2 + paddd xmm1,xmm6 ; xmm1=data0L + paddd xmm2,xmm7 ; xmm2=data0H + psubd xmm5,xmm6 ; xmm5=data3L + psubd xmm3,xmm7 ; xmm3=data3H + + movdqa xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm6=[PD_DESCALE_P1_4] + + paddd xmm1,xmm6 + paddd xmm2,xmm6 + psrad xmm1,DESCALE_P1_4 + psrad xmm2,DESCALE_P1_4 + paddd xmm5,xmm6 + paddd xmm3,xmm6 + psrad xmm5,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2,xmm4 + movdqa xmm3,xmm0 + paddd xmm4,xmm7 ; xmm4=data1L + paddd xmm0,xmm6 ; xmm0=data1H + psubd xmm2,xmm7 ; xmm2=data2L + psubd xmm3,xmm6 ; xmm3=data2H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm7=[PD_DESCALE_P1_4] + + paddd xmm4,xmm7 + paddd xmm0,xmm7 + psrad xmm4,DESCALE_P1_4 + psrad xmm0,DESCALE_P1_4 + paddd xmm2,xmm7 + paddd xmm3,xmm7 + psrad xmm2,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + pxor xmm4,xmm4 + punpcklwd xmm4,xmm1 ; xmm4=tmp0 + psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1,xmm0 + punpckhwd xmm6,xmm3 + movdqa xmm5,xmm1 + movdqa xmm2,xmm6 + pmaddwd xmm1,[GOTOFF(ebx,PW_F256_F089)] ; xmm1=(tmp2) + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2) + pmaddwd xmm5,[GOTOFF(ebx,PW_F106_MF217)] ; xmm5=(tmp0) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0) + + paddd xmm6,xmm1 ; xmm6=tmp2 + paddd xmm2,xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0,xmm3 + pmaddwd xmm0,[GOTOFF(ebx,PW_F184_MF076)] ; xmm0=tmp2 + + movdqa xmm7,xmm4 + paddd xmm4,xmm0 ; xmm4=tmp10 + psubd xmm7,xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; xmm1=[PD_DESCALE_P2_4] + + movdqa xmm5,xmm4 + movdqa xmm3,xmm7 + paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4,xmm1 + paddd xmm7,xmm1 + psrad xmm4,DESCALE_P2_4 + psrad xmm7,DESCALE_P2_4 + paddd xmm5,xmm1 + paddd xmm3,xmm1 + psrad xmm5,DESCALE_P2_4 + psrad xmm3,DESCALE_P2_4 + + packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0,xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)] + + pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) + pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b)+8 ; void *dct_table +%define coef_block(b) (b)+12 ; JCOEFPTR coef_block +%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf +%define output_col(b) (b)+20 ; JDIMENSION output_col + + align 16 + global EXTN(jsimd_idct_2x2_sse2) PRIVATE + +EXTN(jsimd_idct_2x2_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + + mov edx, POINTER [dct_table(ebp)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7,xmm7 + pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm5,[GOTOFF(ebx,PW_F085_MF072)] + + psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)] + + paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + + ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3,xmm6 + movdqa xmm5,xmm1 + paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; xmm2=[PD_DESCALE_P1_2] + + punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7,xmm1 + punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6,xmm2 + psrad xmm6,DESCALE_P1_2 + + paddd xmm1,xmm2 + paddd xmm7,xmm2 + psrad xmm1,DESCALE_P1_2 + psrad xmm7,DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd xmm7,[GOTOFF(ebx,PW_F085_MF072)] + + paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4,xmm6 + paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)] + psrad xmm6,DESCALE_P2_2 + + packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)] + + pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) + pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jpeg_nbits_table.inc glmark2-2021.02/src/libjpeg-turbo/simd/jpeg_nbits_table.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jpeg_nbits_table.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jpeg_nbits_table.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4097 @@ +jpeg_nbits_table db \ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, \ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-3dn.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquant-3dn.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-3dn.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquant-3dn.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,233 @@ +; +; jquant.asm - sample data conversion and quantization (3DNow! & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_3dnow) PRIVATE + +EXTN(jsimd_convsamp_float_3dnow): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7,mm7 + psllw mm7,7 + packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0,mm7 ; mm0=(01234567) + psubb mm1,mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4,mm2 ; mm4=(***0***1) + punpckhwd mm2,mm2 ; mm2=(***2***3) + punpcklwd mm5,mm0 ; mm5=(***4***5) + punpckhwd mm0,mm0 ; mm0=(***6***7) + + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) + pi2fd mm4,mm4 + pi2fd mm2,mm2 + psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) + pi2fd mm5,mm5 + pi2fd mm0,mm0 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + + punpcklwd mm6,mm3 ; mm6=(***8***9) + punpckhwd mm3,mm3 ; mm3=(***A***B) + punpcklwd mm4,mm1 ; mm4=(***C***D) + punpckhwd mm1,mm1 ; mm1=(***E***F) + + psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) + pi2fd mm6,mm6 + pi2fd mm3,mm3 + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) + pi2fd mm4,mm4 + pi2fd mm1,mm1 + + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_3dnow (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_3dnow) PRIVATE + +EXTN(jsimd_quantize_float_3dnow): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic) + movd mm7,eax + punpckldq mm7,mm7 ; mm7={12582912.0F 12582912.0F} + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm0,mm7 ; mm0=(00 ** 01 **) + pfadd mm1,mm7 ; mm1=(02 ** 03 **) + pfadd mm2,mm7 ; mm0=(04 ** 05 **) + pfadd mm3,mm7 ; mm1=(06 ** 07 **) + + movq mm4,mm0 + punpcklwd mm0,mm1 ; mm0=(00 02 ** **) + punpckhwd mm4,mm1 ; mm4=(01 03 ** **) + movq mm5,mm2 + punpcklwd mm2,mm3 ; mm2=(04 06 ** **) + punpckhwd mm5,mm3 ; mm5=(05 07 ** **) + + punpcklwd mm0,mm4 ; mm0=(00 01 02 03) + punpcklwd mm2,mm5 ; mm2=(04 05 06 07) + + movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] + movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm6,mm7 ; mm0=(10 ** 11 **) + pfadd mm1,mm7 ; mm4=(12 ** 13 **) + pfadd mm3,mm7 ; mm0=(14 ** 15 **) + pfadd mm4,mm7 ; mm4=(16 ** 17 **) + + movq mm5,mm6 + punpcklwd mm6,mm1 ; mm6=(10 12 ** **) + punpckhwd mm5,mm1 ; mm5=(11 13 ** **) + movq mm1,mm3 + punpcklwd mm3,mm4 ; mm3=(14 16 ** **) + punpckhwd mm1,mm4 ; mm1=(15 17 ** **) + + punpcklwd mm6,mm5 ; mm6=(10 11 12 13) + punpcklwd mm3,mm1 ; mm3=(14 15 16 17) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquantf-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquantf-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquantf-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquantf-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,158 @@ +; +; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11 = JDIMENSION start_col +; r12 = FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse2) PRIVATE + +EXTN(jsimd_convsamp_float_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + pcmpeqw xmm7,xmm7 + psllw xmm7,7 + packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) + + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/2 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] + + psubb xmm0,xmm7 ; xmm0=(01234567) + psubb xmm1,xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2,xmm2 ; xmm2=(0123) + cvtdq2ps xmm0,xmm0 ; xmm0=(4567) + psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3,xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 + + add rsi, byte 2*SIZEOF_JSAMPROW + add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec rcx + jnz short .convloop + + pop rbx + uncollect_args + pop rbp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +; r10 = JCOEFPTR coef_block +; r11 = FAST_FLOAT *divisors +; r12 = FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse2) PRIVATE + +EXTN(jsimd_quantize_float_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/16 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0,xmm0 + cvtps2dq xmm1,xmm1 + cvtps2dq xmm2,xmm2 + cvtps2dq xmm3,xmm3 + + packssdw xmm0,xmm1 + packssdw xmm2,xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2 + + add rsi, byte 16*SIZEOF_FAST_FLOAT + add rdx, byte 16*SIZEOF_FAST_FLOAT + add rdi, byte 16*SIZEOF_JCOEF + dec rax + jnz short .quantloop + + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquantf-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquantf-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquantf-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquantf-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,171 @@ +; +; jquantf.asm - sample data conversion and quantization (SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse2) PRIVATE + +EXTN(jsimd_convsamp_float_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw xmm7,xmm7 + psllw xmm7,7 + packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb xmm0,xmm7 ; xmm0=(01234567) + psubb xmm1,xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2,xmm2 ; xmm2=(0123) + cvtdq2ps xmm0,xmm0 ; xmm0=(4567) + psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3,xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse2) PRIVATE + +EXTN(jsimd_quantize_float_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0,xmm0 + cvtps2dq xmm1,xmm1 + cvtps2dq xmm2,xmm2 + cvtps2dq xmm3,xmm3 + + packssdw xmm0,xmm1 + packssdw xmm2,xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-altivec.c glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-altivec.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-altivec.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-altivec.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,252 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */ + +#include "jsimd_altivec.h" + + +/* NOTE: The address will either be aligned or offset by 8 bytes, so we can + * always get the data we want by using a single vector load (although we may + * have to permute the result.) + */ +#if __BIG_ENDIAN__ + +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_ld(0, elemptr); \ + if ((size_t)elemptr & 15) \ + in##row = vec_perm(in##row, in##row, vec_lvsl(0, elemptr)); \ +} + +#else + +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_vsx_ld(0, elemptr); \ +} + +#endif + + +void +jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + JSAMPROW elemptr; + + __vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7; + __vector short out0, out1, out2, out3, out4, out5, out6, out7; + + /* Constants */ + __vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) }; + __vector unsigned char pb_zero = { __16X(0) }; + + LOAD_ROW(0); + LOAD_ROW(1); + LOAD_ROW(2); + LOAD_ROW(3); + LOAD_ROW(4); + LOAD_ROW(5); + LOAD_ROW(6); + LOAD_ROW(7); + + out0 = (__vector short)VEC_UNPACKHU(in0); + out1 = (__vector short)VEC_UNPACKHU(in1); + out2 = (__vector short)VEC_UNPACKHU(in2); + out3 = (__vector short)VEC_UNPACKHU(in3); + out4 = (__vector short)VEC_UNPACKHU(in4); + out5 = (__vector short)VEC_UNPACKHU(in5); + out6 = (__vector short)VEC_UNPACKHU(in6); + out7 = (__vector short)VEC_UNPACKHU(in7); + + out0 = vec_sub(out0, pw_centerjsamp); + out1 = vec_sub(out1, pw_centerjsamp); + out2 = vec_sub(out2, pw_centerjsamp); + out3 = vec_sub(out3, pw_centerjsamp); + out4 = vec_sub(out4, pw_centerjsamp); + out5 = vec_sub(out5, pw_centerjsamp); + out6 = vec_sub(out6, pw_centerjsamp); + out7 = vec_sub(out7, pw_centerjsamp); + + vec_st(out0, 0, workspace); + vec_st(out1, 16, workspace); + vec_st(out2, 32, workspace); + vec_st(out3, 48, workspace); + vec_st(out4, 64, workspace); + vec_st(out5, 80, workspace); + vec_st(out6, 96, workspace); + vec_st(out7, 112, workspace); +} + + +#define WORD_BIT 16 + +/* There is no AltiVec 16-bit unsigned multiply instruction, hence this. + We basically need an unsigned equivalent of vec_madds(). */ + +#define MULTIPLY(vs0, vs1, out) { \ + tmpe = vec_mule((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + tmpo = vec_mulo((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + out = (__vector short)vec_perm((__vector unsigned short)tmpe, \ + (__vector unsigned short)tmpo, \ + shift_pack_index); \ +} + +void +jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s, + corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7, + recip0, recip1, recip2, recip3, recip4, recip5, recip6, recip7, + scale0, scale1, scale2, scale3, scale4, scale5, scale6, scale7; + __vector unsigned int tmpe, tmpo; + + /* Constants */ + __vector unsigned short pw_word_bit_m1 = { __8X(WORD_BIT - 1) }; +#if __BIG_ENDIAN__ + __vector unsigned char shift_pack_index = + {0,1,16,17,4,5,20,21,8,9,24,25,12,13,28,29}; +#else + __vector unsigned char shift_pack_index = + {2,3,18,19,6,7,22,23,10,11,26,27,14,15,30,31}; +#endif + + row0 = vec_ld(0, workspace); + row1 = vec_ld(16, workspace); + row2 = vec_ld(32, workspace); + row3 = vec_ld(48, workspace); + row4 = vec_ld(64, workspace); + row5 = vec_ld(80, workspace); + row6 = vec_ld(96, workspace); + row7 = vec_ld(112, workspace); + + /* Branch-less absolute value */ + row0s = vec_sra(row0, pw_word_bit_m1); + row1s = vec_sra(row1, pw_word_bit_m1); + row2s = vec_sra(row2, pw_word_bit_m1); + row3s = vec_sra(row3, pw_word_bit_m1); + row4s = vec_sra(row4, pw_word_bit_m1); + row5s = vec_sra(row5, pw_word_bit_m1); + row6s = vec_sra(row6, pw_word_bit_m1); + row7s = vec_sra(row7, pw_word_bit_m1); + row0 = vec_xor(row0, row0s); + row1 = vec_xor(row1, row1s); + row2 = vec_xor(row2, row2s); + row3 = vec_xor(row3, row3s); + row4 = vec_xor(row4, row4s); + row5 = vec_xor(row5, row5s); + row6 = vec_xor(row6, row6s); + row7 = vec_xor(row7, row7s); + row0 = vec_sub(row0, row0s); + row1 = vec_sub(row1, row1s); + row2 = vec_sub(row2, row2s); + row3 = vec_sub(row3, row3s); + row4 = vec_sub(row4, row4s); + row5 = vec_sub(row5, row5s); + row6 = vec_sub(row6, row6s); + row7 = vec_sub(row7, row7s); + + corr0 = vec_ld(DCTSIZE2 * 2, divisors); + corr1 = vec_ld(DCTSIZE2 * 2 + 16, divisors); + corr2 = vec_ld(DCTSIZE2 * 2 + 32, divisors); + corr3 = vec_ld(DCTSIZE2 * 2 + 48, divisors); + corr4 = vec_ld(DCTSIZE2 * 2 + 64, divisors); + corr5 = vec_ld(DCTSIZE2 * 2 + 80, divisors); + corr6 = vec_ld(DCTSIZE2 * 2 + 96, divisors); + corr7 = vec_ld(DCTSIZE2 * 2 + 112, divisors); + + row0 = vec_add(row0, corr0); + row1 = vec_add(row1, corr1); + row2 = vec_add(row2, corr2); + row3 = vec_add(row3, corr3); + row4 = vec_add(row4, corr4); + row5 = vec_add(row5, corr5); + row6 = vec_add(row6, corr6); + row7 = vec_add(row7, corr7); + + recip0 = vec_ld(0, divisors); + recip1 = vec_ld(16, divisors); + recip2 = vec_ld(32, divisors); + recip3 = vec_ld(48, divisors); + recip4 = vec_ld(64, divisors); + recip5 = vec_ld(80, divisors); + recip6 = vec_ld(96, divisors); + recip7 = vec_ld(112, divisors); + + MULTIPLY(row0, recip0, row0); + MULTIPLY(row1, recip1, row1); + MULTIPLY(row2, recip2, row2); + MULTIPLY(row3, recip3, row3); + MULTIPLY(row4, recip4, row4); + MULTIPLY(row5, recip5, row5); + MULTIPLY(row6, recip6, row6); + MULTIPLY(row7, recip7, row7); + + scale0 = vec_ld(DCTSIZE2 * 4, divisors); + scale1 = vec_ld(DCTSIZE2 * 4 + 16, divisors); + scale2 = vec_ld(DCTSIZE2 * 4 + 32, divisors); + scale3 = vec_ld(DCTSIZE2 * 4 + 48, divisors); + scale4 = vec_ld(DCTSIZE2 * 4 + 64, divisors); + scale5 = vec_ld(DCTSIZE2 * 4 + 80, divisors); + scale6 = vec_ld(DCTSIZE2 * 4 + 96, divisors); + scale7 = vec_ld(DCTSIZE2 * 4 + 112, divisors); + + MULTIPLY(row0, scale0, row0); + MULTIPLY(row1, scale1, row1); + MULTIPLY(row2, scale2, row2); + MULTIPLY(row3, scale3, row3); + MULTIPLY(row4, scale4, row4); + MULTIPLY(row5, scale5, row5); + MULTIPLY(row6, scale6, row6); + MULTIPLY(row7, scale7, row7); + + row0 = vec_xor(row0, row0s); + row1 = vec_xor(row1, row1s); + row2 = vec_xor(row2, row2s); + row3 = vec_xor(row3, row3s); + row4 = vec_xor(row4, row4s); + row5 = vec_xor(row5, row5s); + row6 = vec_xor(row6, row6s); + row7 = vec_xor(row7, row7s); + row0 = vec_sub(row0, row0s); + row1 = vec_sub(row1, row1s); + row2 = vec_sub(row2, row2s); + row3 = vec_sub(row3, row3s); + row4 = vec_sub(row4, row4s); + row5 = vec_sub(row5, row5s); + row6 = vec_sub(row6, row6s); + row7 = vec_sub(row7, row7s); + + vec_st(row0, 0, coef_block); + vec_st(row1, 16, coef_block); + vec_st(row2, 32, coef_block); + vec_st(row3, 48, coef_block); + vec_st(row4, 64, coef_block); + vec_st(row5, 80, coef_block); + vec_st(row6, 96, coef_block); + vec_st(row7, 112, coef_block); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-sse2-64.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-sse2-64.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-sse2-64.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-sse2-64.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,187 @@ +; +; jquanti.asm - sample data conversion and quantization (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2009 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11 = JDIMENSION start_col +; r12 = DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_sse2) PRIVATE + +EXTN(jsimd_convsamp_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + push rbx + + pxor xmm6,xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7,xmm7 + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/4 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0,xmm6 ; xmm0=(01234567) + punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) + paddw xmm0,xmm7 + paddw xmm1,xmm7 + punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2,xmm7 + paddw xmm3,xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 4*SIZEOF_JSAMPROW + add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec rcx + jnz short .convloop + + pop rbx + uncollect_args + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) + +; r10 = JCOEFPTR coef_block +; r11 = DCTELEM *divisors +; r12 = DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_sse2) PRIVATE + +EXTN(jsimd_quantize_sse2): + push rbp + mov rax,rsp + mov rbp,rsp + collect_args + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/32 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + psraw xmm4,(WORD_BIT-1) + psraw xmm5,(WORD_BIT-1) + psraw xmm6,(WORD_BIT-1) + psraw xmm7,(WORD_BIT-1) + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal + pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)] + pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)] + + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 + psubw xmm1,xmm5 + psubw xmm2,xmm6 + psubw xmm3,xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 32*SIZEOF_DCTELEM + add rdx, byte 32*SIZEOF_DCTELEM + add rdi, byte 32*SIZEOF_JCOEF + dec rax + jnz near .quantloop + + uncollect_args + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-sse2.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-sse2.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquanti-sse2.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquanti-sse2.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,200 @@ +; +; jquanti.asm - sample data conversion and quantization (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_sse2) PRIVATE + +EXTN(jsimd_convsamp_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor xmm6,xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7,xmm7 + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0,xmm6 ; xmm0=(01234567) + punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) + paddw xmm0,xmm7 + paddw xmm1,xmm7 + punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2,xmm7 + paddw xmm3,xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; DCTELEM *divisors +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_sse2) PRIVATE + +EXTN(jsimd_quantize_sse2): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/32 + alignx 16,7 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)] + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + psraw xmm4,(WORD_BIT-1) + psraw xmm5,(WORD_BIT-1) + psraw xmm6,(WORD_BIT-1) + psraw xmm7,(WORD_BIT-1) + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,edx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,edx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,edx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)] + pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)] + + pxor xmm0,xmm4 + pxor xmm1,xmm5 + pxor xmm2,xmm6 + pxor xmm3,xmm7 + psubw xmm0,xmm4 + psubw xmm1,xmm5 + psubw xmm2,xmm6 + psubw xmm3,xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 32*SIZEOF_DCTELEM + add edx, byte 32*SIZEOF_DCTELEM + add edi, byte 32*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-mmx.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquant-mmx.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-mmx.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquant-mmx.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,274 @@ +; +; jquant.asm - sample data conversion and quantization (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_mmx (JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_convsamp_mmx) PRIVATE + +EXTN(jsimd_convsamp_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor mm6,mm6 ; mm6=(all 0's) + pcmpeqw mm7,mm7 + psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567) + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm2=(GHIJKLMN) + movq mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm3=(OPQRSTUV) + + movq mm4,mm0 + punpcklbw mm0,mm6 ; mm0=(0123) + punpckhbw mm4,mm6 ; mm4=(4567) + movq mm5,mm1 + punpcklbw mm1,mm6 ; mm1=(89AB) + punpckhbw mm5,mm6 ; mm5=(CDEF) + + paddw mm0,mm7 + paddw mm4,mm7 + paddw mm1,mm7 + paddw mm5,mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5 + + movq mm0,mm2 + punpcklbw mm2,mm6 ; mm2=(GHIJ) + punpckhbw mm0,mm6 ; mm0=(KLMN) + movq mm4,mm3 + punpcklbw mm3,mm6 ; mm3=(OPQR) + punpckhbw mm4,mm6 ; mm4=(STUV) + + paddw mm2,mm7 + paddw mm0,mm7 + paddw mm3,mm7 + paddw mm4,mm7 + + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_mmx (JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) +%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) +%define SCALE(m,n,b) MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) +%define SHIFT(m,n,b) MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM) + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; DCTELEM *divisors +%define workspace ebp+16 ; DCTELEM *workspace + + align 16 + global EXTN(jsimd_quantize_mmx) PRIVATE + +EXTN(jsimd_quantize_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov ah, 2 + alignx 16,7 +.quantloop1: + mov al, DCTSIZE2/8/2 + alignx 16,7 +.quantloop2: + movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)] + + movq mm0,mm2 + movq mm1,mm3 + + psraw mm2,(WORD_BIT-1) ; -1 if value < 0, 0 otherwise + psraw mm3,(WORD_BIT-1) + + pxor mm0,mm2 ; val = -val + pxor mm1,mm3 + psubw mm0,mm2 + psubw mm1,mm3 + + ; + ; MMX is an annoyingly crappy instruction set. It has two + ; misfeatures that are causing problems here: + ; + ; - All multiplications are signed. + ; + ; - The second operand for the shifts is not treated as packed. + ; + ; + ; We work around the first problem by implementing this algorithm: + ; + ; unsigned long unsigned_multiply(unsigned short x, unsigned short y) + ; { + ; enum { SHORT_BIT = 16 }; + ; signed short sx = (signed short) x; + ; signed short sy = (signed short) y; + ; signed long sz; + ; + ; sz = (long) sx * (long) sy; /* signed multiply */ + ; + ; if (sx < 0) sz += (long) sy << SHORT_BIT; + ; if (sy < 0) sz += (long) sx << SHORT_BIT; + ; + ; return (unsigned long) sz; + ; } + ; + ; (note that a negative sx adds _sy_ and vice versa) + ; + ; For the second problem, we replace the shift by a multiplication. + ; Unfortunately that means we have to deal with the signed issue again. + ; + + paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw mm1, MMWORD [CORRECTION(0,1,edx)] + + movq mm4,mm0 ; store current value for later + movq mm5,mm1 + pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)] + paddw mm0,mm4 ; reciprocal is always negative (MSB=1), + paddw mm1,mm5 ; so we always need to add the initial value + ; (input value is never negative as we + ; inverted it at the start of this routine) + + ; here it gets a bit tricky as both scale + ; and mm0/mm1 can be negative + movq mm6, MMWORD [SCALE(0,0,edx)] ; scale + movq mm7, MMWORD [SCALE(0,1,edx)] + movq mm4,mm0 + movq mm5,mm1 + pmulhw mm0,mm6 + pmulhw mm1,mm7 + + psraw mm6,(WORD_BIT-1) ; determine if scale is negative + psraw mm7,(WORD_BIT-1) + + pand mm6,mm4 ; and add input if it is + pand mm7,mm5 + paddw mm0,mm6 + paddw mm1,mm7 + + psraw mm4,(WORD_BIT-1) ; then check if negative input + psraw mm5,(WORD_BIT-1) + + pand mm4, MMWORD [SCALE(0,0,edx)] ; and add scale if it is + pand mm5, MMWORD [SCALE(0,1,edx)] + paddw mm0,mm4 + paddw mm1,mm5 + + pxor mm0,mm2 ; val = -val + pxor mm1,mm3 + psubw mm0,mm2 + psubw mm1,mm3 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1 + + add esi, byte 8*SIZEOF_DCTELEM + add edx, byte 8*SIZEOF_DCTELEM + add edi, byte 8*SIZEOF_JCOEF + dec al + jnz near .quantloop2 + dec ah + jnz near .quantloop1 ; to avoid branch misprediction + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-sse.asm glmark2-2021.02/src/libjpeg-turbo/simd/jquant-sse.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jquant-sse.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jquant-sse.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,211 @@ +; +; jquant.asm - sample data conversion and quantization (SSE & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_convsamp_float_sse) PRIVATE + +EXTN(jsimd_convsamp_float_sse): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7,mm7 + psllw mm7,7 + packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0,mm7 ; mm0=(01234567) + psubb mm1,mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4,mm2 ; mm4=(***0***1) + punpckhwd mm2,mm2 ; mm2=(***2***3) + punpcklwd mm5,mm0 ; mm5=(***4***5) + punpckhwd mm0,mm0 ; mm0=(***6***7) + + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) + cvtpi2ps xmm0,mm4 ; xmm0=(01**) + cvtpi2ps xmm1,mm2 ; xmm1=(23**) + psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) + cvtpi2ps xmm2,mm5 ; xmm2=(45**) + cvtpi2ps xmm3,mm0 ; xmm3=(67**) + + punpcklwd mm6,mm3 ; mm6=(***8***9) + punpckhwd mm3,mm3 ; mm3=(***A***B) + punpcklwd mm4,mm1 ; mm4=(***C***D) + punpckhwd mm1,mm1 ; mm1=(***E***F) + + psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) + cvtpi2ps xmm4,mm6 ; xmm4=(89**) + cvtpi2ps xmm5,mm3 ; xmm5=(AB**) + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) + cvtpi2ps xmm6,mm4 ; xmm6=(CD**) + cvtpi2ps xmm7,mm1 ; xmm7=(EF**) + + movlhps xmm0,xmm1 ; xmm0=(0123) + movlhps xmm2,xmm3 ; xmm2=(4567) + movlhps xmm4,xmm5 ; xmm4=(89AB) + movlhps xmm6,xmm7 ; xmm6=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse (JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT *divisors +%define workspace ebp+16 ; FAST_FLOAT *workspace + + align 16 + global EXTN(jsimd_quantize_float_sse) PRIVATE + +EXTN(jsimd_quantize_float_sse): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + movhlps xmm4,xmm0 + movhlps xmm5,xmm1 + + cvtps2pi mm0,xmm0 + cvtps2pi mm1,xmm1 + cvtps2pi mm4,xmm4 + cvtps2pi mm5,xmm5 + + movhlps xmm6,xmm2 + movhlps xmm7,xmm3 + + cvtps2pi mm2,xmm2 + cvtps2pi mm3,xmm3 + cvtps2pi mm6,xmm6 + cvtps2pi mm7,xmm7 + + packssdw mm0,mm4 + packssdw mm1,mm5 + packssdw mm2,mm6 + packssdw mm3,mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_altivec.h glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_altivec.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_altivec.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_altivec.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,99 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" +#include + + +/* Common code */ + +#define __4X(a) a, a, a, a +#define __4X2(a, b) a, b, a, b, a, b, a, b +#define __8X(a) __4X(a), __4X(a) +#define __16X(a) __8X(a), __8X(a) + +#define TRANSPOSE(row, col) \ +{ \ + __vector short row04l, row04h, row15l, row15h, \ + row26l, row26h, row37l, row37h; \ + __vector short col01e, col01o, col23e, col23o, \ + col45e, col45o, col67e, col67o; \ + \ + /* transpose coefficients (phase 1) */ \ + row04l = vec_mergeh(row##0, row##4); /* row04l=(00 40 01 41 02 42 03 43) */ \ + row04h = vec_mergel(row##0, row##4); /* row04h=(04 44 05 45 06 46 07 47) */ \ + row15l = vec_mergeh(row##1, row##5); /* row15l=(10 50 11 51 12 52 13 53) */ \ + row15h = vec_mergel(row##1, row##5); /* row15h=(14 54 15 55 16 56 17 57) */ \ + row26l = vec_mergeh(row##2, row##6); /* row26l=(20 60 21 61 22 62 23 63) */ \ + row26h = vec_mergel(row##2, row##6); /* row26h=(24 64 25 65 26 66 27 67) */ \ + row37l = vec_mergeh(row##3, row##7); /* row37l=(30 70 31 71 32 72 33 73) */ \ + row37h = vec_mergel(row##3, row##7); /* row37h=(34 74 35 75 36 76 37 77) */ \ + \ + /* transpose coefficients (phase 2) */ \ + col01e = vec_mergeh(row04l, row26l); /* col01e=(00 20 40 60 01 21 41 61) */ \ + col23e = vec_mergel(row04l, row26l); /* col23e=(02 22 42 62 03 23 43 63) */ \ + col45e = vec_mergeh(row04h, row26h); /* col45e=(04 24 44 64 05 25 45 65) */ \ + col67e = vec_mergel(row04h, row26h); /* col67e=(06 26 46 66 07 27 47 67) */ \ + col01o = vec_mergeh(row15l, row37l); /* col01o=(10 30 50 70 11 31 51 71) */ \ + col23o = vec_mergel(row15l, row37l); /* col23o=(12 32 52 72 13 33 53 73) */ \ + col45o = vec_mergeh(row15h, row37h); /* col45o=(14 34 54 74 15 35 55 75) */ \ + col67o = vec_mergel(row15h, row37h); /* col67o=(16 36 56 76 17 37 57 77) */ \ + \ + /* transpose coefficients (phase 3) */ \ + col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \ + col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \ + col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \ + col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \ + col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \ + col##5 = vec_mergel(col45e, col45o); /* col5=(05 15 25 35 45 55 65 75) */ \ + col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \ + col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \ +} + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + + +/* Macros to abstract big/little endian bit twiddling */ + +#if __BIG_ENDIAN__ + +#define VEC_LD(a, b) vec_ld(a, b) +#define VEC_ST(a, b, c) vec_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(pb_zero, a) +#define VEC_UNPACKLU(a) vec_mergel(pb_zero, a) + +#else + +#define VEC_LD(a, b) vec_vsx_ld(a, b) +#define VEC_ST(a, b, c) vec_vsx_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(a, pb_zero) +#define VEC_UNPACKLU(a) vec_mergel(a, pb_zero) + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm64.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm64.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm64.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm64.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,802 @@ +/* + * jsimd_arm64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015-2016 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 64-bit ARM architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +#include +#include +#include + +#define JSIMD_FASTLD3 1 +#define JSIMD_FASTST3 2 +#define JSIMD_FASTTBL 4 + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; +static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 | + JSIMD_FASTTBL; + +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) + +LOCAL(int) +check_cpuinfo (char *buffer, const char *field, char *value) +{ + char *p; + if (*value == 0) + return 0; + if (strncmp(buffer, field, strlen(field)) != 0) + return 0; + buffer += strlen(field); + while (isspace(*buffer)) + buffer++; + + /* Check if 'value' is present in the buffer as a separate word */ + while ((p = strstr(buffer, value))) { + if (p > buffer && !isspace(*(p - 1))) { + buffer++; + continue; + } + p += strlen(value); + if (*p != 0 && !isspace(*p)) { + buffer++; + continue; + } + return 1; + } + return 0; +} + +LOCAL(int) +parse_proc_cpuinfo (int bufsize) +{ + char *buffer = (char *)malloc(bufsize); + FILE *fd; + + if (!buffer) + return 0; + + fd = fopen("/proc/cpuinfo", "r"); + if (fd) { + while (fgets(buffer, bufsize, fd)) { + if (!strchr(buffer, '\n') && !feof(fd)) { + /* "impossible" happened - insufficient size of the buffer! */ + fclose(fd); + free(buffer); + return 0; + } + if (check_cpuinfo(buffer, "CPU part", "0xd03") || + check_cpuinfo(buffer, "CPU part", "0xd07")) + /* The Cortex-A53 has a slow tbl implementation. We can gain a few + percent speedup by disabling the use of that instruction. The + speedup on Cortex-A57 is more subtle but still measurable. */ + simd_features &= ~JSIMD_FASTTBL; + else if (check_cpuinfo(buffer, "CPU part", "0x0a1")) + /* The SIMD version of Huffman encoding is slower than the C version on + Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that + CPU. */ + simd_huffman = simd_features = 0; + } + fclose(fd); + } + free(buffer); + return 1; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ + +/* + * ARMv8 architectures support NEON extensions by default. + * It is no longer optional as it was with ARMv7. + */ + + +LOCAL(void) +init_simd (void) +{ + char *env = NULL; +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + int bufsize = 1024; /* an initial guess for the line buffer size limit */ +#endif + + if (simd_support != ~0U) + return; + + simd_support = 0; + + simd_support |= JSIMD_ARM_NEON; +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + while (!parse_proc_cpuinfo(bufsize)) { + bufsize *= 2; + if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) + break; + } +#endif + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENEON"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_ARM_NEON; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; + env = getenv("JSIMD_FASTLD3"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_features |= JSIMD_FASTLD3; + if ((env != NULL) && (strcmp(env, "0") == 0)) + simd_features &= ~JSIMD_FASTLD3; + env = getenv("JSIMD_FASTST3"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_features |= JSIMD_FASTST3; + if ((env != NULL) && (strcmp(env, "0") == 0)) + simd_features &= ~JSIMD_FASTST3; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extrgb_ycc_convert_neon; + else + neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct=jsimd_extrgbx_ycc_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extbgr_ycc_convert_neon; + else + neonfct=jsimd_extbgr_ycc_convert_neon_slowld3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct=jsimd_extbgrx_ycc_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct=jsimd_extxbgr_ycc_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct=jsimd_extxrgb_ycc_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTLD3) + neonfct=jsimd_extrgb_ycc_convert_neon; + else + neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; + break; + } + + neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extrgb_convert_neon; + else + neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct=jsimd_ycc_extrgbx_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extbgr_convert_neon; + else + neonfct=jsimd_ycc_extbgr_convert_neon_slowst3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct=jsimd_ycc_extbgrx_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct=jsimd_ycc_extxbgr_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct=jsimd_ycc_extxrgb_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTST3) + neonfct=jsimd_ycc_extrgb_convert_neon; + else + neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; + break; + } + + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, + output_buf, num_rows); +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_neon(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_neon(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_neon(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_neon(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON && simd_huffman) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + if (simd_features & JSIMD_FASTTBL) + return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, + dctbl, actbl); + else + return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, + last_dc_val, dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm64_neon.S glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm64_neon.S --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm64_neon.S 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm64_neon.S 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,3453 @@ +/* + * ARMv8 NEON optimizations for libjpeg-turbo + * + * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). + * All rights reserved. + * Author: Siarhei Siamashka + * Copyright (C) 2013-2014, Linaro Limited + * Author: Ragesh Radhakrishnan + * Copyright (C) 2014-2016, D. R. Commander. All Rights Reserved. + * Copyright (C) 2015-2016, Matthieu Darbois. All Rights Reserved. + * Copyright (C) 2016, Siarhei Siamashka. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */ +#endif + +.text + + +#define RESPECT_STRICT_ALIGNMENT 1 + + +/*****************************************************************************/ + +/* Supplementary macro for setting function attributes */ +.macro asm_function fname +#ifdef __APPLE__ + .globl _\fname +_\fname: +#else + .global \fname +#ifdef __ELF__ + .hidden \fname + .type \fname, %function +#endif +\fname: +#endif +.endm + +/* Transpose elements of single 128 bit registers */ +.macro transpose_single x0, x1, xi, xilen, literal + ins \xi\xilen[0], \x0\xilen[0] + ins \x1\xilen[0], \x0\xilen[1] + trn1 \x0\literal, \x0\literal, \x1\literal + trn2 \x1\literal, \xi\literal, \x1\literal +.endm + +/* Transpose elements of 2 differnet registers */ +.macro transpose x0, x1, xi, xilen, literal + mov \xi\xilen, \x0\xilen + trn1 \x0\literal, \x0\literal, \x1\literal + trn2 \x1\literal, \xi\literal, \x1\literal +.endm + +/* Transpose a block of 4x4 coefficients in four 64-bit registers */ +.macro transpose_4x4_32 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen + mov \xi\xilen, \x0\xilen + trn1 \x0\x0len, \x0\x0len, \x2\x2len + trn2 \x2\x2len, \xi\x0len, \x2\x2len + mov \xi\xilen, \x1\xilen + trn1 \x1\x1len, \x1\x1len, \x3\x3len + trn2 \x3\x3len, \xi\x1len, \x3\x3len +.endm + +.macro transpose_4x4_16 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen + mov \xi\xilen, \x0\xilen + trn1 \x0\x0len, \x0\x0len, \x1\x1len + trn2 \x1\x2len, \xi\x0len, \x1\x2len + mov \xi\xilen, \x2\xilen + trn1 \x2\x2len, \x2\x2len, \x3\x3len + trn2 \x3\x2len, \xi\x1len, \x3\x3len +.endm + +.macro transpose_4x4 x0, x1, x2, x3, x5 + transpose_4x4_16 \x0, .4h, \x1, .4h, \x2, .4h, \x3, .4h, \x5, .16b + transpose_4x4_32 \x0, .2s, \x1, .2s, \x2, .2s, \x3, .2s, \x5, .16b +.endm + +.macro transpose_8x8 l0, l1, l2, l3, l4, l5, l6, l7, t0, t1, t2, t3 + trn1 \t0\().8h, \l0\().8h, \l1\().8h + trn1 \t1\().8h, \l2\().8h, \l3\().8h + trn1 \t2\().8h, \l4\().8h, \l5\().8h + trn1 \t3\().8h, \l6\().8h, \l7\().8h + trn2 \l1\().8h, \l0\().8h, \l1\().8h + trn2 \l3\().8h, \l2\().8h, \l3\().8h + trn2 \l5\().8h, \l4\().8h, \l5\().8h + trn2 \l7\().8h, \l6\().8h, \l7\().8h + + trn1 \l4\().4s, \t2\().4s, \t3\().4s + trn2 \t3\().4s, \t2\().4s, \t3\().4s + trn1 \t2\().4s, \t0\().4s, \t1\().4s + trn2 \l2\().4s, \t0\().4s, \t1\().4s + trn1 \t0\().4s, \l1\().4s, \l3\().4s + trn2 \l3\().4s, \l1\().4s, \l3\().4s + trn2 \t1\().4s, \l5\().4s, \l7\().4s + trn1 \l5\().4s, \l5\().4s, \l7\().4s + + trn2 \l6\().2d, \l2\().2d, \t3\().2d + trn1 \l0\().2d, \t2\().2d, \l4\().2d + trn1 \l1\().2d, \t0\().2d, \l5\().2d + trn2 \l7\().2d, \l3\().2d, \t1\().2d + trn1 \l2\().2d, \l2\().2d, \t3\().2d + trn2 \l4\().2d, \t2\().2d, \l4\().2d + trn1 \l3\().2d, \l3\().2d, \t1\().2d + trn2 \l5\().2d, \t0\().2d, \l5\().2d +.endm + + +#define CENTERJSAMPLE 128 + +/*****************************************************************************/ + +/* + * Perform dequantization and inverse DCT on one block of coefficients. + * + * GLOBAL(void) + * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, + * JSAMPARRAY output_buf, JDIMENSION output_col) + */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +.balign 16 +Ljsimd_idct_islow_neon_consts: + .short F_0_298 + .short -F_0_390 + .short F_0_541 + .short F_0_765 + .short - F_0_899 + .short F_1_175 + .short F_1_501 + .short - F_1_847 + .short - F_1_961 + .short F_2_053 + .short - F_2_562 + .short F_3_072 + .short 0 /* padding */ + .short 0 + .short 0 + .short 0 + +#undef F_0_298 +#undef F_0_390 +#undef F_0_541 +#undef F_0_765 +#undef F_0_899 +#undef F_1_175 +#undef F_1_501 +#undef F_1_847 +#undef F_1_961 +#undef F_2_053 +#undef F_2_562 +#undef F_3_072 + +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] + +asm_function jsimd_idct_islow_neon + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x9 + TMP4 .req x10 + TMP5 .req x11 + TMP6 .req x12 + TMP7 .req x13 + TMP8 .req x14 + + sub sp, sp, #64 + adr x15, Ljsimd_idct_islow_neon_consts + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32 + ld1 {v0.8h, v1.8h}, [x15] + ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [COEF_BLOCK], #64 + ld1 {v18.8h, v19.8h, v20.8h, v21.8h}, [DCT_TABLE], #64 + ld1 {v6.8h, v7.8h, v8.8h, v9.8h}, [COEF_BLOCK], #64 + ld1 {v22.8h, v23.8h, v24.8h, v25.8h}, [DCT_TABLE], #64 + + cmeq v16.8h, v3.8h, #0 + cmeq v26.8h, v4.8h, #0 + cmeq v27.8h, v5.8h, #0 + cmeq v28.8h, v6.8h, #0 + cmeq v29.8h, v7.8h, #0 + cmeq v30.8h, v8.8h, #0 + cmeq v31.8h, v9.8h, #0 + + and v10.16b, v16.16b, v26.16b + and v11.16b, v27.16b, v28.16b + and v12.16b, v29.16b, v30.16b + and v13.16b, v31.16b, v10.16b + and v14.16b, v11.16b, v12.16b + mul v2.8h, v2.8h, v18.8h + and v15.16b, v13.16b, v14.16b + shl v10.8h, v2.8h, #(PASS1_BITS) + sqxtn v16.8b, v15.8h + mov TMP1, v16.d[0] + sub sp, sp, #64 + mvn TMP2, TMP1 + + cbnz TMP2, 2f + /* case all AC coeffs are zeros */ + dup v2.2d, v10.d[0] + dup v6.2d, v10.d[1] + mov v3.16b, v2.16b + mov v7.16b, v6.16b + mov v4.16b, v2.16b + mov v8.16b, v6.16b + mov v5.16b, v2.16b + mov v9.16b, v6.16b +1: + /* for this transpose, we should organise data like this: + * 00, 01, 02, 03, 40, 41, 42, 43 + * 10, 11, 12, 13, 50, 51, 52, 53 + * 20, 21, 22, 23, 60, 61, 62, 63 + * 30, 31, 32, 33, 70, 71, 72, 73 + * 04, 05, 06, 07, 44, 45, 46, 47 + * 14, 15, 16, 17, 54, 55, 56, 57 + * 24, 25, 26, 27, 64, 65, 66, 67 + * 34, 35, 36, 37, 74, 75, 76, 77 + */ + trn1 v28.8h, v2.8h, v3.8h + trn1 v29.8h, v4.8h, v5.8h + trn1 v30.8h, v6.8h, v7.8h + trn1 v31.8h, v8.8h, v9.8h + trn2 v16.8h, v2.8h, v3.8h + trn2 v17.8h, v4.8h, v5.8h + trn2 v18.8h, v6.8h, v7.8h + trn2 v19.8h, v8.8h, v9.8h + trn1 v2.4s, v28.4s, v29.4s + trn1 v6.4s, v30.4s, v31.4s + trn1 v3.4s, v16.4s, v17.4s + trn1 v7.4s, v18.4s, v19.4s + trn2 v4.4s, v28.4s, v29.4s + trn2 v8.4s, v30.4s, v31.4s + trn2 v5.4s, v16.4s, v17.4s + trn2 v9.4s, v18.4s, v19.4s + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + shrn v2.4h, v18.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v9.4h, v20.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v3.4h, v22.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v8.4h, v24.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v4.4h, v26.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v7.4h, v28.4s, #16 /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v5.4h, v14.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn v6.4h, v16.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v2.8h, v19.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn2 v9.8h, v21.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn2 v3.8h, v23.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v8.8h, v25.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v4.8h, v27.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v7.8h, v29.4s, #16 /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v5.8h, v15.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v6.8h, v17.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + movi v0.16b, #(CENTERJSAMPLE) + /* Prepare pointers (dual-issue with NEON instructions) */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + sqrshrn v28.8b, v2.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP3, TMP4, [OUTPUT_BUF], 16 + sqrshrn v29.8b, v3.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP1, TMP1, OUTPUT_COL + sqrshrn v30.8b, v4.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP2, TMP2, OUTPUT_COL + sqrshrn v31.8b, v5.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP3, TMP3, OUTPUT_COL + sqrshrn2 v28.16b, v6.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP4, TMP4, OUTPUT_COL + sqrshrn2 v29.16b, v7.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP5, TMP6, [OUTPUT_BUF], 16 + sqrshrn2 v30.16b, v8.8h, #(CONST_BITS+PASS1_BITS+3-16) + ldp TMP7, TMP8, [OUTPUT_BUF], 16 + sqrshrn2 v31.16b, v9.8h, #(CONST_BITS+PASS1_BITS+3-16) + add TMP5, TMP5, OUTPUT_COL + add v16.16b, v28.16b, v0.16b + add TMP6, TMP6, OUTPUT_COL + add v18.16b, v29.16b, v0.16b + add TMP7, TMP7, OUTPUT_COL + add v20.16b, v30.16b, v0.16b + add TMP8, TMP8, OUTPUT_COL + add v22.16b, v31.16b, v0.16b + + /* Transpose the final 8-bit samples */ + trn1 v28.16b, v16.16b, v18.16b + trn1 v30.16b, v20.16b, v22.16b + trn2 v29.16b, v16.16b, v18.16b + trn2 v31.16b, v20.16b, v22.16b + + trn1 v16.8h, v28.8h, v30.8h + trn2 v18.8h, v28.8h, v30.8h + trn1 v20.8h, v29.8h, v31.8h + trn2 v22.8h, v29.8h, v31.8h + + uzp1 v28.4s, v16.4s, v18.4s + uzp2 v30.4s, v16.4s, v18.4s + uzp1 v29.4s, v20.4s, v22.4s + uzp2 v31.4s, v20.4s, v22.4s + + /* Store results to the output buffer */ + st1 {v28.d}[0], [TMP1] + st1 {v29.d}[0], [TMP2] + st1 {v28.d}[1], [TMP3] + st1 {v29.d}[1], [TMP4] + st1 {v30.d}[0], [TMP5] + st1 {v31.d}[0], [TMP6] + st1 {v30.d}[1], [TMP7] + st1 {v31.d}[1], [TMP8] + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32 + blr x30 + +.balign 16 +2: + mul v3.8h, v3.8h, v19.8h + mul v4.8h, v4.8h, v20.8h + mul v5.8h, v5.8h, v21.8h + add TMP4, xzr, TMP2, LSL #32 + mul v6.8h, v6.8h, v22.8h + mul v7.8h, v7.8h, v23.8h + adds TMP3, xzr, TMP2, LSR #32 + mul v8.8h, v8.8h, v24.8h + mul v9.8h, v9.8h, v25.8h + b.ne 3f + /* Right AC coef is zero */ + dup v15.2d, v10.d[1] + /* Even part: reverse the even part of the forward DCT. */ + add v18.4h, v4.4h, v8.4h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.4h, v2.4h, v6.4h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + sub v26.4h, v2.4h, v6.4h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.4h, v9.4h, v5.4h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.4h, v7.4h, v3.4h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.4h, v9.4h, v3.4h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.4h, v7.4h, v5.4h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.4h, v22.4h, v24.4h /* z5 = z3 + z4 */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + mov v6.16b, v15.16b + mov v7.16b, v15.16b + mov v8.16b, v15.16b + mov v9.16b, v15.16b + b 1b + +.balign 16 +3: + cbnz TMP4, 4f + /* Left AC coef is zero */ + dup v14.2d, v10.d[0] + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + mov v2.16b, v14.16b + mov v3.16b, v14.16b + mov v4.16b, v14.16b + mov v5.16b, v14.16b + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + b 1b + +.balign 16 +4: + /* "No" AC coef is zero */ + /* Even part: reverse the even part of the forward DCT. */ + add v18.8h, v4.8h, v8.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) */ + add v22.8h, v2.8h, v6.8h /* z2 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull2 v19.4s, v18.8h, XFIX_P_0_541 /* z1h z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1l z1 = MULTIPLY(z2 + z3, FIX_0_541196100); */ + sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + mov v21.16b, v19.16b /* tmp3 = z1 */ + mov v20.16b, v18.16b /* tmp3 = z1 */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ + sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ + sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ + add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ + sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ + add v8.4s, v26.4s, v18.4s /* tmp11l tmp11 = tmp1 + tmp2; */ + sub v4.4s, v26.4s, v18.4s /* tmp12l tmp12 = tmp1 - tmp2; */ + add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ + sub v31.4s, v23.4s, v21.4s /* tmp13h tmp13 = tmp0 - tmp3; */ + add v29.4s, v27.4s, v19.4s /* tmp11h tmp11 = tmp1 + tmp2; */ + sub v30.4s, v27.4s, v19.4s /* tmp12h tmp12 = tmp1 - tmp2; */ + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + add v22.8h, v9.8h, v5.8h /* z3 = tmp0 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v24.8h, v7.8h, v3.8h /* z4 = tmp1 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v18.8h, v9.8h, v3.8h /* z1 = tmp0 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) */ + add v20.8h, v7.8h, v5.8h /* z2 = tmp1 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) */ + add v26.8h, v22.8h, v24.8h /* z5 = z3 + z4 */ + + smull2 v11.4s, v9.8h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull2 v13.4s, v7.8h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ + smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ + smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ + smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ + smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + + add v23.4s, v23.4s, v27.4s /* z3 += z5 */ + add v22.4s, v22.4s, v26.4s /* z3 += z5 */ + add v25.4s, v25.4s, v27.4s /* z4 += z5 */ + add v24.4s, v24.4s, v26.4s /* z4 += z5 */ + + add v11.4s, v11.4s, v19.4s /* tmp0 += z1 */ + add v10.4s, v10.4s, v18.4s /* tmp0 += z1 */ + add v13.4s, v13.4s, v21.4s /* tmp1 += z2 */ + add v12.4s, v12.4s, v20.4s /* tmp1 += z2 */ + add v15.4s, v15.4s, v21.4s /* tmp2 += z2 */ + add v14.4s, v14.4s, v20.4s /* tmp2 += z2 */ + add v17.4s, v17.4s, v19.4s /* tmp3 += z1 */ + add v16.4s, v16.4s, v18.4s /* tmp3 += z1 */ + + add v11.4s, v11.4s, v23.4s /* tmp0 += z3 */ + add v10.4s, v10.4s, v22.4s /* tmp0 += z3 */ + add v13.4s, v13.4s, v25.4s /* tmp1 += z4 */ + add v12.4s, v12.4s, v24.4s /* tmp1 += z4 */ + add v17.4s, v17.4s, v25.4s /* tmp3 += z4 */ + add v16.4s, v16.4s, v24.4s /* tmp3 += z4 */ + add v15.4s, v15.4s, v23.4s /* tmp2 += z3 */ + add v14.4s, v14.4s, v22.4s /* tmp2 += z3 */ + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + add v18.4s, v2.4s, v16.4s /* tmp10 + tmp3 */ + add v19.4s, v28.4s, v17.4s /* tmp10 + tmp3 */ + sub v20.4s, v2.4s, v16.4s /* tmp10 - tmp3 */ + sub v21.4s, v28.4s, v17.4s /* tmp10 - tmp3 */ + add v22.4s, v8.4s, v14.4s /* tmp11 + tmp2 */ + add v23.4s, v29.4s, v15.4s /* tmp11 + tmp2 */ + sub v24.4s, v8.4s, v14.4s /* tmp11 - tmp2 */ + sub v25.4s, v29.4s, v15.4s /* tmp11 - tmp2 */ + add v26.4s, v4.4s, v12.4s /* tmp12 + tmp1 */ + add v27.4s, v30.4s, v13.4s /* tmp12 + tmp1 */ + sub v28.4s, v4.4s, v12.4s /* tmp12 - tmp1 */ + sub v29.4s, v30.4s, v13.4s /* tmp12 - tmp1 */ + add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ + add v15.4s, v31.4s, v11.4s /* tmp13 + tmp0 */ + sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ + sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ + + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + b 1b + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + +#undef CENTERJSAMPLE +#undef CONST_BITS +#undef PASS1_BITS +#undef XFIX_P_0_298 +#undef XFIX_N_0_390 +#undef XFIX_P_0_541 +#undef XFIX_P_0_765 +#undef XFIX_N_0_899 +#undef XFIX_P_1_175 +#undef XFIX_P_1_501 +#undef XFIX_N_1_847 +#undef XFIX_N_1_961 +#undef XFIX_P_2_053 +#undef XFIX_N_2_562 +#undef XFIX_P_3_072 + + +/*****************************************************************************/ + +/* + * jsimd_idct_ifast_neon + * + * This function contains a fast, not so accurate integer implementation of + * the inverse DCT (Discrete Cosine Transform). It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_ifast' + * function from jidctfst.c + * + * Normally 1-D AAN DCT needs 5 multiplications and 29 additions. + * But in ARM NEON case some extra additions are required because VQDMULH + * instruction can't handle the constants larger than 1. So the expressions + * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x", + * which introduces an extra addition. Overall, there are 6 extra additions + * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. + */ + +#define XFIX_1_082392200 v0.h[0] +#define XFIX_1_414213562 v0.h[1] +#define XFIX_1_847759065 v0.h[2] +#define XFIX_2_613125930 v0.h[3] + +.balign 16 +Ljsimd_idct_ifast_neon_consts: + .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ + .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ + .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ + .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ + +asm_function jsimd_idct_ifast_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x9 + TMP4 .req x10 + TMP5 .req x11 + TMP6 .req x12 + TMP7 .req x13 + TMP8 .req x14 + + /* Load and dequantize coefficients into NEON registers + * with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 ( v16.8h ) + * 1 | d18 | d19 ( v17.8h ) + * 2 | d20 | d21 ( v18.8h ) + * 3 | d22 | d23 ( v19.8h ) + * 4 | d24 | d25 ( v20.8h ) + * 5 | d26 | d27 ( v21.8h ) + * 6 | d28 | d29 ( v22.8h ) + * 7 | d30 | d31 ( v23.8h ) + */ + /* Save NEON registers used in fast IDCT */ + adr TMP5, Ljsimd_idct_ifast_neon_consts + ld1 {v16.8h, v17.8h}, [COEF_BLOCK], 32 + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 + ld1 {v18.8h, v19.8h}, [COEF_BLOCK], 32 + mul v16.8h, v16.8h, v0.8h + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 + mul v17.8h, v17.8h, v1.8h + ld1 {v20.8h, v21.8h}, [COEF_BLOCK], 32 + mul v18.8h, v18.8h, v2.8h + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 + mul v19.8h, v19.8h, v3.8h + ld1 {v22.8h, v23.8h}, [COEF_BLOCK], 32 + mul v20.8h, v20.8h, v0.8h + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 + mul v22.8h, v22.8h, v2.8h + mul v21.8h, v21.8h, v1.8h + ld1 {v0.4h}, [TMP5] /* load constants */ + mul v23.8h, v23.8h, v3.8h + + /* 1-D IDCT, pass 1 */ + sub v2.8h, v18.8h, v22.8h + add v22.8h, v18.8h, v22.8h + sub v1.8h, v19.8h, v21.8h + add v21.8h, v19.8h, v21.8h + sub v5.8h, v17.8h, v23.8h + add v23.8h, v17.8h, v23.8h + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 + add v3.8h, v1.8h, v1.8h + sub v1.8h, v5.8h, v1.8h + add v18.8h, v2.8h, v4.8h + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 + sub v2.8h, v23.8h, v21.8h + add v3.8h, v3.8h, v6.8h + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 + add v1.8h, v1.8h, v4.8h + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 + sub v18.8h, v18.8h, v22.8h + add v2.8h, v2.8h, v6.8h + sub v6.8h, v16.8h, v20.8h + add v20.8h, v16.8h, v20.8h + add v17.8h, v5.8h, v4.8h + add v5.8h, v6.8h, v18.8h + sub v18.8h, v6.8h, v18.8h + add v6.8h, v23.8h, v21.8h + add v16.8h, v20.8h, v22.8h + sub v3.8h, v6.8h, v3.8h + sub v20.8h, v20.8h, v22.8h + sub v3.8h, v3.8h, v1.8h + sub v1.8h, v17.8h, v1.8h + add v2.8h, v3.8h, v2.8h + sub v23.8h, v16.8h, v6.8h + add v1.8h, v1.8h, v2.8h + add v16.8h, v16.8h, v6.8h + add v22.8h, v5.8h, v3.8h + sub v17.8h, v5.8h, v3.8h + sub v21.8h, v18.8h, v2.8h + add v18.8h, v18.8h, v2.8h + sub v19.8h, v20.8h, v1.8h + add v20.8h, v20.8h, v1.8h + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v28, v29, v30, v31 + /* 1-D IDCT, pass 2 */ + sub v2.8h, v18.8h, v22.8h + add v22.8h, v18.8h, v22.8h + sub v1.8h, v19.8h, v21.8h + add v21.8h, v19.8h, v21.8h + sub v5.8h, v17.8h, v23.8h + add v23.8h, v17.8h, v23.8h + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 + add v3.8h, v1.8h, v1.8h + sub v1.8h, v5.8h, v1.8h + add v18.8h, v2.8h, v4.8h + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 + sub v2.8h, v23.8h, v21.8h + add v3.8h, v3.8h, v6.8h + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 + add v1.8h, v1.8h, v4.8h + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 + sub v18.8h, v18.8h, v22.8h + add v2.8h, v2.8h, v6.8h + sub v6.8h, v16.8h, v20.8h + add v20.8h, v16.8h, v20.8h + add v17.8h, v5.8h, v4.8h + add v5.8h, v6.8h, v18.8h + sub v18.8h, v6.8h, v18.8h + add v6.8h, v23.8h, v21.8h + add v16.8h, v20.8h, v22.8h + sub v3.8h, v6.8h, v3.8h + sub v20.8h, v20.8h, v22.8h + sub v3.8h, v3.8h, v1.8h + sub v1.8h, v17.8h, v1.8h + add v2.8h, v3.8h, v2.8h + sub v23.8h, v16.8h, v6.8h + add v1.8h, v1.8h, v2.8h + add v16.8h, v16.8h, v6.8h + add v22.8h, v5.8h, v3.8h + sub v17.8h, v5.8h, v3.8h + sub v21.8h, v18.8h, v2.8h + add v18.8h, v18.8h, v2.8h + sub v19.8h, v20.8h, v1.8h + add v20.8h, v20.8h, v1.8h + /* Descale to 8-bit and range limit */ + movi v0.16b, #0x80 + /* Prepare pointers (dual-issue with NEON instructions) */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + sqshrn v28.8b, v16.8h, #5 + ldp TMP3, TMP4, [OUTPUT_BUF], 16 + sqshrn v29.8b, v17.8h, #5 + add TMP1, TMP1, OUTPUT_COL + sqshrn v30.8b, v18.8h, #5 + add TMP2, TMP2, OUTPUT_COL + sqshrn v31.8b, v19.8h, #5 + add TMP3, TMP3, OUTPUT_COL + sqshrn2 v28.16b, v20.8h, #5 + add TMP4, TMP4, OUTPUT_COL + sqshrn2 v29.16b, v21.8h, #5 + ldp TMP5, TMP6, [OUTPUT_BUF], 16 + sqshrn2 v30.16b, v22.8h, #5 + ldp TMP7, TMP8, [OUTPUT_BUF], 16 + sqshrn2 v31.16b, v23.8h, #5 + add TMP5, TMP5, OUTPUT_COL + add v16.16b, v28.16b, v0.16b + add TMP6, TMP6, OUTPUT_COL + add v18.16b, v29.16b, v0.16b + add TMP7, TMP7, OUTPUT_COL + add v20.16b, v30.16b, v0.16b + add TMP8, TMP8, OUTPUT_COL + add v22.16b, v31.16b, v0.16b + + /* Transpose the final 8-bit samples */ + trn1 v28.16b, v16.16b, v18.16b + trn1 v30.16b, v20.16b, v22.16b + trn2 v29.16b, v16.16b, v18.16b + trn2 v31.16b, v20.16b, v22.16b + + trn1 v16.8h, v28.8h, v30.8h + trn2 v18.8h, v28.8h, v30.8h + trn1 v20.8h, v29.8h, v31.8h + trn2 v22.8h, v29.8h, v31.8h + + uzp1 v28.4s, v16.4s, v18.4s + uzp2 v30.4s, v16.4s, v18.4s + uzp1 v29.4s, v20.4s, v22.4s + uzp2 v31.4s, v20.4s, v22.4s + + /* Store results to the output buffer */ + st1 {v28.d}[0], [TMP1] + st1 {v29.d}[0], [TMP2] + st1 {v28.d}[1], [TMP3] + st1 {v29.d}[1], [TMP4] + st1 {v30.d}[0], [TMP5] + st1 {v31.d}[0], [TMP6] + st1 {v30.d}[1], [TMP7] + st1 {v31.d}[1], [TMP8] + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + + +/*****************************************************************************/ + +/* + * jsimd_idct_4x4_neon + * + * This function contains inverse-DCT code for getting reduced-size + * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_4x4' + * function from jpeg-6b (jidctred.c). + * + * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which + * requires much less arithmetic operations and hence should be faster. + * The primary purpose of this particular NEON optimized function is + * bit exact compatibility with jpeg-6b. + * + * TODO: a bit better instructions scheduling can be achieved by expanding + * idct_helper/transpose_4x4 macros and reordering instructions, + * but readability will suffer somewhat. + */ + +#define CONST_BITS 13 + +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ + +.balign 16 +Ljsimd_idct_4x4_neon_consts: + .short FIX_1_847759065 /* v0.h[0] */ + .short -FIX_0_765366865 /* v0.h[1] */ + .short -FIX_0_211164243 /* v0.h[2] */ + .short FIX_1_451774981 /* v0.h[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* v2.h[0] */ + .short FIX_2_562915447 /* v2.h[1] */ + .short 1 << (CONST_BITS+1) /* v2.h[2] */ + .short 0 /* v2.h[3] */ + +.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 + smull v28.4s, \x4, v2.h[2] + smlal v28.4s, \x8, v0.h[0] + smlal v28.4s, \x14, v0.h[1] + + smull v26.4s, \x16, v1.h[2] + smlal v26.4s, \x12, v1.h[3] + smlal v26.4s, \x10, v2.h[0] + smlal v26.4s, \x6, v2.h[1] + + smull v30.4s, \x4, v2.h[2] + smlsl v30.4s, \x8, v0.h[0] + smlsl v30.4s, \x14, v0.h[1] + + smull v24.4s, \x16, v0.h[2] + smlal v24.4s, \x12, v0.h[3] + smlal v24.4s, \x10, v1.h[0] + smlal v24.4s, \x6, v1.h[1] + + add v20.4s, v28.4s, v26.4s + sub v28.4s, v28.4s, v26.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v28.4s, v28.4s, #\shift + xtn \y26, v20.4s + xtn \y29, v28.4s + .else + rshrn \y26, v20.4s, #\shift + rshrn \y29, v28.4s, #\shift + .endif + + add v20.4s, v30.4s, v24.4s + sub v30.4s, v30.4s, v24.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v30.4s, v30.4s, #\shift + xtn \y27, v20.4s + xtn \y28, v30.4s + .else + rshrn \y27, v20.4s, #\shift + rshrn \y28, v30.4s, #\shift + .endif +.endm + +asm_function jsimd_idct_4x4_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x1 + TMP3 .req x2 + TMP4 .req x15 + + /* Save all used NEON registers */ + sub sp, sp, 272 + str x15, [sp], 16 + /* Load constants (v3.4h is just used for padding) */ + adr TMP4, Ljsimd_idct_4x4_neon_consts + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4] + + /* Load all COEF_BLOCK into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | v4.4h | v5.4h + * 1 | v6.4h | v7.4h + * 2 | v8.4h | v9.4h + * 3 | v10.4h | v11.4h + * 4 | - | - + * 5 | v12.4h | v13.4h + * 6 | v14.4h | v15.4h + * 7 | v16.4h | v17.4h + */ + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 + ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32 + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 + /* dequantize */ + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 + mul v4.4h, v4.4h, v18.4h + mul v5.4h, v5.4h, v19.4h + ins v4.d[1], v5.d[0] /* 128 bit q4 */ + ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32 + mul v6.4h, v6.4h, v20.4h + mul v7.4h, v7.4h, v21.4h + ins v6.d[1], v7.d[0] /* 128 bit q6 */ + mul v8.4h, v8.4h, v22.4h + mul v9.4h, v9.4h, v23.4h + ins v8.d[1], v9.d[0] /* 128 bit q8 */ + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32 + mul v10.4h, v10.4h, v24.4h + mul v11.4h, v11.4h, v25.4h + ins v10.d[1], v11.d[0] /* 128 bit q10 */ + mul v12.4h, v12.4h, v26.4h + mul v13.4h, v13.4h, v27.4h + ins v12.d[1], v13.d[0] /* 128 bit q12 */ + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 + mul v14.4h, v14.4h, v28.4h + mul v15.4h, v15.4h, v29.4h + ins v14.d[1], v15.d[0] /* 128 bit q14 */ + mul v16.4h, v16.4h, v30.4h + mul v17.4h, v17.4h, v31.4h + ins v16.d[1], v17.d[0] /* 128 bit q16 */ + + /* Pass 1 */ + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, \ + v4.4h, v6.4h, v8.4h, v10.4h + transpose_4x4 v4, v6, v8, v10, v3 + ins v10.d[1], v11.d[0] + idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, \ + v5.4h, v7.4h, v9.4h, v11.4h + transpose_4x4 v5, v7, v9, v11, v3 + ins v10.d[1], v11.d[0] + + /* Pass 2 */ + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, \ + v26.4h, v27.4h, v28.4h, v29.4h + transpose_4x4 v26, v27, v28, v29, v3 + + /* Range limit */ + movi v30.8h, #0x80 + ins v26.d[1], v27.d[0] + ins v28.d[1], v29.d[0] + add v26.8h, v26.8h, v30.8h + add v28.8h, v28.8h, v30.8h + sqxtun v26.8b, v26.8h + sqxtun v27.8b, v28.8h + + /* Store results to the output buffer */ + ldp TMP1, TMP2, [OUTPUT_BUF], 16 + ldp TMP3, TMP4, [OUTPUT_BUF] + add TMP1, TMP1, OUTPUT_COL + add TMP2, TMP2, OUTPUT_COL + add TMP3, TMP3, OUTPUT_COL + add TMP4, TMP4, OUTPUT_COL + +#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT + /* We can use much less instructions on little endian systems if the + * OS kernel is not configured to trap unaligned memory accesses + */ + st1 {v26.s}[0], [TMP1], 4 + st1 {v27.s}[0], [TMP3], 4 + st1 {v26.s}[1], [TMP2], 4 + st1 {v27.s}[1], [TMP4], 4 +#else + st1 {v26.b}[0], [TMP1], 1 + st1 {v27.b}[0], [TMP3], 1 + st1 {v26.b}[1], [TMP1], 1 + st1 {v27.b}[1], [TMP3], 1 + st1 {v26.b}[2], [TMP1], 1 + st1 {v27.b}[2], [TMP3], 1 + st1 {v26.b}[3], [TMP1], 1 + st1 {v27.b}[3], [TMP3], 1 + + st1 {v26.b}[4], [TMP2], 1 + st1 {v27.b}[4], [TMP4], 1 + st1 {v26.b}[5], [TMP2], 1 + st1 {v27.b}[5], [TMP4], 1 + st1 {v26.b}[6], [TMP2], 1 + st1 {v27.b}[6], [TMP4], 1 + st1 {v26.b}[7], [TMP2], 1 + st1 {v27.b}[7], [TMP4], 1 +#endif + + /* vpop {v8.4h - v15.4h} ;not available */ + sub sp, sp, #272 + ldr x15, [sp], 16 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + +.purgem idct_helper + + +/*****************************************************************************/ + +/* + * jsimd_idct_2x2_neon + * + * This function contains inverse-DCT code for getting reduced-size + * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_idct_2x2' + * function from jpeg-6b (jidctred.c). + * + * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which + * requires much less arithmetic operations and hence should be faster. + * The primary purpose of this particular NEON optimized function is + * bit exact compatibility with jpeg-6b. + */ + +.balign 8 +Ljsimd_idct_2x2_neon_consts: + .short -FIX_0_720959822 /* v14[0] */ + .short FIX_0_850430095 /* v14[1] */ + .short -FIX_1_272758580 /* v14[2] */ + .short FIX_3_624509785 /* v14[3] */ + +.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 + sshll v15.4s, \x4, #15 + smull v26.4s, \x6, v14.h[3] + smlal v26.4s, \x10, v14.h[2] + smlal v26.4s, \x12, v14.h[1] + smlal v26.4s, \x16, v14.h[0] + + add v20.4s, v15.4s, v26.4s + sub v15.4s, v15.4s, v26.4s + + .if \shift > 16 + srshr v20.4s, v20.4s, #\shift + srshr v15.4s, v15.4s, #\shift + xtn \y26, v20.4s + xtn \y27, v15.4s + .else + rshrn \y26, v20.4s, #\shift + rshrn \y27, v15.4s, #\shift + .endif +.endm + +asm_function jsimd_idct_2x2_neon + + DCT_TABLE .req x0 + COEF_BLOCK .req x1 + OUTPUT_BUF .req x2 + OUTPUT_COL .req x3 + TMP1 .req x0 + TMP2 .req x15 + + /* vpush {v8.4h - v15.4h} ; not available */ + sub sp, sp, 208 + str x15, [sp], 16 + + /* Load constants */ + adr TMP2, Ljsimd_idct_2x2_neon_consts + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v21.8b, v22.8b}, [sp], 16 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v30.8b, v31.8b}, [sp], 16 + ld1 {v14.4h}, [TMP2] + + /* Load all COEF_BLOCK into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | v4.4h | v5.4h + * 1 | v6.4h | v7.4h + * 2 | - | - + * 3 | v10.4h | v11.4h + * 4 | - | - + * 5 | v12.4h | v13.4h + * 6 | - | - + * 7 | v16.4h | v17.4h + */ + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16 + add COEF_BLOCK, COEF_BLOCK, #16 + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 + /* Dequantize */ + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 + mul v4.4h, v4.4h, v18.4h + mul v5.4h, v5.4h, v19.4h + ins v4.d[1], v5.d[0] + mul v6.4h, v6.4h, v20.4h + mul v7.4h, v7.4h, v21.4h + ins v6.d[1], v7.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16 + mul v10.4h, v10.4h, v24.4h + mul v11.4h, v11.4h, v25.4h + ins v10.d[1], v11.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16 + mul v12.4h, v12.4h, v26.4h + mul v13.4h, v13.4h, v27.4h + ins v12.d[1], v13.d[0] + add DCT_TABLE, DCT_TABLE, #16 + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 + mul v16.4h, v16.4h, v30.4h + mul v17.4h, v17.4h, v31.4h + ins v16.d[1], v17.d[0] + + /* Pass 1 */ +#if 0 + idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h + transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h + idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h + transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h +#else + smull v26.4s, v6.4h, v14.h[3] + smlal v26.4s, v10.4h, v14.h[2] + smlal v26.4s, v12.4h, v14.h[1] + smlal v26.4s, v16.4h, v14.h[0] + smull v24.4s, v7.4h, v14.h[3] + smlal v24.4s, v11.4h, v14.h[2] + smlal v24.4s, v13.4h, v14.h[1] + smlal v24.4s, v17.4h, v14.h[0] + sshll v15.4s, v4.4h, #15 + sshll v30.4s, v5.4h, #15 + add v20.4s, v15.4s, v26.4s + sub v15.4s, v15.4s, v26.4s + rshrn v4.4h, v20.4s, #13 + rshrn v6.4h, v15.4s, #13 + add v20.4s, v30.4s, v24.4s + sub v15.4s, v30.4s, v24.4s + rshrn v5.4h, v20.4s, #13 + rshrn v7.4h, v15.4s, #13 + ins v4.d[1], v5.d[0] + ins v6.d[1], v7.d[0] + transpose v4, v6, v3, .16b, .8h + transpose v6, v10, v3, .16b, .4s + ins v11.d[0], v10.d[1] + ins v7.d[0], v6.d[1] +#endif + + /* Pass 2 */ + idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h + + /* Range limit */ + movi v30.8h, #0x80 + ins v26.d[1], v27.d[0] + add v26.8h, v26.8h, v30.8h + sqxtun v30.8b, v26.8h + ins v26.d[0], v30.d[0] + sqxtun v27.8b, v26.8h + + /* Store results to the output buffer */ + ldp TMP1, TMP2, [OUTPUT_BUF] + add TMP1, TMP1, OUTPUT_COL + add TMP2, TMP2, OUTPUT_COL + + st1 {v26.b}[0], [TMP1], 1 + st1 {v27.b}[4], [TMP1], 1 + st1 {v26.b}[1], [TMP2], 1 + st1 {v27.b}[5], [TMP2], 1 + + sub sp, sp, #208 + ldr x15, [sp], 16 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v21.8b, v22.8b}, [sp], 16 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v30.8b, v31.8b}, [sp], 16 + blr x30 + + .unreq DCT_TABLE + .unreq COEF_BLOCK + .unreq OUTPUT_BUF + .unreq OUTPUT_COL + .unreq TMP1 + .unreq TMP2 + +.purgem idct_helper + + +/*****************************************************************************/ + +/* + * jsimd_ycc_extrgb_convert_neon + * jsimd_ycc_extbgr_convert_neon + * jsimd_ycc_extrgbx_convert_neon + * jsimd_ycc_extbgrx_convert_neon + * jsimd_ycc_extxbgr_convert_neon + * jsimd_ycc_extxrgb_convert_neon + * + * Colorspace conversion YCbCr -> RGB + */ + +.macro do_load size + .if \size == 8 + ld1 {v4.8b}, [U], 8 + ld1 {v5.8b}, [V], 8 + ld1 {v0.8b}, [Y], 8 + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + .elseif \size == 4 + ld1 {v4.b}[0], [U], 1 + ld1 {v4.b}[1], [U], 1 + ld1 {v4.b}[2], [U], 1 + ld1 {v4.b}[3], [U], 1 + ld1 {v5.b}[0], [V], 1 + ld1 {v5.b}[1], [V], 1 + ld1 {v5.b}[2], [V], 1 + ld1 {v5.b}[3], [V], 1 + ld1 {v0.b}[0], [Y], 1 + ld1 {v0.b}[1], [Y], 1 + ld1 {v0.b}[2], [Y], 1 + ld1 {v0.b}[3], [Y], 1 + .elseif \size == 2 + ld1 {v4.b}[4], [U], 1 + ld1 {v4.b}[5], [U], 1 + ld1 {v5.b}[4], [V], 1 + ld1 {v5.b}[5], [V], 1 + ld1 {v0.b}[4], [Y], 1 + ld1 {v0.b}[5], [Y], 1 + .elseif \size == 1 + ld1 {v4.b}[6], [U], 1 + ld1 {v5.b}[6], [V], 1 + ld1 {v0.b}[6], [Y], 1 + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_store bpp, size, fast_st3 + .if \bpp == 24 + .if \size == 8 + .if \fast_st3 == 1 + st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24 + .else + st1 {v10.b}[0], [RGB], #1 + st1 {v11.b}[0], [RGB], #1 + st1 {v12.b}[0], [RGB], #1 + + st1 {v10.b}[1], [RGB], #1 + st1 {v11.b}[1], [RGB], #1 + st1 {v12.b}[1], [RGB], #1 + + st1 {v10.b}[2], [RGB], #1 + st1 {v11.b}[2], [RGB], #1 + st1 {v12.b}[2], [RGB], #1 + + st1 {v10.b}[3], [RGB], #1 + st1 {v11.b}[3], [RGB], #1 + st1 {v12.b}[3], [RGB], #1 + + st1 {v10.b}[4], [RGB], #1 + st1 {v11.b}[4], [RGB], #1 + st1 {v12.b}[4], [RGB], #1 + + st1 {v10.b}[5], [RGB], #1 + st1 {v11.b}[5], [RGB], #1 + st1 {v12.b}[5], [RGB], #1 + + st1 {v10.b}[6], [RGB], #1 + st1 {v11.b}[6], [RGB], #1 + st1 {v12.b}[6], [RGB], #1 + + st1 {v10.b}[7], [RGB], #1 + st1 {v11.b}[7], [RGB], #1 + st1 {v12.b}[7], [RGB], #1 + .endif + .elseif \size == 4 + st3 {v10.b, v11.b, v12.b}[0], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[1], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[2], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[3], [RGB], 3 + .elseif \size == 2 + st3 {v10.b, v11.b, v12.b}[4], [RGB], 3 + st3 {v10.b, v11.b, v12.b}[5], [RGB], 3 + .elseif \size == 1 + st3 {v10.b, v11.b, v12.b}[6], [RGB], 3 + .else + .error unsupported macroblock size + .endif + .elseif \bpp == 32 + .if \size == 8 + st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32 + .elseif \size == 4 + st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4 + .elseif \size == 2 + st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4 + st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4 + .elseif \size == 1 + st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4 + .else + .error unsupported macroblock size + .endif + .elseif \bpp==16 + .if \size == 8 + st1 {v25.8h}, [RGB], 16 + .elseif \size == 4 + st1 {v25.4h}, [RGB], 8 + .elseif \size == 2 + st1 {v25.h}[4], [RGB], 2 + st1 {v25.h}[5], [RGB], 2 + .elseif \size == 1 + st1 {v25.h}[6], [RGB], 2 + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif +.endm + +.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, \ + g_offs, gsize, b_offs, bsize, \ + defsize, fast_st3 + +/* + * 2-stage pipelined YCbCr->RGB conversion + */ + +.macro do_yuv_to_rgb_stage1 + uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */ + smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */ +.endm + +.macro do_yuv_to_rgb_stage2 + rshrn v20.4h, v20.4s, #15 + rshrn2 v20.8h, v22.4s, #15 + rshrn v24.4h, v24.4s, #14 + rshrn2 v24.8h, v26.4s, #14 + rshrn v28.4h, v28.4s, #14 + rshrn2 v28.8h, v30.4s, #14 + uaddw v20.8h, v20.8h, v0.8b + uaddw v24.8h, v24.8h, v0.8b + uaddw v28.8h, v28.8h, v0.8b + .if \bpp != 16 + sqxtun v1\g_offs\defsize, v20.8h + sqxtun v1\r_offs\defsize, v24.8h + sqxtun v1\b_offs\defsize, v28.8h + .else + sqshlu v21.8h, v20.8h, #8 + sqshlu v25.8h, v24.8h, #8 + sqshlu v29.8h, v28.8h, #8 + sri v25.8h, v21.8h, #5 + sri v25.8h, v29.8h, #11 + .endif +.endm + +.macro do_yuv_to_rgb_stage2_store_load_stage1 fast_st3 + rshrn v20.4h, v20.4s, #15 + rshrn v24.4h, v24.4s, #14 + rshrn v28.4h, v28.4s, #14 + ld1 {v4.8b}, [U], 8 + rshrn2 v20.8h, v22.4s, #15 + rshrn2 v24.8h, v26.4s, #14 + rshrn2 v28.8h, v30.4s, #14 + ld1 {v5.8b}, [V], 8 + uaddw v20.8h, v20.8h, v0.8b + uaddw v24.8h, v24.8h, v0.8b + uaddw v28.8h, v28.8h, v0.8b + .if \bpp != 16 /**************** rgb24/rgb32 ******************************/ + sqxtun v1\g_offs\defsize, v20.8h + ld1 {v0.8b}, [Y], 8 + sqxtun v1\r_offs\defsize, v24.8h + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + sqxtun v1\b_offs\defsize, v28.8h + uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + .else /**************************** rgb565 ********************************/ + sqshlu v21.8h, v20.8h, #8 + sqshlu v25.8h, v24.8h, #8 + sqshlu v29.8h, v28.8h, #8 + uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ + uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ + ld1 {v0.8b}, [Y], 8 + smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */ + smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */ + smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */ + smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */ + sri v25.8h, v21.8h, #5 + smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */ + smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */ + prfm pldl1keep, [U, #64] + prfm pldl1keep, [V, #64] + prfm pldl1keep, [Y, #64] + sri v25.8h, v29.8h, #11 + .endif + do_store \bpp, 8, \fast_st3 + smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */ + smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */ +.endm + +.macro do_yuv_to_rgb + do_yuv_to_rgb_stage1 + do_yuv_to_rgb_stage2 +.endm + +/* Apple gas crashes on adrl, work around that by using adr. + * But this requires a copy of these constants for each function. + */ + +.balign 16 +.if \fast_st3 == 1 +Ljsimd_ycc_\colorid\()_neon_consts: +.else +Ljsimd_ycc_\colorid\()_neon_slowst3_consts: +.endif + .short 0, 0, 0, 0 + .short 22971, -11277, -23401, 29033 + .short -128, -128, -128, -128 + .short -128, -128, -128, -128 + +.if \fast_st3 == 1 +asm_function jsimd_ycc_\colorid\()_convert_neon +.else +asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3 +.endif + OUTPUT_WIDTH .req x0 + INPUT_BUF .req x1 + INPUT_ROW .req x2 + OUTPUT_BUF .req x3 + NUM_ROWS .req x4 + + INPUT_BUF0 .req x5 + INPUT_BUF1 .req x6 + INPUT_BUF2 .req x1 + + RGB .req x7 + Y .req x8 + U .req x9 + V .req x10 + N .req x15 + + sub sp, sp, 336 + str x15, [sp], 16 + + /* Load constants to d1, d2, d3 (v0.4h is just used for padding) */ + .if \fast_st3 == 1 + adr x15, Ljsimd_ycc_\colorid\()_neon_consts + .else + adr x15, Ljsimd_ycc_\colorid\()_neon_slowst3_consts + .endif + + /* Save NEON registers */ + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + ld1 {v0.4h, v1.4h}, [x15], 16 + ld1 {v2.8h}, [x15] + + /* Save ARM registers and handle input arguments */ + /* push {x4, x5, x6, x7, x8, x9, x10, x30} */ + stp x4, x5, [sp], 16 + stp x6, x7, [sp], 16 + stp x8, x9, [sp], 16 + stp x10, x30, [sp], 16 + ldr INPUT_BUF0, [INPUT_BUF] + ldr INPUT_BUF1, [INPUT_BUF, #8] + ldr INPUT_BUF2, [INPUT_BUF, #16] + .unreq INPUT_BUF + + /* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */ + movi v10.16b, #255 + movi v13.16b, #255 + + /* Outer loop over scanlines */ + cmp NUM_ROWS, #1 + b.lt 9f +0: + lsl x16, INPUT_ROW, #3 + ldr Y, [INPUT_BUF0, x16] + ldr U, [INPUT_BUF1, x16] + mov N, OUTPUT_WIDTH + ldr V, [INPUT_BUF2, x16] + add INPUT_ROW, INPUT_ROW, #1 + ldr RGB, [OUTPUT_BUF], #8 + + /* Inner loop over pixels */ + subs N, N, #8 + b.lt 3f + do_load 8 + do_yuv_to_rgb_stage1 + subs N, N, #8 + b.lt 2f +1: + do_yuv_to_rgb_stage2_store_load_stage1 \fast_st3 + subs N, N, #8 + b.ge 1b +2: + do_yuv_to_rgb_stage2 + do_store \bpp, 8, \fast_st3 + tst N, #7 + b.eq 8f +3: + tst N, #4 + b.eq 3f + do_load 4 +3: + tst N, #2 + b.eq 4f + do_load 2 +4: + tst N, #1 + b.eq 5f + do_load 1 +5: + do_yuv_to_rgb + tst N, #4 + b.eq 6f + do_store \bpp, 4, \fast_st3 +6: + tst N, #2 + b.eq 7f + do_store \bpp, 2, \fast_st3 +7: + tst N, #1 + b.eq 8f + do_store \bpp, 1, \fast_st3 +8: + subs NUM_ROWS, NUM_ROWS, #1 + b.gt 0b +9: + /* Restore all registers and return */ + sub sp, sp, #336 + ldr x15, [sp], 16 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 + ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 + ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 + ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 + ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + /* pop {r4, r5, r6, r7, r8, r9, r10, pc} */ + ldp x4, x5, [sp], 16 + ldp x6, x7, [sp], 16 + ldp x8, x9, [sp], 16 + ldp x10, x30, [sp], 16 + br x30 + .unreq OUTPUT_WIDTH + .unreq INPUT_ROW + .unreq OUTPUT_BUF + .unreq NUM_ROWS + .unreq INPUT_BUF0 + .unreq INPUT_BUF1 + .unreq INPUT_BUF2 + .unreq RGB + .unreq Y + .unreq U + .unreq V + .unreq N + +.purgem do_yuv_to_rgb +.purgem do_yuv_to_rgb_stage1 +.purgem do_yuv_to_rgb_stage2 +.purgem do_yuv_to_rgb_stage2_store_load_stage1 + +.endm + +/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize fast_st3*/ +generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b, 1 +generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, .4h, 0, .4h, 0, .4h, .8b, 1 + +generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b, 0 +generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b, 0 + +.purgem do_load +.purgem do_store + + +/*****************************************************************************/ + +/* + * jsimd_extrgb_ycc_convert_neon + * jsimd_extbgr_ycc_convert_neon + * jsimd_extrgbx_ycc_convert_neon + * jsimd_extbgrx_ycc_convert_neon + * jsimd_extxbgr_ycc_convert_neon + * jsimd_extxrgb_ycc_convert_neon + * + * Colorspace conversion RGB -> YCbCr + */ + +.macro do_store size + .if \size == 8 + st1 {v20.8b}, [Y], #8 + st1 {v21.8b}, [U], #8 + st1 {v22.8b}, [V], #8 + .elseif \size == 4 + st1 {v20.b}[0], [Y], #1 + st1 {v20.b}[1], [Y], #1 + st1 {v20.b}[2], [Y], #1 + st1 {v20.b}[3], [Y], #1 + st1 {v21.b}[0], [U], #1 + st1 {v21.b}[1], [U], #1 + st1 {v21.b}[2], [U], #1 + st1 {v21.b}[3], [U], #1 + st1 {v22.b}[0], [V], #1 + st1 {v22.b}[1], [V], #1 + st1 {v22.b}[2], [V], #1 + st1 {v22.b}[3], [V], #1 + .elseif \size == 2 + st1 {v20.b}[4], [Y], #1 + st1 {v20.b}[5], [Y], #1 + st1 {v21.b}[4], [U], #1 + st1 {v21.b}[5], [U], #1 + st1 {v22.b}[4], [V], #1 + st1 {v22.b}[5], [V], #1 + .elseif \size == 1 + st1 {v20.b}[6], [Y], #1 + st1 {v21.b}[6], [U], #1 + st1 {v22.b}[6], [V], #1 + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_load bpp, size, fast_ld3 + .if \bpp == 24 + .if \size == 8 + .if \fast_ld3 == 1 + ld3 {v10.8b, v11.8b, v12.8b}, [RGB], #24 + .else + ld1 {v10.b}[0], [RGB], #1 + ld1 {v11.b}[0], [RGB], #1 + ld1 {v12.b}[0], [RGB], #1 + + ld1 {v10.b}[1], [RGB], #1 + ld1 {v11.b}[1], [RGB], #1 + ld1 {v12.b}[1], [RGB], #1 + + ld1 {v10.b}[2], [RGB], #1 + ld1 {v11.b}[2], [RGB], #1 + ld1 {v12.b}[2], [RGB], #1 + + ld1 {v10.b}[3], [RGB], #1 + ld1 {v11.b}[3], [RGB], #1 + ld1 {v12.b}[3], [RGB], #1 + + ld1 {v10.b}[4], [RGB], #1 + ld1 {v11.b}[4], [RGB], #1 + ld1 {v12.b}[4], [RGB], #1 + + ld1 {v10.b}[5], [RGB], #1 + ld1 {v11.b}[5], [RGB], #1 + ld1 {v12.b}[5], [RGB], #1 + + ld1 {v10.b}[6], [RGB], #1 + ld1 {v11.b}[6], [RGB], #1 + ld1 {v12.b}[6], [RGB], #1 + + ld1 {v10.b}[7], [RGB], #1 + ld1 {v11.b}[7], [RGB], #1 + ld1 {v12.b}[7], [RGB], #1 + .endif + prfm pldl1keep, [RGB, #128] + .elseif \size == 4 + ld3 {v10.b, v11.b, v12.b}[0], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[1], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[2], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[3], [RGB], #3 + .elseif \size == 2 + ld3 {v10.b, v11.b, v12.b}[4], [RGB], #3 + ld3 {v10.b, v11.b, v12.b}[5], [RGB], #3 + .elseif \size == 1 + ld3 {v10.b, v11.b, v12.b}[6], [RGB], #3 + .else + .error unsupported macroblock size + .endif + .elseif \bpp == 32 + .if \size == 8 + ld4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], #32 + prfm pldl1keep, [RGB, #128] + .elseif \size == 4 + ld4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], #4 + .elseif \size == 2 + ld4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], #4 + ld4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], #4 + .elseif \size == 1 + ld4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], #4 + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif +.endm + +.macro generate_jsimd_rgb_ycc_convert_neon colorid, bpp, r_offs, g_offs, \ + b_offs, fast_ld3 + +/* + * 2-stage pipelined RGB->YCbCr conversion + */ + +.macro do_rgb_to_yuv_stage1 + ushll v4.8h, v1\r_offs\().8b, #0 /* r = v4 */ + ushll v6.8h, v1\g_offs\().8b, #0 /* g = v6 */ + ushll v8.8h, v1\b_offs\().8b, #0 /* b = v8 */ + rev64 v18.4s, v1.4s + rev64 v26.4s, v1.4s + rev64 v28.4s, v1.4s + rev64 v30.4s, v1.4s + umull v14.4s, v4.4h, v0.h[0] + umull2 v16.4s, v4.8h, v0.h[0] + umlsl v18.4s, v4.4h, v0.h[3] + umlsl2 v26.4s, v4.8h, v0.h[3] + umlal v28.4s, v4.4h, v0.h[5] + umlal2 v30.4s, v4.8h, v0.h[5] + umlal v14.4s, v6.4h, v0.h[1] + umlal2 v16.4s, v6.8h, v0.h[1] + umlsl v18.4s, v6.4h, v0.h[4] + umlsl2 v26.4s, v6.8h, v0.h[4] + umlsl v28.4s, v6.4h, v0.h[6] + umlsl2 v30.4s, v6.8h, v0.h[6] + umlal v14.4s, v8.4h, v0.h[2] + umlal2 v16.4s, v8.8h, v0.h[2] + umlal v18.4s, v8.4h, v0.h[5] + umlal2 v26.4s, v8.8h, v0.h[5] + umlsl v28.4s, v8.4h, v0.h[7] + umlsl2 v30.4s, v8.8h, v0.h[7] +.endm + +.macro do_rgb_to_yuv_stage2 + rshrn v20.4h, v14.4s, #16 + shrn v22.4h, v18.4s, #16 + shrn v24.4h, v28.4s, #16 + rshrn2 v20.8h, v16.4s, #16 + shrn2 v22.8h, v26.4s, #16 + shrn2 v24.8h, v30.4s, #16 + xtn v20.8b, v20.8h /* v20 = y */ + xtn v21.8b, v22.8h /* v21 = u */ + xtn v22.8b, v24.8h /* v22 = v */ +.endm + +.macro do_rgb_to_yuv + do_rgb_to_yuv_stage1 + do_rgb_to_yuv_stage2 +.endm + +/* TODO: expand macros and interleave instructions if some in-order + * ARM64 processor actually can dual-issue LOAD/STORE with ALU */ +.macro do_rgb_to_yuv_stage2_store_load_stage1 fast_ld3 + do_rgb_to_yuv_stage2 + do_load \bpp, 8, \fast_ld3 + st1 {v20.8b}, [Y], #8 + st1 {v21.8b}, [U], #8 + st1 {v22.8b}, [V], #8 + do_rgb_to_yuv_stage1 +.endm + +.balign 16 +.if \fast_ld3 == 1 +Ljsimd_\colorid\()_ycc_neon_consts: +.else +Ljsimd_\colorid\()_ycc_neon_slowld3_consts: +.endif + .short 19595, 38470, 7471, 11059 + .short 21709, 32768, 27439, 5329 + .short 32767, 128, 32767, 128 + .short 32767, 128, 32767, 128 + +.if \fast_ld3 == 1 +asm_function jsimd_\colorid\()_ycc_convert_neon +.else +asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3 +.endif + OUTPUT_WIDTH .req w0 + INPUT_BUF .req x1 + OUTPUT_BUF .req x2 + OUTPUT_ROW .req x3 + NUM_ROWS .req x4 + + OUTPUT_BUF0 .req x5 + OUTPUT_BUF1 .req x6 + OUTPUT_BUF2 .req x2 /* OUTPUT_BUF */ + + RGB .req x7 + Y .req x9 + U .req x10 + V .req x11 + N .req w12 + + /* Load constants to d0, d1, d2, d3 */ + .if \fast_ld3 == 1 + adr x13, Ljsimd_\colorid\()_ycc_neon_consts + .else + adr x13, Ljsimd_\colorid\()_ycc_neon_slowld3_consts + .endif + ld1 {v0.8h, v1.8h}, [x13] + + ldr OUTPUT_BUF0, [OUTPUT_BUF] + ldr OUTPUT_BUF1, [OUTPUT_BUF, #8] + ldr OUTPUT_BUF2, [OUTPUT_BUF, #16] + .unreq OUTPUT_BUF + + /* Save NEON registers */ + sub sp, sp, #64 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + /* Outer loop over scanlines */ + cmp NUM_ROWS, #1 + b.lt 9f +0: + ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #3] + ldr U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #3] + mov N, OUTPUT_WIDTH + ldr V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #3] + add OUTPUT_ROW, OUTPUT_ROW, #1 + ldr RGB, [INPUT_BUF], #8 + + /* Inner loop over pixels */ + subs N, N, #8 + b.lt 3f + do_load \bpp, 8, \fast_ld3 + do_rgb_to_yuv_stage1 + subs N, N, #8 + b.lt 2f +1: + do_rgb_to_yuv_stage2_store_load_stage1 \fast_ld3 + subs N, N, #8 + b.ge 1b +2: + do_rgb_to_yuv_stage2 + do_store 8 + tst N, #7 + b.eq 8f +3: + tbz N, #2, 3f + do_load \bpp, 4, \fast_ld3 +3: + tbz N, #1, 4f + do_load \bpp, 2, \fast_ld3 +4: + tbz N, #0, 5f + do_load \bpp, 1, \fast_ld3 +5: + do_rgb_to_yuv + tbz N, #2, 6f + do_store 4 +6: + tbz N, #1, 7f + do_store 2 +7: + tbz N, #0, 8f + do_store 1 +8: + subs NUM_ROWS, NUM_ROWS, #1 + b.gt 0b +9: + /* Restore all registers and return */ + sub sp, sp, #64 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + br x30 + + .unreq OUTPUT_WIDTH + .unreq OUTPUT_ROW + .unreq INPUT_BUF + .unreq NUM_ROWS + .unreq OUTPUT_BUF0 + .unreq OUTPUT_BUF1 + .unreq OUTPUT_BUF2 + .unreq RGB + .unreq Y + .unreq U + .unreq V + .unreq N + +.purgem do_rgb_to_yuv +.purgem do_rgb_to_yuv_stage1 +.purgem do_rgb_to_yuv_stage2 +.purgem do_rgb_to_yuv_stage2_store_load_stage1 + +.endm + +/*--------------------------------- id ----- bpp R G B Fast LD3 */ +generate_jsimd_rgb_ycc_convert_neon extrgb, 24, 0, 1, 2, 1 +generate_jsimd_rgb_ycc_convert_neon extbgr, 24, 2, 1, 0, 1 +generate_jsimd_rgb_ycc_convert_neon extrgbx, 32, 0, 1, 2, 1 +generate_jsimd_rgb_ycc_convert_neon extbgrx, 32, 2, 1, 0, 1 +generate_jsimd_rgb_ycc_convert_neon extxbgr, 32, 3, 2, 1, 1 +generate_jsimd_rgb_ycc_convert_neon extxrgb, 32, 1, 2, 3, 1 + +generate_jsimd_rgb_ycc_convert_neon extrgb, 24, 0, 1, 2, 0 +generate_jsimd_rgb_ycc_convert_neon extbgr, 24, 2, 1, 0, 0 + +.purgem do_load +.purgem do_store + + +/*****************************************************************************/ + +/* + * Load data into workspace, applying unsigned->signed conversion + * + * TODO: can be combined with 'jsimd_fdct_ifast_neon' to get + * rid of VST1.16 instructions + */ + +asm_function jsimd_convsamp_neon + SAMPLE_DATA .req x0 + START_COL .req x1 + WORKSPACE .req x2 + TMP1 .req x9 + TMP2 .req x10 + TMP3 .req x11 + TMP4 .req x12 + TMP5 .req x13 + TMP6 .req x14 + TMP7 .req x15 + TMP8 .req x4 + TMPDUP .req w3 + + mov TMPDUP, #128 + ldp TMP1, TMP2, [SAMPLE_DATA], 16 + ldp TMP3, TMP4, [SAMPLE_DATA], 16 + dup v0.8b, TMPDUP + add TMP1, TMP1, START_COL + add TMP2, TMP2, START_COL + ldp TMP5, TMP6, [SAMPLE_DATA], 16 + add TMP3, TMP3, START_COL + add TMP4, TMP4, START_COL + ldp TMP7, TMP8, [SAMPLE_DATA], 16 + add TMP5, TMP5, START_COL + add TMP6, TMP6, START_COL + ld1 {v16.8b}, [TMP1] + add TMP7, TMP7, START_COL + add TMP8, TMP8, START_COL + ld1 {v17.8b}, [TMP2] + usubl v16.8h, v16.8b, v0.8b + ld1 {v18.8b}, [TMP3] + usubl v17.8h, v17.8b, v0.8b + ld1 {v19.8b}, [TMP4] + usubl v18.8h, v18.8b, v0.8b + ld1 {v20.8b}, [TMP5] + usubl v19.8h, v19.8b, v0.8b + ld1 {v21.8b}, [TMP6] + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [WORKSPACE], 64 + usubl v20.8h, v20.8b, v0.8b + ld1 {v22.8b}, [TMP7] + usubl v21.8h, v21.8b, v0.8b + ld1 {v23.8b}, [TMP8] + usubl v22.8h, v22.8b, v0.8b + usubl v23.8h, v23.8b, v0.8b + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [WORKSPACE], 64 + + br x30 + + .unreq SAMPLE_DATA + .unreq START_COL + .unreq WORKSPACE + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMP4 + .unreq TMP5 + .unreq TMP6 + .unreq TMP7 + .unreq TMP8 + .unreq TMPDUP + +/*****************************************************************************/ + +/* + * jsimd_fdct_islow_neon + * + * This file contains a slow-but-accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). The following code is based + * directly on the IJG''s original jfdctint.c; see the jfdctint.c for + * more details. + * + * TODO: can be combined with 'jsimd_convsamp_neon' to get + * rid of a bunch of VLD1.16 instructions + */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define DESCALE_P1 (CONST_BITS-PASS1_BITS) +#define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +.balign 16 +Ljsimd_fdct_islow_neon_consts: + .short F_0_298 + .short -F_0_390 + .short F_0_541 + .short F_0_765 + .short - F_0_899 + .short F_1_175 + .short F_1_501 + .short - F_1_847 + .short - F_1_961 + .short F_2_053 + .short - F_2_562 + .short F_3_072 + .short 0 /* padding */ + .short 0 + .short 0 + .short 0 + +#undef F_0_298 +#undef F_0_390 +#undef F_0_541 +#undef F_0_765 +#undef F_0_899 +#undef F_1_175 +#undef F_1_501 +#undef F_1_847 +#undef F_1_961 +#undef F_2_053 +#undef F_2_562 +#undef F_3_072 +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] + +asm_function jsimd_fdct_islow_neon + + DATA .req x0 + TMP .req x9 + + /* Load constants */ + adr TMP, Ljsimd_fdct_islow_neon_consts + ld1 {v0.8h, v1.8h}, [TMP] + + /* Save NEON registers */ + sub sp, sp, #64 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + /* Load all DATA into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 | v16.8h + * 1 | d18 | d19 | v17.8h + * 2 | d20 | d21 | v18.8h + * 3 | d22 | d23 | v19.8h + * 4 | d24 | d25 | v20.8h + * 5 | d26 | d27 | v21.8h + * 6 | d28 | d29 | v22.8h + * 7 | d30 | d31 | v23.8h + */ + + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + sub DATA, DATA, #64 + + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v31, v2, v3, v4 + /* 1-D FDCT */ + add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */ + sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */ + add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */ + sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */ + add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */ + sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */ + add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */ + sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */ + + /* even part */ + + add v8.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */ + sub v9.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */ + add v10.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */ + sub v11.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */ + + add v16.8h, v8.8h, v10.8h /* tmp10 + tmp11 */ + sub v20.8h, v8.8h, v10.8h /* tmp10 - tmp11 */ + + add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ + + shl v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); */ + shl v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); */ + + smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + mov v22.16b, v18.16b + mov v25.16b, v24.16b + + smlal v18.4s, v9.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal2 v24.4s, v9.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal v22.4s, v11.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + smlal2 v25.4s, v11.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + + rshrn v18.4h, v18.4s, #DESCALE_P1 + rshrn v22.4h, v22.4s, #DESCALE_P1 + rshrn2 v18.8h, v24.4s, #DESCALE_P1 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P1 /* dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + + /* Odd part */ + + add v8.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */ + add v9.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */ + add v10.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */ + add v11.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */ + smull v4.4s, v10.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */ + smull2 v5.4s, v10.8h, XFIX_P_1_175 + smlal v4.4s, v11.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */ + smlal2 v5.4s, v11.8h, XFIX_P_1_175 + + smull2 v24.4s, v28.8h, XFIX_P_0_298 + smull2 v25.4s, v29.8h, XFIX_P_2_053 + smull2 v26.4s, v30.8h, XFIX_P_3_072 + smull2 v27.4s, v31.8h, XFIX_P_1_501 + smull v28.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */ + smull v29.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */ + smull v30.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */ + smull v31.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */ + + smull2 v12.4s, v8.8h, XFIX_N_0_899 + smull2 v13.4s, v9.8h, XFIX_N_2_562 + smull2 v14.4s, v10.8h, XFIX_N_1_961 + smull2 v15.4s, v11.8h, XFIX_N_0_390 + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + + add v10.4s, v10.4s, v4.4s /* z3 += z5 */ + add v14.4s, v14.4s, v5.4s + add v11.4s, v11.4s, v4.4s /* z4 += z5 */ + add v15.4s, v15.4s, v5.4s + + add v28.4s, v28.4s, v8.4s /* tmp4 += z1 */ + add v24.4s, v24.4s, v12.4s + add v29.4s, v29.4s, v9.4s /* tmp5 += z2 */ + add v25.4s, v25.4s, v13.4s + add v30.4s, v30.4s, v10.4s /* tmp6 += z3 */ + add v26.4s, v26.4s, v14.4s + add v31.4s, v31.4s, v11.4s /* tmp7 += z4 */ + add v27.4s, v27.4s, v15.4s + + add v28.4s, v28.4s, v10.4s /* tmp4 += z3 */ + add v24.4s, v24.4s, v14.4s + add v29.4s, v29.4s, v11.4s /* tmp5 += z4 */ + add v25.4s, v25.4s, v15.4s + add v30.4s, v30.4s, v9.4s /* tmp6 += z2 */ + add v26.4s, v26.4s, v13.4s + add v31.4s, v31.4s, v8.4s /* tmp7 += z1 */ + add v27.4s, v27.4s, v12.4s + + rshrn v23.4h, v28.4s, #DESCALE_P1 + rshrn v21.4h, v29.4s, #DESCALE_P1 + rshrn v19.4h, v30.4s, #DESCALE_P1 + rshrn v17.4h, v31.4s, #DESCALE_P1 + rshrn2 v23.8h, v24.4s, #DESCALE_P1 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, #DESCALE_P1 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P1 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P1 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v31, v2, v3, v4 + + /* 1-D FDCT */ + add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */ + sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */ + add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */ + sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */ + add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */ + sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */ + add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */ + sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */ + + /* even part */ + add v8.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */ + sub v9.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */ + add v10.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */ + sub v11.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */ + + add v16.8h, v8.8h, v10.8h /* tmp10 + tmp11 */ + sub v20.8h, v8.8h, v10.8h /* tmp10 - tmp11 */ + + add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ + + srshr v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); */ + srshr v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); */ + + smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ + mov v22.16b, v18.16b + mov v25.16b, v24.16b + + smlal v18.4s, v9.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal2 v24.4s, v9.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */ + smlal v22.4s, v11.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + smlal2 v25.4s, v11.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */ + + rshrn v18.4h, v18.4s, #DESCALE_P2 + rshrn v22.4h, v22.4s, #DESCALE_P2 + rshrn2 v18.8h, v24.4s, #DESCALE_P2 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P2 /* dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + + /* Odd part */ + add v8.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */ + add v9.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */ + add v10.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */ + add v11.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */ + + smull v4.4s, v10.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */ + smull2 v5.4s, v10.8h, XFIX_P_1_175 + smlal v4.4s, v11.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */ + smlal2 v5.4s, v11.8h, XFIX_P_1_175 + + smull2 v24.4s, v28.8h, XFIX_P_0_298 + smull2 v25.4s, v29.8h, XFIX_P_2_053 + smull2 v26.4s, v30.8h, XFIX_P_3_072 + smull2 v27.4s, v31.8h, XFIX_P_1_501 + smull v28.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */ + smull v29.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */ + smull v30.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */ + smull v31.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */ + + smull2 v12.4s, v8.8h, XFIX_N_0_899 + smull2 v13.4s, v9.8h, XFIX_N_2_562 + smull2 v14.4s, v10.8h, XFIX_N_1_961 + smull2 v15.4s, v11.8h, XFIX_N_0_390 + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + + add v10.4s, v10.4s, v4.4s + add v14.4s, v14.4s, v5.4s + add v11.4s, v11.4s, v4.4s + add v15.4s, v15.4s, v5.4s + + add v28.4s, v28.4s, v8.4s /* tmp4 += z1 */ + add v24.4s, v24.4s, v12.4s + add v29.4s, v29.4s, v9.4s /* tmp5 += z2 */ + add v25.4s, v25.4s, v13.4s + add v30.4s, v30.4s, v10.4s /* tmp6 += z3 */ + add v26.4s, v26.4s, v14.4s + add v31.4s, v31.4s, v11.4s /* tmp7 += z4 */ + add v27.4s, v27.4s, v15.4s + + add v28.4s, v28.4s, v10.4s /* tmp4 += z3 */ + add v24.4s, v24.4s, v14.4s + add v29.4s, v29.4s, v11.4s /* tmp5 += z4 */ + add v25.4s, v25.4s, v15.4s + add v30.4s, v30.4s, v9.4s /* tmp6 += z2 */ + add v26.4s, v26.4s, v13.4s + add v31.4s, v31.4s, v8.4s /* tmp7 += z1 */ + add v27.4s, v27.4s, v12.4s + + rshrn v23.4h, v28.4s, #DESCALE_P2 + rshrn v21.4h, v29.4s, #DESCALE_P2 + rshrn v19.4h, v30.4s, #DESCALE_P2 + rshrn v17.4h, v31.4s, #DESCALE_P2 + rshrn2 v23.8h, v24.4s, #DESCALE_P2 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, #DESCALE_P2 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P2 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P2 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + + /* store results */ + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + + /* Restore NEON registers */ + sub sp, sp, #64 + ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 + ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + + br x30 + + .unreq DATA + .unreq TMP + +#undef XFIX_P_0_298 +#undef XFIX_N_0_390 +#undef XFIX_P_0_541 +#undef XFIX_P_0_765 +#undef XFIX_N_0_899 +#undef XFIX_P_1_175 +#undef XFIX_P_1_501 +#undef XFIX_N_1_847 +#undef XFIX_N_1_961 +#undef XFIX_P_2_053 +#undef XFIX_N_2_562 +#undef XFIX_P_3_072 + + +/*****************************************************************************/ + +/* + * jsimd_fdct_ifast_neon + * + * This function contains a fast, not so accurate integer implementation of + * the forward DCT (Discrete Cosine Transform). It uses the same calculations + * and produces exactly the same output as IJG's original 'jpeg_fdct_ifast' + * function from jfdctfst.c + * + * TODO: can be combined with 'jsimd_convsamp_neon' to get + * rid of a bunch of VLD1.16 instructions + */ + +#undef XFIX_0_541196100 +#define XFIX_0_382683433 v0.h[0] +#define XFIX_0_541196100 v0.h[1] +#define XFIX_0_707106781 v0.h[2] +#define XFIX_1_306562965 v0.h[3] + +.balign 16 +Ljsimd_fdct_ifast_neon_consts: + .short (98 * 128) /* XFIX_0_382683433 */ + .short (139 * 128) /* XFIX_0_541196100 */ + .short (181 * 128) /* XFIX_0_707106781 */ + .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ + +asm_function jsimd_fdct_ifast_neon + + DATA .req x0 + TMP .req x9 + + /* Load constants */ + adr TMP, Ljsimd_fdct_ifast_neon_consts + ld1 {v0.4h}, [TMP] + + /* Load all DATA into NEON registers with the following allocation: + * 0 1 2 3 | 4 5 6 7 + * ---------+-------- + * 0 | d16 | d17 | v0.8h + * 1 | d18 | d19 | q9 + * 2 | d20 | d21 | q10 + * 3 | d22 | d23 | q11 + * 4 | d24 | d25 | q12 + * 5 | d26 | d27 | q13 + * 6 | d28 | d29 | q14 + * 7 | d30 | d31 | q15 + */ + + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + mov TMP, #2 + sub DATA, DATA, #64 +1: + /* Transpose */ + transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v1, v2, v3, v4 + subs TMP, TMP, #1 + /* 1-D FDCT */ + add v4.8h, v19.8h, v20.8h + sub v20.8h, v19.8h, v20.8h + sub v28.8h, v18.8h, v21.8h + add v18.8h, v18.8h, v21.8h + sub v29.8h, v17.8h, v22.8h + add v17.8h, v17.8h, v22.8h + sub v21.8h, v16.8h, v23.8h + add v16.8h, v16.8h, v23.8h + sub v6.8h, v17.8h, v18.8h + sub v7.8h, v16.8h, v4.8h + add v5.8h, v17.8h, v18.8h + add v6.8h, v6.8h, v7.8h + add v4.8h, v16.8h, v4.8h + sqdmulh v6.8h, v6.8h, XFIX_0_707106781 + add v19.8h, v20.8h, v28.8h + add v16.8h, v4.8h, v5.8h + sub v20.8h, v4.8h, v5.8h + add v5.8h, v28.8h, v29.8h + add v29.8h, v29.8h, v21.8h + sqdmulh v5.8h, v5.8h, XFIX_0_707106781 + sub v28.8h, v19.8h, v29.8h + add v18.8h, v7.8h, v6.8h + sqdmulh v28.8h, v28.8h, XFIX_0_382683433 + sub v22.8h, v7.8h, v6.8h + sqdmulh v19.8h, v19.8h, XFIX_0_541196100 + sqdmulh v7.8h, v29.8h, XFIX_1_306562965 + add v6.8h, v21.8h, v5.8h + sub v5.8h, v21.8h, v5.8h + add v29.8h, v29.8h, v28.8h + add v19.8h, v19.8h, v28.8h + add v29.8h, v29.8h, v7.8h + add v21.8h, v5.8h, v19.8h + sub v19.8h, v5.8h, v19.8h + add v17.8h, v6.8h, v29.8h + sub v23.8h, v6.8h, v29.8h + + b.ne 1b + + /* store results */ + st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] + + br x30 + + .unreq DATA + .unreq TMP +#undef XFIX_0_382683433 +#undef XFIX_0_541196100 +#undef XFIX_0_707106781 +#undef XFIX_1_306562965 + + +/*****************************************************************************/ + +/* + * GLOBAL(void) + * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); + * + */ +asm_function jsimd_quantize_neon + + COEF_BLOCK .req x0 + DIVISORS .req x1 + WORKSPACE .req x2 + + RECIPROCAL .req DIVISORS + CORRECTION .req x9 + SHIFT .req x10 + LOOP_COUNT .req x11 + + mov LOOP_COUNT, #2 + add CORRECTION, DIVISORS, #(64 * 2) + add SHIFT, DIVISORS, #(64 * 6) +1: + subs LOOP_COUNT, LOOP_COUNT, #1 + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [WORKSPACE], 64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [CORRECTION], 64 + abs v20.8h, v0.8h + abs v21.8h, v1.8h + abs v22.8h, v2.8h + abs v23.8h, v3.8h + ld1 {v28.8h, v29.8h, v30.8h, v31.8h}, [RECIPROCAL], 64 + add v20.8h, v20.8h, v4.8h /* add correction */ + add v21.8h, v21.8h, v5.8h + add v22.8h, v22.8h, v6.8h + add v23.8h, v23.8h, v7.8h + umull v4.4s, v20.4h, v28.4h /* multiply by reciprocal */ + umull2 v16.4s, v20.8h, v28.8h + umull v5.4s, v21.4h, v29.4h + umull2 v17.4s, v21.8h, v29.8h + umull v6.4s, v22.4h, v30.4h /* multiply by reciprocal */ + umull2 v18.4s, v22.8h, v30.8h + umull v7.4s, v23.4h, v31.4h + umull2 v19.4s, v23.8h, v31.8h + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [SHIFT], 64 + shrn v4.4h, v4.4s, #16 + shrn v5.4h, v5.4s, #16 + shrn v6.4h, v6.4s, #16 + shrn v7.4h, v7.4s, #16 + shrn2 v4.8h, v16.4s, #16 + shrn2 v5.8h, v17.4s, #16 + shrn2 v6.8h, v18.4s, #16 + shrn2 v7.8h, v19.4s, #16 + neg v24.8h, v24.8h + neg v25.8h, v25.8h + neg v26.8h, v26.8h + neg v27.8h, v27.8h + sshr v0.8h, v0.8h, #15 /* extract sign */ + sshr v1.8h, v1.8h, #15 + sshr v2.8h, v2.8h, #15 + sshr v3.8h, v3.8h, #15 + ushl v4.8h, v4.8h, v24.8h /* shift */ + ushl v5.8h, v5.8h, v25.8h + ushl v6.8h, v6.8h, v26.8h + ushl v7.8h, v7.8h, v27.8h + + eor v4.16b, v4.16b, v0.16b /* restore sign */ + eor v5.16b, v5.16b, v1.16b + eor v6.16b, v6.16b, v2.16b + eor v7.16b, v7.16b, v3.16b + sub v4.8h, v4.8h, v0.8h + sub v5.8h, v5.8h, v1.8h + sub v6.8h, v6.8h, v2.8h + sub v7.8h, v7.8h, v3.8h + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [COEF_BLOCK], 64 + + b.ne 1b + + br x30 /* return */ + + .unreq COEF_BLOCK + .unreq DIVISORS + .unreq WORKSPACE + .unreq RECIPROCAL + .unreq CORRECTION + .unreq SHIFT + .unreq LOOP_COUNT + + +/*****************************************************************************/ + +/* + * Downsample pixel values of a single component. + * This version handles the common case of 2:1 horizontal and 1:1 vertical, + * without smoothing. + * + * GLOBAL(void) + * jsimd_h2v1_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, + * JDIMENSION width_blocks, JSAMPARRAY input_data, + * JSAMPARRAY output_data); + */ + +.balign 16 +Ljsimd_h2_downsample_neon_consts: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \ + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \ + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \ + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */ + .byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \ + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */ + .byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */ + .byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \ + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */ + .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */ + +asm_function jsimd_h2v1_downsample_neon + IMAGE_WIDTH .req x0 + MAX_V_SAMP .req x1 + V_SAMP .req x2 + BLOCK_WIDTH .req x3 + INPUT_DATA .req x4 + OUTPUT_DATA .req x5 + OUTPTR .req x9 + INPTR .req x10 + TMP1 .req x11 + TMP2 .req x12 + TMP3 .req x13 + TMPDUP .req w15 + + mov TMPDUP, #0x10000 + lsl TMP2, BLOCK_WIDTH, #4 + sub TMP2, TMP2, IMAGE_WIDTH + adr TMP3, Ljsimd_h2_downsample_neon_consts + add TMP3, TMP3, TMP2, lsl #4 + dup v16.4s, TMPDUP + ld1 {v18.16b}, [TMP3] + +1: /* row loop */ + ldr INPTR, [INPUT_DATA], #8 + ldr OUTPTR, [OUTPUT_DATA], #8 + subs TMP1, BLOCK_WIDTH, #1 + b.eq 3f +2: /* columns */ + ld1 {v0.16b}, [INPTR], #16 + mov v4.16b, v16.16b + subs TMP1, TMP1, #1 + uadalp v4.8h, v0.16b + shrn v6.8b, v4.8h, #1 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 2b +3: /* last columns */ + ld1 {v0.16b}, [INPTR] + mov v4.16b, v16.16b + subs V_SAMP, V_SAMP, #1 + /* expand right */ + tbl v2.16b, {v0.16b}, v18.16b + uadalp v4.8h, v2.16b + shrn v6.8b, v4.8h, #1 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 1b + + br x30 + + .unreq IMAGE_WIDTH + .unreq MAX_V_SAMP + .unreq V_SAMP + .unreq BLOCK_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA + .unreq OUTPTR + .unreq INPTR + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMPDUP + + +/*****************************************************************************/ + +/* + * Downsample pixel values of a single component. + * This version handles the common case of 2:1 horizontal and 2:1 vertical, + * without smoothing. + * + * GLOBAL(void) + * jsimd_h2v2_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, JDIMENSION width_blocks, + * JSAMPARRAY input_data, JSAMPARRAY output_data); + */ + +.balign 16 +asm_function jsimd_h2v2_downsample_neon + IMAGE_WIDTH .req x0 + MAX_V_SAMP .req x1 + V_SAMP .req x2 + BLOCK_WIDTH .req x3 + INPUT_DATA .req x4 + OUTPUT_DATA .req x5 + OUTPTR .req x9 + INPTR0 .req x10 + INPTR1 .req x14 + TMP1 .req x11 + TMP2 .req x12 + TMP3 .req x13 + TMPDUP .req w15 + + mov TMPDUP, #1 + lsl TMP2, BLOCK_WIDTH, #4 + lsl TMPDUP, TMPDUP, #17 + sub TMP2, TMP2, IMAGE_WIDTH + adr TMP3, Ljsimd_h2_downsample_neon_consts + orr TMPDUP, TMPDUP, #1 + add TMP3, TMP3, TMP2, lsl #4 + dup v16.4s, TMPDUP + ld1 {v18.16b}, [TMP3] + +1: /* row loop */ + ldr INPTR0, [INPUT_DATA], #8 + ldr OUTPTR, [OUTPUT_DATA], #8 + ldr INPTR1, [INPUT_DATA], #8 + subs TMP1, BLOCK_WIDTH, #1 + b.eq 3f +2: /* columns */ + ld1 {v0.16b}, [INPTR0], #16 + ld1 {v1.16b}, [INPTR1], #16 + mov v4.16b, v16.16b + subs TMP1, TMP1, #1 + uadalp v4.8h, v0.16b + uadalp v4.8h, v1.16b + shrn v6.8b, v4.8h, #2 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 2b +3: /* last columns */ + ld1 {v0.16b}, [INPTR0], #16 + ld1 {v1.16b}, [INPTR1], #16 + mov v4.16b, v16.16b + subs V_SAMP, V_SAMP, #1 + /* expand right */ + tbl v2.16b, {v0.16b}, v18.16b + tbl v3.16b, {v1.16b}, v18.16b + uadalp v4.8h, v2.16b + uadalp v4.8h, v3.16b + shrn v6.8b, v4.8h, #2 + st1 {v6.8b}, [OUTPTR], #8 + b.ne 1b + + br x30 + + .unreq IMAGE_WIDTH + .unreq MAX_V_SAMP + .unreq V_SAMP + .unreq BLOCK_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA + .unreq OUTPTR + .unreq INPTR0 + .unreq INPTR1 + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq TMPDUP + + +/*****************************************************************************/ + +/* + * GLOBAL(JOCTET*) + * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * + */ + + BUFFER .req x1 + PUT_BUFFER .req x6 + PUT_BITS .req x7 + PUT_BITSw .req w7 + +.macro emit_byte + sub PUT_BITS, PUT_BITS, #0x8 + lsr x19, PUT_BUFFER, PUT_BITS + uxtb w19, w19 + strb w19, [BUFFER, #1]! + cmp w19, #0xff + b.ne 14f + strb wzr, [BUFFER, #1]! +14: +.endm +.macro put_bits CODE, SIZE + lsl PUT_BUFFER, PUT_BUFFER, \SIZE + add PUT_BITS, PUT_BITS, \SIZE + orr PUT_BUFFER, PUT_BUFFER, \CODE +.endm +.macro checkbuf31 + cmp PUT_BITS, #0x20 + b.lt 31f + emit_byte + emit_byte + emit_byte + emit_byte +31: +.endm +.macro checkbuf47 + cmp PUT_BITS, #0x30 + b.lt 47f + emit_byte + emit_byte + emit_byte + emit_byte + emit_byte + emit_byte +47: +.endm + +.macro generate_jsimd_huff_encode_one_block fast_tbl + +.balign 16 +.if \fast_tbl == 1 +Ljsimd_huff_encode_one_block_neon_consts: +.else +Ljsimd_huff_encode_one_block_neon_slowtbl_consts: +.endif + .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \ + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 +.if \fast_tbl == 1 + .byte 0, 1, 2, 3, 16, 17, 32, 33, \ + 18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */ + .byte 34, 35, 48, 49, 255, 255, 50, 51, \ + 36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */ + .byte 8, 9, 22, 23, 36, 37, 50, 51, \ + 255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */ + .byte 54, 55, 40, 41, 26, 27, 12, 13, \ + 14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */ + .byte 6, 7, 20, 21, 34, 35, 48, 49, \ + 50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */ + .byte 42, 43, 28, 29, 14, 15, 30, 31, \ + 44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */ + .byte 255, 255, 255, 255, 56, 57, 42, 43, \ + 28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */ + .byte 26, 27, 40, 41, 42, 43, 28, 29, \ + 14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */ + .byte 255, 255, 255, 255, 0, 1, 255, 255, \ + 255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */ + .byte 255, 255, 255, 255, 255, 255, 255, 255, \ + 0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */ + .byte 255, 255, 255, 255, 255, 255, 255, 255, \ + 255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */ + .byte 4, 5, 6, 7, 255, 255, 255, 255, \ + 255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */ +.endif + +.if \fast_tbl == 1 +asm_function jsimd_huff_encode_one_block_neon +.else +asm_function jsimd_huff_encode_one_block_neon_slowtbl +.endif + sub sp, sp, 272 + sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */ + /* Save ARM registers */ + stp x19, x20, [sp], 16 +.if \fast_tbl == 1 + adr x15, Ljsimd_huff_encode_one_block_neon_consts +.else + adr x15, Ljsimd_huff_encode_one_block_neon_slowtbl_consts +.endif + ldr PUT_BUFFER, [x0, #0x10] + ldr PUT_BITSw, [x0, #0x18] + ldrsh w12, [x2] /* load DC coeff in w12 */ + /* prepare data */ +.if \fast_tbl == 1 + ld1 {v23.16b}, [x15], #16 + ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x15], #64 + ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x15], #64 + ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x15], #64 + ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x2], #64 + ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [x2], #64 + sub w12, w12, w3 /* last_dc_val, not used afterwards */ + /* ZigZag 8x8 */ + tbl v0.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v0.16b + tbl v1.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v1.16b + tbl v2.16b, {v25.16b, v26.16b, v27.16b, v28.16b}, v2.16b + tbl v3.16b, {v24.16b, v25.16b, v26.16b, v27.16b}, v3.16b + tbl v4.16b, {v28.16b, v29.16b, v30.16b, v31.16b}, v4.16b + tbl v5.16b, {v25.16b, v26.16b, v27.16b, v28.16b}, v5.16b + tbl v6.16b, {v27.16b, v28.16b, v29.16b, v30.16b}, v6.16b + tbl v7.16b, {v29.16b, v30.16b, v31.16b}, v7.16b + ins v0.h[0], w12 + tbx v1.16b, {v28.16b}, v16.16b + tbx v2.16b, {v29.16b, v30.16b}, v17.16b + tbx v5.16b, {v29.16b, v30.16b}, v18.16b + tbx v6.16b, {v31.16b}, v19.16b +.else + add x13, x2, #0x22 + sub w12, w12, w3 /* last_dc_val, not used afterwards */ + ld1 {v23.16b}, [x15] + add x14, x2, #0x18 + add x3, x2, #0x36 + ins v0.h[0], w12 + add x9, x2, #0x2 + ld1 {v1.h}[0], [x13] + add x15, x2, #0x30 + ld1 {v2.h}[0], [x14] + add x19, x2, #0x26 + ld1 {v3.h}[0], [x3] + add x20, x2, #0x28 + ld1 {v0.h}[1], [x9] + add x12, x2, #0x10 + ld1 {v1.h}[1], [x15] + add x13, x2, #0x40 + ld1 {v2.h}[1], [x19] + add x14, x2, #0x34 + ld1 {v3.h}[1], [x20] + add x3, x2, #0x1a + ld1 {v0.h}[2], [x12] + add x9, x2, #0x20 + ld1 {v1.h}[2], [x13] + add x15, x2, #0x32 + ld1 {v2.h}[2], [x14] + add x19, x2, #0x42 + ld1 {v3.h}[2], [x3] + add x20, x2, #0xc + ld1 {v0.h}[3], [x9] + add x12, x2, #0x12 + ld1 {v1.h}[3], [x15] + add x13, x2, #0x24 + ld1 {v2.h}[3], [x19] + add x14, x2, #0x50 + ld1 {v3.h}[3], [x20] + add x3, x2, #0xe + ld1 {v0.h}[4], [x12] + add x9, x2, #0x4 + ld1 {v1.h}[4], [x13] + add x15, x2, #0x16 + ld1 {v2.h}[4], [x14] + add x19, x2, #0x60 + ld1 {v3.h}[4], [x3] + add x20, x2, #0x1c + ld1 {v0.h}[5], [x9] + add x12, x2, #0x6 + ld1 {v1.h}[5], [x15] + add x13, x2, #0x8 + ld1 {v2.h}[5], [x19] + add x14, x2, #0x52 + ld1 {v3.h}[5], [x20] + add x3, x2, #0x2a + ld1 {v0.h}[6], [x12] + add x9, x2, #0x14 + ld1 {v1.h}[6], [x13] + add x15, x2, #0xa + ld1 {v2.h}[6], [x14] + add x19, x2, #0x44 + ld1 {v3.h}[6], [x3] + add x20, x2, #0x38 + ld1 {v0.h}[7], [x9] + add x12, x2, #0x46 + ld1 {v1.h}[7], [x15] + add x13, x2, #0x3a + ld1 {v2.h}[7], [x19] + add x14, x2, #0x74 + ld1 {v3.h}[7], [x20] + add x3, x2, #0x6a + ld1 {v4.h}[0], [x12] + add x9, x2, #0x54 + ld1 {v5.h}[0], [x13] + add x15, x2, #0x2c + ld1 {v6.h}[0], [x14] + add x19, x2, #0x76 + ld1 {v7.h}[0], [x3] + add x20, x2, #0x78 + ld1 {v4.h}[1], [x9] + add x12, x2, #0x62 + ld1 {v5.h}[1], [x15] + add x13, x2, #0x1e + ld1 {v6.h}[1], [x19] + add x14, x2, #0x68 + ld1 {v7.h}[1], [x20] + add x3, x2, #0x7a + ld1 {v4.h}[2], [x12] + add x9, x2, #0x70 + ld1 {v5.h}[2], [x13] + add x15, x2, #0x2e + ld1 {v6.h}[2], [x14] + add x19, x2, #0x5a + ld1 {v7.h}[2], [x3] + add x20, x2, #0x6c + ld1 {v4.h}[3], [x9] + add x12, x2, #0x72 + ld1 {v5.h}[3], [x15] + add x13, x2, #0x3c + ld1 {v6.h}[3], [x19] + add x14, x2, #0x4c + ld1 {v7.h}[3], [x20] + add x3, x2, #0x5e + ld1 {v4.h}[4], [x12] + add x9, x2, #0x64 + ld1 {v5.h}[4], [x13] + add x15, x2, #0x4a + ld1 {v6.h}[4], [x14] + add x19, x2, #0x3e + ld1 {v7.h}[4], [x3] + add x20, x2, #0x6e + ld1 {v4.h}[5], [x9] + add x12, x2, #0x56 + ld1 {v5.h}[5], [x15] + add x13, x2, #0x58 + ld1 {v6.h}[5], [x19] + add x14, x2, #0x4e + ld1 {v7.h}[5], [x20] + add x3, x2, #0x7c + ld1 {v4.h}[6], [x12] + add x9, x2, #0x48 + ld1 {v5.h}[6], [x13] + add x15, x2, #0x66 + ld1 {v6.h}[6], [x14] + add x19, x2, #0x5c + ld1 {v7.h}[6], [x3] + add x20, x2, #0x7e + ld1 {v4.h}[7], [x9] + ld1 {v5.h}[7], [x15] + ld1 {v6.h}[7], [x19] + ld1 {v7.h}[7], [x20] +.endif + cmlt v24.8h, v0.8h, #0 + cmlt v25.8h, v1.8h, #0 + cmlt v26.8h, v2.8h, #0 + cmlt v27.8h, v3.8h, #0 + cmlt v28.8h, v4.8h, #0 + cmlt v29.8h, v5.8h, #0 + cmlt v30.8h, v6.8h, #0 + cmlt v31.8h, v7.8h, #0 + abs v0.8h, v0.8h + abs v1.8h, v1.8h + abs v2.8h, v2.8h + abs v3.8h, v3.8h + abs v4.8h, v4.8h + abs v5.8h, v5.8h + abs v6.8h, v6.8h + abs v7.8h, v7.8h + eor v24.16b, v24.16b, v0.16b + eor v25.16b, v25.16b, v1.16b + eor v26.16b, v26.16b, v2.16b + eor v27.16b, v27.16b, v3.16b + eor v28.16b, v28.16b, v4.16b + eor v29.16b, v29.16b, v5.16b + eor v30.16b, v30.16b, v6.16b + eor v31.16b, v31.16b, v7.16b + cmeq v16.8h, v0.8h, #0 + cmeq v17.8h, v1.8h, #0 + cmeq v18.8h, v2.8h, #0 + cmeq v19.8h, v3.8h, #0 + cmeq v20.8h, v4.8h, #0 + cmeq v21.8h, v5.8h, #0 + cmeq v22.8h, v6.8h, #0 + xtn v16.8b, v16.8h + xtn v18.8b, v18.8h + xtn v20.8b, v20.8h + xtn v22.8b, v22.8h + umov w14, v0.h[0] + xtn2 v16.16b, v17.8h + umov w13, v24.h[0] + xtn2 v18.16b, v19.8h + clz w14, w14 + xtn2 v20.16b, v21.8h + lsl w13, w13, w14 + cmeq v17.8h, v7.8h, #0 + sub w12, w14, #32 + xtn2 v22.16b, v17.8h + lsr w13, w13, w14 + and v16.16b, v16.16b, v23.16b + neg w12, w12 + and v18.16b, v18.16b, v23.16b + add x3, x4, #0x400 /* r1 = dctbl->ehufsi */ + and v20.16b, v20.16b, v23.16b + add x15, sp, #0x80 /* x15 = t2 */ + and v22.16b, v22.16b, v23.16b + ldr w10, [x4, x12, lsl #2] + addp v16.16b, v16.16b, v18.16b + ldrb w11, [x3, x12] + addp v20.16b, v20.16b, v22.16b + checkbuf47 + addp v16.16b, v16.16b, v20.16b + put_bits x10, x11 + addp v16.16b, v16.16b, v18.16b + checkbuf47 + umov x9,v16.D[0] + put_bits x13, x12 + cnt v17.8b, v16.8b + mvn x9, x9 + addv B18, v17.8b + add x4, x5, #0x400 /* x4 = actbl->ehufsi */ + umov w12, v18.b[0] + lsr x9, x9, #0x1 /* clear AC coeff */ + ldr w13, [x5, #0x3c0] /* x13 = actbl->ehufco[0xf0] */ + rbit x9, x9 /* x9 = index0 */ + ldrb w14, [x4, #0xf0] /* x14 = actbl->ehufsi[0xf0] */ + cmp w12, #(64-8) + mov x11, sp + b.lt 4f + cbz x9, 6f + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x11], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x11], #64 + st1 {v28.8h, v29.8h, v30.8h, v31.8h}, [x11], #64 +1: + clz x2, x9 + add x15, x15, x2, lsl #1 + lsl x9, x9, x2 + ldrh w20, [x15, #-126] +2: + cmp x2, #0x10 + b.lt 3f + sub x2, x2, #0x10 + checkbuf47 + put_bits x13, x14 + b 2b +3: + clz w20, w20 + ldrh w3, [x15, #2]! + sub w11, w20, #32 + lsl w3, w3, w20 + neg w11, w11 + lsr w3, w3, w20 + add x2, x11, x2, lsl #4 + lsl x9, x9, #0x1 + ldr w12, [x5, x2, lsl #2] + ldrb w10, [x4, x2] + checkbuf31 + put_bits x12, x10 + put_bits x3, x11 + cbnz x9, 1b + b 6f +4: + movi v21.8h, #0x0010 + clz v0.8h, v0.8h + clz v1.8h, v1.8h + clz v2.8h, v2.8h + clz v3.8h, v3.8h + clz v4.8h, v4.8h + clz v5.8h, v5.8h + clz v6.8h, v6.8h + clz v7.8h, v7.8h + ushl v24.8h, v24.8h, v0.8h + ushl v25.8h, v25.8h, v1.8h + ushl v26.8h, v26.8h, v2.8h + ushl v27.8h, v27.8h, v3.8h + ushl v28.8h, v28.8h, v4.8h + ushl v29.8h, v29.8h, v5.8h + ushl v30.8h, v30.8h, v6.8h + ushl v31.8h, v31.8h, v7.8h + neg v0.8h, v0.8h + neg v1.8h, v1.8h + neg v2.8h, v2.8h + neg v3.8h, v3.8h + neg v4.8h, v4.8h + neg v5.8h, v5.8h + neg v6.8h, v6.8h + neg v7.8h, v7.8h + ushl v24.8h, v24.8h, v0.8h + ushl v25.8h, v25.8h, v1.8h + ushl v26.8h, v26.8h, v2.8h + ushl v27.8h, v27.8h, v3.8h + ushl v28.8h, v28.8h, v4.8h + ushl v29.8h, v29.8h, v5.8h + ushl v30.8h, v30.8h, v6.8h + ushl v31.8h, v31.8h, v7.8h + add v0.8h, v21.8h, v0.8h + add v1.8h, v21.8h, v1.8h + add v2.8h, v21.8h, v2.8h + add v3.8h, v21.8h, v3.8h + add v4.8h, v21.8h, v4.8h + add v5.8h, v21.8h, v5.8h + add v6.8h, v21.8h, v6.8h + add v7.8h, v21.8h, v7.8h + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x11], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x11], #64 + st1 {v28.8h, v29.8h, v30.8h, v31.8h}, [x11], #64 +1: + clz x2, x9 + add x15, x15, x2, lsl #1 + lsl x9, x9, x2 + ldrh w11, [x15, #-126] +2: + cmp x2, #0x10 + b.lt 3f + sub x2, x2, #0x10 + checkbuf47 + put_bits x13, x14 + b 2b +3: + ldrh w3, [x15, #2]! + add x2, x11, x2, lsl #4 + lsl x9, x9, #0x1 + ldr w12, [x5, x2, lsl #2] + ldrb w10, [x4, x2] + checkbuf31 + put_bits x12, x10 + put_bits x3, x11 + cbnz x9, 1b +6: + add x13, sp, #0xfe + cmp x15, x13 + b.hs 1f + ldr w12, [x5] + ldrb w14, [x4] + checkbuf47 + put_bits x12, x14 +1: + sub sp, sp, 16 + str PUT_BUFFER, [x0, #0x10] + str PUT_BITSw, [x0, #0x18] + ldp x19, x20, [sp], 16 + add x0, BUFFER, #0x1 + add sp, sp, 256 + br x30 + +.endm + +generate_jsimd_huff_encode_one_block 1 +generate_jsimd_huff_encode_one_block 0 + + .unreq BUFFER + .unreq PUT_BUFFER + .unreq PUT_BITS + .unreq PUT_BITSw + +.purgem emit_byte +.purgem put_bits +.purgem checkbuf31 +.purgem checkbuf47 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm.c 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm.c 2021-04-25 01:47:22.000000000 +0800 @@ -2,17 +2,16 @@ * jsimd_arm.c * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2009-2011 D. R. Commander - * + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015-2016 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc * * This file contains the interface between the "normal" portions - * of the library and the SIMD implementations when running on - * ARM architecture. - * - * Based on the stubs from 'jsimd_none.c' + * of the library and the SIMD implementations when running on a + * 32-bit ARM architecture. */ #define JPEG_INTERNALS @@ -28,8 +27,9 @@ #include static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; -#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) +#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) @@ -100,7 +100,7 @@ init_simd (void) { char *env = NULL; -#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) +#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) int bufsize = 1024; /* an initial guess for the line buffer size limit */ #endif @@ -123,12 +123,15 @@ #endif /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCE_ARM_NEON"); + env = getenv("JSIMD_FORCENEON"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support &= JSIMD_ARM_NEON; - env = getenv("JSIMD_FORCE_NO_SIMD"); + env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; } GLOBAL(int) @@ -170,6 +173,24 @@ return 0; if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (simd_support & JSIMD_ARM_NEON) return 1; @@ -183,8 +204,7 @@ { void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - switch(cinfo->in_color_space) - { + switch(cinfo->in_color_space) { case JCS_EXT_RGB: neonfct=jsimd_extrgb_ycc_convert_neon; break; @@ -212,9 +232,7 @@ break; } - if (simd_support & JSIMD_ARM_NEON) - neonfct(cinfo->image_width, input_buf, - output_buf, output_row, num_rows); + neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) @@ -231,8 +249,7 @@ { void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - switch(cinfo->out_color_space) - { + switch(cinfo->out_color_space) { case JCS_EXT_RGB: neonfct=jsimd_ycc_extrgb_convert_neon; break; @@ -255,14 +272,21 @@ case JCS_EXT_ARGB: neonfct=jsimd_ycc_extxrgb_convert_neon; break; - default: + default: neonfct=jsimd_ycc_extrgb_convert_neon; break; } - if (simd_support & JSIMD_ARM_NEON) - neonfct(cinfo->output_width, input_buf, - input_row, output_buf, num_rows); + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, + output_buf, num_rows); } GLOBAL(int) @@ -282,13 +306,13 @@ } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { } @@ -311,17 +335,17 @@ GLOBAL(void) jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } GLOBAL(void) jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } @@ -338,23 +362,35 @@ { init_simd(); + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + return 0; } GLOBAL(void) jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { } GLOBAL(void) jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info * compptr, + jpeg_component_info *compptr, JSAMPARRAY input_data, - JSAMPARRAY * output_data_ptr) + JSAMPARRAY *output_data_ptr) { + jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); } GLOBAL(int) @@ -420,15 +456,14 @@ GLOBAL(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM * workspace) + DCTELEM *workspace) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_convsamp_neon(sample_data, start_col, workspace); + jsimd_convsamp_neon(sample_data, start_col, workspace); } GLOBAL(void) jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT * workspace) + FAST_FLOAT *workspace) { } @@ -466,19 +501,18 @@ } GLOBAL(void) -jsimd_fdct_islow (DCTELEM * data) +jsimd_fdct_islow (DCTELEM *data) { } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM * data) +jsimd_fdct_ifast (DCTELEM *data) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_fdct_ifast_neon(data); + jsimd_fdct_ifast_neon(data); } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT * data) +jsimd_fdct_float (FAST_FLOAT *data) { } @@ -510,16 +544,15 @@ } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, - DCTELEM * workspace) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) { - if (simd_support & JSIMD_ARM_NEON) - jsimd_quantize_neon(coef_block, divisors, workspace); + jsimd_quantize_neon(coef_block, divisors, workspace); } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, - FAST_FLOAT * workspace) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { } @@ -540,7 +573,7 @@ if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; @@ -563,28 +596,28 @@ if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(int) @@ -629,7 +662,7 @@ if (IFAST_SCALE_BITS != 2) return 0; - if ((simd_support & JSIMD_ARM_NEON)) + if (simd_support & JSIMD_ARM_NEON) return 1; return 0; @@ -644,27 +677,51 @@ } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - if ((simd_support & JSIMD_ARM_NEON)) - jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, + output_col); } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ARM_NEON && simd_huffman) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm_neon.S glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm_neon.S --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_arm_neon.S 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_arm_neon.S 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,13 @@ /* - * ARM NEON optimizations for libjpeg-turbo + * ARMv7 NEON optimizations for libjpeg-turbo * * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). * All rights reserved. * Author: Siarhei Siamashka + * Copyright (C) 2014 Siarhei Siamashka. All Rights Reserved. + * Copyright (C) 2014 Linaro Limited. All Rights Reserved. + * Copyright (C) 2015 D. R. Commander. All Rights Reserved. + * Copyright (C) 2015-2016 Matthieu Darbois. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -23,7 +27,7 @@ */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ +.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */ #endif .text @@ -31,20 +35,20 @@ .arch armv7a .object_arch armv4 .arm +.syntax unified #define RESPECT_STRICT_ALIGNMENT 1 + /*****************************************************************************/ /* Supplementary macro for setting function attributes */ .macro asm_function fname #ifdef __APPLE__ - .func _\fname .globl _\fname _\fname: #else - .func \fname .global \fname #ifdef __ELF__ .hidden \fname @@ -56,12 +60,13 @@ /* Transpose a block of 4x4 coefficients in four 64-bit registers */ .macro transpose_4x4 x0, x1, x2, x3 - vtrn.16 \x0, \x1 - vtrn.16 \x2, \x3 - vtrn.32 \x0, \x2 - vtrn.32 \x1, \x3 + vtrn.16 \x0, \x1 + vtrn.16 \x2, \x3 + vtrn.32 \x0, \x2 + vtrn.32 \x1, \x3 .endm + #define CENTERJSAMPLE 128 /*****************************************************************************/ @@ -70,22 +75,22 @@ * Perform dequantization and inverse DCT on one block of coefficients. * * GLOBAL(void) - * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block, + * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, * JSAMPARRAY output_buf, JDIMENSION output_col) */ -#define FIX_0_298631336 (2446) -#define FIX_0_390180644 (3196) -#define FIX_0_541196100 (4433) -#define FIX_0_765366865 (6270) -#define FIX_0_899976223 (7373) -#define FIX_1_175875602 (9633) -#define FIX_1_501321110 (12299) -#define FIX_1_847759065 (15137) -#define FIX_1_961570560 (16069) -#define FIX_2_053119869 (16819) -#define FIX_2_562915447 (20995) -#define FIX_3_072711026 (25172) +#define FIX_0_298631336 (2446) +#define FIX_0_390180644 (3196) +#define FIX_0_541196100 (4433) +#define FIX_0_765366865 (6270) +#define FIX_0_899976223 (7373) +#define FIX_1_175875602 (9633) +#define FIX_1_501321110 (12299) +#define FIX_1_847759065 (15137) +#define FIX_1_961570560 (16069) +#define FIX_2_053119869 (16819) +#define FIX_2_562915447 (20995) +#define FIX_3_072711026 (25172) #define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) #define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) @@ -103,8 +108,8 @@ #define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \ { \ DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ - INT32 q1, q2, q3, q4, q5, q6, q7; \ - INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \ + JLONG q1, q2, q3, q4, q5, q6, q7; \ + JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \ \ /* 1-D iDCT input data */ \ row0 = xrow0; \ @@ -125,7 +130,7 @@ q2 = MULTIPLY(row2, FIX_0_541196100) + \ MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ q4 = q6; \ - q3 = ((INT32) row0 - (INT32) row4) << 13; \ + q3 = ((JLONG) row0 - (JLONG) row4) << 13; \ q6 += MULTIPLY(row5, -FIX_2_562915447) + \ MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ /* now we can use q1 (reloadable constants have been used up) */ \ @@ -152,7 +157,7 @@ /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ tmp11_minus_tmp2 = q1; \ \ - q1 = ((INT32) row0 + (INT32) row4) << 13; \ + q1 = ((JLONG) row0 + (JLONG) row4) << 13; \ q2 = q1 + q6; \ q1 = q1 - q6; \ \ @@ -167,34 +172,34 @@ tmp13 = q1; \ } -#define XFIX_0_899976223 d0[0] -#define XFIX_0_541196100 d0[1] -#define XFIX_2_562915447 d0[2] -#define XFIX_0_298631336_MINUS_0_899976223 d0[3] -#define XFIX_1_501321110_MINUS_0_899976223 d1[0] -#define XFIX_2_053119869_MINUS_2_562915447 d1[1] -#define XFIX_0_541196100_PLUS_0_765366865 d1[2] -#define XFIX_1_175875602 d1[3] -#define XFIX_1_175875602_MINUS_0_390180644 d2[0] -#define XFIX_0_541196100_MINUS_1_847759065 d2[1] -#define XFIX_3_072711026_MINUS_2_562915447 d2[2] -#define XFIX_1_175875602_MINUS_1_961570560 d2[3] +#define XFIX_0_899976223 d0[0] +#define XFIX_0_541196100 d0[1] +#define XFIX_2_562915447 d0[2] +#define XFIX_0_298631336_MINUS_0_899976223 d0[3] +#define XFIX_1_501321110_MINUS_0_899976223 d1[0] +#define XFIX_2_053119869_MINUS_2_562915447 d1[1] +#define XFIX_0_541196100_PLUS_0_765366865 d1[2] +#define XFIX_1_175875602 d1[3] +#define XFIX_1_175875602_MINUS_0_390180644 d2[0] +#define XFIX_0_541196100_MINUS_1_847759065 d2[1] +#define XFIX_3_072711026_MINUS_2_562915447 d2[2] +#define XFIX_1_175875602_MINUS_1_961570560 d2[3] .balign 16 jsimd_idct_islow_neon_consts: - .short FIX_0_899976223 /* d0[0] */ - .short FIX_0_541196100 /* d0[1] */ - .short FIX_2_562915447 /* d0[2] */ - .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ - .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ - .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ - .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ - .short FIX_1_175875602 /* d1[3] */ - /* reloadable constants */ - .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ - .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ - .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ - .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ + .short FIX_0_899976223 /* d0[0] */ + .short FIX_0_541196100 /* d0[1] */ + .short FIX_2_562915447 /* d0[2] */ + .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ + .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ + .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ + .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ + .short FIX_1_175875602 /* d1[3] */ + /* reloadable constants */ + .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ + .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ + .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ + .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ asm_function jsimd_idct_islow_neon @@ -253,140 +258,141 @@ vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! vmul.s16 q14, q14, q2 vmul.s16 q13, q13, q1 - vld1.16 {d0, d1, d2, d3}, [ip, :128] /* load constants */ + vld1.16 {d0, d1, d2, d3}, [ip, :128] /* load constants */ add ip, ip, #16 vmul.s16 q15, q15, q3 - vpush {d8-d15} /* save NEON registers */ + vpush {d8-d15} /* save NEON registers */ /* 1-D IDCT, pass 1, left 4x8 half */ - vadd.s16 d4, ROW7L, ROW3L - vadd.s16 d5, ROW5L, ROW1L - vmull.s16 q6, d4, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, d5, XFIX_1_175875602 - vmull.s16 q7, d4, XFIX_1_175875602 + vadd.s16 d4, ROW7L, ROW3L + vadd.s16 d5, ROW5L, ROW1L + vmull.s16 q6, d4, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, d5, XFIX_1_175875602 + vmull.s16 q7, d4, XFIX_1_175875602 /* Check for the zero coefficients in the right 4x8 half */ push {r4, r5} - vmlal.s16 q7, d5, XFIX_1_175875602_MINUS_0_390180644 - vsubl.s16 q3, ROW0L, ROW4L - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vmlal.s16 q2, ROW6L, XFIX_0_541196100_MINUS_1_847759065 - orr r0, r4, r5 - vmov q4, q6 - vmlsl.s16 q6, ROW5L, XFIX_2_562915447 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vshl.s32 q3, q3, #13 - orr r0, r0, r4 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - orr r0, r0, r5 - vadd.s32 q1, q3, q2 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] - vmov q5, q7 - vadd.s32 q1, q1, q6 - orr r0, r0, r4 - vmlsl.s16 q7, ROW7L, XFIX_0_899976223 - orr r0, r0, r5 - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vrshrn.s32 ROW1L, q1, #11 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5L, XFIX_2_053119869_MINUS_2_562915447 - orr r0, r0, r4 - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - orr r0, r0, r5 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] - vmlal.s16 q6, ROW6L, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - orr r0, r0, r4 - vrshrn.s32 ROW6L, q1, #11 - orr r0, r0, r5 - vadd.s32 q1, q3, q5 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0L, ROW4L - orr r0, r0, r4 - vrshrn.s32 ROW2L, q1, #11 - orr r0, r0, r5 - vrshrn.s32 ROW5L, q3, #11 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7L, XFIX_0_298631336_MINUS_0_899976223 - orr r0, r0, r4 - vadd.s32 q2, q5, q6 - orrs r0, r0, r5 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - orr r0, r4, r5 - vsub.s32 q3, q1, q4 + vmlal.s16 q7, d5, XFIX_1_175875602_MINUS_0_390180644 + vsubl.s16 q3, ROW0L, ROW4L + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vmlal.s16 q2, ROW6L, XFIX_0_541196100_MINUS_1_847759065 + orr r0, r4, r5 + vmov q4, q6 + vmlsl.s16 q6, ROW5L, XFIX_2_562915447 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vshl.s32 q3, q3, #13 + orr r0, r0, r4 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + orr r0, r0, r5 + vadd.s32 q1, q3, q2 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] + vmov q5, q7 + vadd.s32 q1, q1, q6 + orr r0, r0, r4 + vmlsl.s16 q7, ROW7L, XFIX_0_899976223 + orr r0, r0, r5 + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vrshrn.s32 ROW1L, q1, #11 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5L, XFIX_2_053119869_MINUS_2_562915447 + orr r0, r0, r4 + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + orr r0, r0, r5 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] + vmlal.s16 q6, ROW6L, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + orr r0, r0, r4 + vrshrn.s32 ROW6L, q1, #11 + orr r0, r0, r5 + vadd.s32 q1, q3, q5 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0L, ROW4L + orr r0, r0, r4 + vrshrn.s32 ROW2L, q1, #11 + orr r0, r0, r5 + vrshrn.s32 ROW5L, q3, #11 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7L, XFIX_0_298631336_MINUS_0_899976223 + orr r0, r0, r4 + vadd.s32 q2, q5, q6 + orrs r0, r0, r5 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + ldrd r4, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + orr r0, r4, r5 + vsub.s32 q3, q1, q4 pop {r4, r5} - vrshrn.s32 ROW7L, q2, #11 - vrshrn.s32 ROW3L, q5, #11 - vrshrn.s32 ROW0L, q6, #11 - vrshrn.s32 ROW4L, q3, #11 + vrshrn.s32 ROW7L, q2, #11 + vrshrn.s32 ROW3L, q5, #11 + vrshrn.s32 ROW0L, q6, #11 + vrshrn.s32 ROW4L, q3, #11 - beq 3f /* Go to do some special handling for the sparse right 4x8 half */ + beq 3f /* Go to do some special handling for the sparse + right 4x8 half */ /* 1-D IDCT, pass 1, right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vadd.s16 d10, ROW7R, ROW3R - vadd.s16 d8, ROW5R, ROW1R + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vadd.s16 d10, ROW7R, ROW3R + vadd.s16 d8, ROW5R, ROW1R /* Transpose left 4x8 half */ vtrn.16 ROW6L, ROW7L - vmull.s16 q6, d10, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, d8, XFIX_1_175875602 + vmull.s16 q6, d10, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, d8, XFIX_1_175875602 vtrn.16 ROW2L, ROW3L - vmull.s16 q7, d10, XFIX_1_175875602 - vmlal.s16 q7, d8, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q7, d10, XFIX_1_175875602 + vmlal.s16 q7, d8, XFIX_1_175875602_MINUS_0_390180644 vtrn.16 ROW0L, ROW1L - vsubl.s16 q3, ROW0R, ROW4R - vmull.s16 q2, ROW2R, XFIX_0_541196100 - vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 + vsubl.s16 q3, ROW0R, ROW4R + vmull.s16 q2, ROW2R, XFIX_0_541196100 + vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 vtrn.16 ROW4L, ROW5L - vmov q4, q6 - vmlsl.s16 q6, ROW5R, XFIX_2_562915447 - vmlal.s16 q6, ROW3R, XFIX_3_072711026_MINUS_2_562915447 + vmov q4, q6 + vmlsl.s16 q6, ROW5R, XFIX_2_562915447 + vmlal.s16 q6, ROW3R, XFIX_3_072711026_MINUS_2_562915447 vtrn.32 ROW1L, ROW3L - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW1R, XFIX_0_899976223 + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW1R, XFIX_0_899976223 vtrn.32 ROW4L, ROW6L - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 vtrn.32 ROW0L, ROW2L - vmlsl.s16 q7, ROW7R, XFIX_0_899976223 - vmlal.s16 q7, ROW1R, XFIX_1_501321110_MINUS_0_899976223 - vrshrn.s32 ROW1R, q1, #11 + vmlsl.s16 q7, ROW7R, XFIX_0_899976223 + vmlal.s16 q7, ROW1R, XFIX_1_501321110_MINUS_0_899976223 + vrshrn.s32 ROW1R, q1, #11 vtrn.32 ROW5L, ROW7L - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 - vmlsl.s16 q5, ROW3R, XFIX_2_562915447 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2R, XFIX_0_541196100_PLUS_0_765366865 - vmlal.s16 q6, ROW6R, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - vrshrn.s32 ROW6R, q1, #11 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0R, ROW4R - vrshrn.s32 ROW2R, q1, #11 - vrshrn.s32 ROW5R, q3, #11 - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vrshrn.s32 ROW7R, q2, #11 - vrshrn.s32 ROW3R, q5, #11 - vrshrn.s32 ROW0R, q6, #11 - vrshrn.s32 ROW4R, q3, #11 + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 + vmlsl.s16 q5, ROW3R, XFIX_2_562915447 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2R, XFIX_0_541196100_PLUS_0_765366865 + vmlal.s16 q6, ROW6R, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + vrshrn.s32 ROW6R, q1, #11 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0R, ROW4R + vrshrn.s32 ROW2R, q1, #11 + vrshrn.s32 ROW5R, q3, #11 + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vrshrn.s32 ROW7R, q2, #11 + vrshrn.s32 ROW3R, q5, #11 + vrshrn.s32 ROW0R, q6, #11 + vrshrn.s32 ROW4R, q3, #11 /* Transpose right 4x8 half */ vtrn.16 ROW6R, ROW7R vtrn.16 ROW2R, ROW3R @@ -398,122 +404,122 @@ vtrn.32 ROW5R, ROW7R 1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW1R, XFIX_1_175875602 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW1L, XFIX_1_175875602 - vmlal.s16 q6, ROW3R, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ - vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW3R, XFIX_1_175875602 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW3L, XFIX_1_175875602 - vmlal.s16 q7, ROW1R, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ - vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 - vsubl.s16 q3, ROW0L, ROW0R /* ROW4L <-> ROW0R */ - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vmlal.s16 q2, ROW2R, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L <-> ROW2R */ - vmov q4, q6 - vmlsl.s16 q6, ROW1R, XFIX_2_562915447 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 - vmlsl.s16 q7, ROW3R, XFIX_0_899976223 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vshrn.s32 ROW1L, q1, #16 - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW1R, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L <-> ROW1R */ - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - vmlal.s16 q6, ROW2R, XFIX_0_541196100 /* ROW6L <-> ROW2R */ - vsub.s32 q3, q3, q2 - vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW0L, ROW0R /* ROW4L <-> ROW0R */ - vshrn.s32 ROW2L, q1, #16 - vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW3R, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L <-> ROW3R */ - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW3L, q5, #16 - vshrn.s32 ROW0L, q6, #16 - vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW1R, XFIX_1_175875602 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW1L, XFIX_1_175875602 + vmlal.s16 q6, ROW3R, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ + vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW3R, XFIX_1_175875602 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW3L, XFIX_1_175875602 + vmlal.s16 q7, ROW1R, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ + vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 + vsubl.s16 q3, ROW0L, ROW0R /* ROW4L <-> ROW0R */ + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vmlal.s16 q2, ROW2R, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L <-> ROW2R */ + vmov q4, q6 + vmlsl.s16 q6, ROW1R, XFIX_2_562915447 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 + vmlsl.s16 q7, ROW3R, XFIX_0_899976223 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vshrn.s32 ROW1L, q1, #16 + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW1R, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L <-> ROW1R */ + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + vmlal.s16 q6, ROW2R, XFIX_0_541196100 /* ROW6L <-> ROW2R */ + vsub.s32 q3, q3, q2 + vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW0L, ROW0R /* ROW4L <-> ROW0R */ + vshrn.s32 ROW2L, q1, #16 + vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW3R, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L <-> ROW3R */ + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW3L, q5, #16 + vshrn.s32 ROW0L, q6, #16 + vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ /* 1-D IDCT, pass 2, right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW5R, XFIX_1_175875602 - vmlal.s16 q6, ROW5L, XFIX_1_175875602 /* ROW5L <-> ROW1R */ - vmlal.s16 q6, ROW7R, XFIX_1_175875602_MINUS_1_961570560 - vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ - vmull.s16 q7, ROW7R, XFIX_1_175875602 - vmlal.s16 q7, ROW7L, XFIX_1_175875602 /* ROW7L <-> ROW3R */ - vmlal.s16 q7, ROW5R, XFIX_1_175875602_MINUS_0_390180644 - vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ - vsubl.s16 q3, ROW4L, ROW4R /* ROW4L <-> ROW0R */ - vmull.s16 q2, ROW6L, XFIX_0_541196100 /* ROW6L <-> ROW2R */ - vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 - vmov q4, q6 - vmlsl.s16 q6, ROW5R, XFIX_2_562915447 - vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L <-> ROW3R */ - vshl.s32 q3, q3, #13 - vmlsl.s16 q4, ROW5L, XFIX_0_899976223 /* ROW5L <-> ROW1R */ - vadd.s32 q1, q3, q2 - vmov q5, q7 - vadd.s32 q1, q1, q6 - vmlsl.s16 q7, ROW7R, XFIX_0_899976223 - vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L <-> ROW1R */ - vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ - vsub.s32 q1, q1, q6 - vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 - vmlsl.s16 q5, ROW7L, XFIX_2_562915447 /* ROW7L <-> ROW3R */ - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L <-> ROW2R */ - vmlal.s16 q6, ROW6R, XFIX_0_541196100 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW6R, q1, #16 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vaddl.s16 q5, ROW4L, ROW4R /* ROW4L <-> ROW0R */ - vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ - vshrn.s32 ROW5R, q3, #16 - vshl.s32 q5, q5, #13 - vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW7R, q2, #16 - vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ - vshrn.s32 ROW4R, q3, #16 + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW5R, XFIX_1_175875602 + vmlal.s16 q6, ROW5L, XFIX_1_175875602 /* ROW5L <-> ROW1R */ + vmlal.s16 q6, ROW7R, XFIX_1_175875602_MINUS_1_961570560 + vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */ + vmull.s16 q7, ROW7R, XFIX_1_175875602 + vmlal.s16 q7, ROW7L, XFIX_1_175875602 /* ROW7L <-> ROW3R */ + vmlal.s16 q7, ROW5R, XFIX_1_175875602_MINUS_0_390180644 + vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */ + vsubl.s16 q3, ROW4L, ROW4R /* ROW4L <-> ROW0R */ + vmull.s16 q2, ROW6L, XFIX_0_541196100 /* ROW6L <-> ROW2R */ + vmlal.s16 q2, ROW6R, XFIX_0_541196100_MINUS_1_847759065 + vmov q4, q6 + vmlsl.s16 q6, ROW5R, XFIX_2_562915447 + vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L <-> ROW3R */ + vshl.s32 q3, q3, #13 + vmlsl.s16 q4, ROW5L, XFIX_0_899976223 /* ROW5L <-> ROW1R */ + vadd.s32 q1, q3, q2 + vmov q5, q7 + vadd.s32 q1, q1, q6 + vmlsl.s16 q7, ROW7R, XFIX_0_899976223 + vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L <-> ROW1R */ + vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ + vsub.s32 q1, q1, q6 + vmlal.s16 q5, ROW5R, XFIX_2_053119869_MINUS_2_562915447 + vmlsl.s16 q5, ROW7L, XFIX_2_562915447 /* ROW7L <-> ROW3R */ + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L <-> ROW2R */ + vmlal.s16 q6, ROW6R, XFIX_0_541196100 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW6R, q1, #16 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vaddl.s16 q5, ROW4L, ROW4R /* ROW4L <-> ROW0R */ + vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ + vshrn.s32 ROW5R, q3, #16 + vshl.s32 q5, q5, #13 + vmlal.s16 q4, ROW7R, XFIX_0_298631336_MINUS_0_899976223 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW7R, q2, #16 + vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ + vshrn.s32 ROW4R, q3, #16 2: /* Descale to 8-bit and range limit */ - vqrshrn.s16 d16, q8, #2 - vqrshrn.s16 d17, q9, #2 - vqrshrn.s16 d18, q10, #2 - vqrshrn.s16 d19, q11, #2 - vpop {d8-d15} /* restore NEON registers */ - vqrshrn.s16 d20, q12, #2 + vqrshrn.s16 d16, q8, #2 + vqrshrn.s16 d17, q9, #2 + vqrshrn.s16 d18, q10, #2 + vqrshrn.s16 d19, q11, #2 + vpop {d8-d15} /* restore NEON registers */ + vqrshrn.s16 d20, q12, #2 /* Transpose the final 8-bit samples and do signed->unsigned conversion */ - vtrn.16 q8, q9 - vqrshrn.s16 d21, q13, #2 - vqrshrn.s16 d22, q14, #2 - vmov.u8 q0, #(CENTERJSAMPLE) - vqrshrn.s16 d23, q15, #2 - vtrn.8 d16, d17 - vtrn.8 d18, d19 - vadd.u8 q8, q8, q0 - vadd.u8 q9, q9, q0 - vtrn.16 q10, q11 + vtrn.16 q8, q9 + vqrshrn.s16 d21, q13, #2 + vqrshrn.s16 d22, q14, #2 + vmov.u8 q0, #(CENTERJSAMPLE) + vqrshrn.s16 d23, q15, #2 + vtrn.8 d16, d17 + vtrn.8 d18, d19 + vadd.u8 q8, q8, q0 + vadd.u8 q9, q9, q0 + vtrn.16 q10, q11 /* Store results to the output buffer */ ldmia OUTPUT_BUF!, {TMP1, TMP2} add TMP1, TMP1, OUTPUT_COL @@ -525,7 +531,7 @@ add TMP1, TMP1, OUTPUT_COL add TMP2, TMP2, OUTPUT_COL vst1.8 {d18}, [TMP1] - vadd.u8 q10, q10, q0 + vadd.u8 q10, q10, q0 vst1.8 {d19}, [TMP2] ldmia OUTPUT_BUF, {TMP1, TMP2, TMP3, TMP4} add TMP1, TMP1, OUTPUT_COL @@ -534,7 +540,7 @@ add TMP4, TMP4, OUTPUT_COL vtrn.8 d22, d23 vst1.8 {d20}, [TMP1] - vadd.u8 q11, q11, q0 + vadd.u8 q11, q11, q0 vst1.8 {d21}, [TMP2] vst1.8 {d22}, [TMP3] vst1.8 {d23}, [TMP4] @@ -547,14 +553,15 @@ vtrn.16 ROW2L, ROW3L vtrn.16 ROW0L, ROW1L vtrn.16 ROW4L, ROW5L - vshl.s16 ROW0R, ROW0R, #2 /* PASS1_BITS */ + vshl.s16 ROW0R, ROW0R, #2 /* PASS1_BITS */ vtrn.32 ROW1L, ROW3L vtrn.32 ROW4L, ROW6L vtrn.32 ROW0L, ROW2L vtrn.32 ROW5L, ROW7L cmp r0, #0 - beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */ + beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second + pass */ /* Only row 0 is non-zero for the right 4x8 half */ vdup.s16 ROW1R, ROW0R[1] @@ -565,83 +572,83 @@ vdup.s16 ROW6R, ROW0R[2] vdup.s16 ROW7R, ROW0R[3] vdup.s16 ROW0R, ROW0R[0] - b 1b /* Go to 'normal' second pass */ + b 1b /* Go to 'normal' second pass */ 4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW1L, XFIX_1_175875602 - vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW3L, XFIX_1_175875602 - vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 - vmull.s16 q2, ROW2L, XFIX_0_541196100 - vshll.s16 q3, ROW0L, #13 - vmov q4, q6 - vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 - vmlsl.s16 q4, ROW1L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 - vadd.s32 q1, q1, q6 - vadd.s32 q6, q6, q6 - vmlsl.s16 q5, ROW3L, XFIX_2_562915447 - vshrn.s32 ROW1L, q1, #16 - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vshll.s16 q5, ROW0L, #13 - vshrn.s32 ROW2L, q1, #16 - vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW3L, q5, #16 - vshrn.s32 ROW0L, q6, #16 - vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW1L, XFIX_1_175875602 + vmlal.s16 q6, ROW3L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW3L, XFIX_1_175875602 + vmlal.s16 q7, ROW1L, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q2, ROW2L, XFIX_0_541196100 + vshll.s16 q3, ROW0L, #13 + vmov q4, q6 + vmlal.s16 q6, ROW3L, XFIX_3_072711026_MINUS_2_562915447 + vmlsl.s16 q4, ROW1L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vmlal.s16 q7, ROW1L, XFIX_1_501321110_MINUS_0_899976223 + vadd.s32 q1, q1, q6 + vadd.s32 q6, q6, q6 + vmlsl.s16 q5, ROW3L, XFIX_2_562915447 + vshrn.s32 ROW1L, q1, #16 + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW2L, XFIX_0_541196100_PLUS_0_765366865 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW2R, q1, #16 /* ROW6L <-> ROW2R */ + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vshll.s16 q5, ROW0L, #13 + vshrn.s32 ROW2L, q1, #16 + vshrn.s32 ROW1R, q3, #16 /* ROW5L <-> ROW1R */ + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW3R, q2, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW3L, q5, #16 + vshrn.s32 ROW0L, q6, #16 + vshrn.s32 ROW0R, q3, #16 /* ROW4L <-> ROW0R */ /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */ - vld1.s16 {d2}, [ip, :64] /* reload constants */ - vmull.s16 q6, ROW5L, XFIX_1_175875602 - vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 - vmull.s16 q7, ROW7L, XFIX_1_175875602 - vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 - vmull.s16 q2, ROW6L, XFIX_0_541196100 - vshll.s16 q3, ROW4L, #13 - vmov q4, q6 - vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 - vmlsl.s16 q4, ROW5L, XFIX_0_899976223 - vadd.s32 q1, q3, q2 - vmov q5, q7 - vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 - vadd.s32 q1, q1, q6 - vadd.s32 q6, q6, q6 - vmlsl.s16 q5, ROW7L, XFIX_2_562915447 - vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ - vsub.s32 q1, q1, q6 - vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 - vsub.s32 q3, q3, q2 - vshrn.s32 ROW6R, q1, #16 - vadd.s32 q1, q3, q5 - vsub.s32 q3, q3, q5 - vshll.s16 q5, ROW4L, #13 - vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ - vshrn.s32 ROW5R, q3, #16 - vadd.s32 q2, q5, q6 - vsub.s32 q1, q5, q6 - vadd.s32 q6, q2, q7 - vsub.s32 q2, q2, q7 - vadd.s32 q5, q1, q4 - vsub.s32 q3, q1, q4 - vshrn.s32 ROW7R, q2, #16 - vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ - vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ - vshrn.s32 ROW4R, q3, #16 - b 2b /* Go to epilogue */ + vld1.s16 {d2}, [ip, :64] /* reload constants */ + vmull.s16 q6, ROW5L, XFIX_1_175875602 + vmlal.s16 q6, ROW7L, XFIX_1_175875602_MINUS_1_961570560 + vmull.s16 q7, ROW7L, XFIX_1_175875602 + vmlal.s16 q7, ROW5L, XFIX_1_175875602_MINUS_0_390180644 + vmull.s16 q2, ROW6L, XFIX_0_541196100 + vshll.s16 q3, ROW4L, #13 + vmov q4, q6 + vmlal.s16 q6, ROW7L, XFIX_3_072711026_MINUS_2_562915447 + vmlsl.s16 q4, ROW5L, XFIX_0_899976223 + vadd.s32 q1, q3, q2 + vmov q5, q7 + vmlal.s16 q7, ROW5L, XFIX_1_501321110_MINUS_0_899976223 + vadd.s32 q1, q1, q6 + vadd.s32 q6, q6, q6 + vmlsl.s16 q5, ROW7L, XFIX_2_562915447 + vshrn.s32 ROW5L, q1, #16 /* ROW5L <-> ROW1R */ + vsub.s32 q1, q1, q6 + vmull.s16 q6, ROW6L, XFIX_0_541196100_PLUS_0_765366865 + vsub.s32 q3, q3, q2 + vshrn.s32 ROW6R, q1, #16 + vadd.s32 q1, q3, q5 + vsub.s32 q3, q3, q5 + vshll.s16 q5, ROW4L, #13 + vshrn.s32 ROW6L, q1, #16 /* ROW6L <-> ROW2R */ + vshrn.s32 ROW5R, q3, #16 + vadd.s32 q2, q5, q6 + vsub.s32 q1, q5, q6 + vadd.s32 q6, q2, q7 + vsub.s32 q2, q2, q7 + vadd.s32 q5, q1, q4 + vsub.s32 q3, q1, q4 + vshrn.s32 ROW7R, q2, #16 + vshrn.s32 ROW7L, q5, #16 /* ROW7L <-> ROW3R */ + vshrn.s32 ROW4L, q6, #16 /* ROW4L <-> ROW0R */ + vshrn.s32 ROW4R, q3, #16 + b 2b /* Go to epilogue */ .unreq DCT_TABLE .unreq COEF_BLOCK @@ -668,7 +675,7 @@ .unreq ROW6R .unreq ROW7L .unreq ROW7R -.endfunc + /*****************************************************************************/ @@ -695,10 +702,10 @@ .balign 16 jsimd_idct_ifast_neon_consts: - .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ - .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ - .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ - .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ + .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ + .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ + .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ + .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ asm_function jsimd_idct_ifast_neon @@ -728,9 +735,9 @@ vld1.16 {d16, d17, d18, d19}, [COEF_BLOCK, :128]! vld1.16 {d0, d1, d2, d3}, [DCT_TABLE, :128]! vld1.16 {d20, d21, d22, d23}, [COEF_BLOCK, :128]! - vmul.s16 q8, q8, q0 + vmul.s16 q8, q8, q0 vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! - vmul.s16 q9, q9, q1 + vmul.s16 q9, q9, q1 vld1.16 {d24, d25, d26, d27}, [COEF_BLOCK, :128]! vmul.s16 q10, q10, q2 vld1.16 {d0, d1, d2, d3}, [DCT_TABLE, :128]! @@ -740,124 +747,124 @@ vld1.16 {d4, d5, d6, d7}, [DCT_TABLE, :128]! vmul.s16 q14, q14, q2 vmul.s16 q13, q13, q1 - vld1.16 {d0}, [ip, :64] /* load constants */ + vld1.16 {d0}, [ip, :64] /* load constants */ vmul.s16 q15, q15, q3 - vpush {d8-d13} /* save NEON registers */ + vpush {d8-d13} /* save NEON registers */ /* 1-D IDCT, pass 1 */ - vsub.s16 q2, q10, q14 + vsub.s16 q2, q10, q14 vadd.s16 q14, q10, q14 - vsub.s16 q1, q11, q13 + vsub.s16 q1, q11, q13 vadd.s16 q13, q11, q13 - vsub.s16 q5, q9, q15 - vadd.s16 q15, q9, q15 - vqdmulh.s16 q4, q2, XFIX_1_414213562 - vqdmulh.s16 q6, q1, XFIX_2_613125930 - vadd.s16 q3, q1, q1 - vsub.s16 q1, q5, q1 - vadd.s16 q10, q2, q4 - vqdmulh.s16 q4, q1, XFIX_1_847759065 - vsub.s16 q2, q15, q13 - vadd.s16 q3, q3, q6 - vqdmulh.s16 q6, q2, XFIX_1_414213562 - vadd.s16 q1, q1, q4 - vqdmulh.s16 q4, q5, XFIX_1_082392200 + vsub.s16 q5, q9, q15 + vadd.s16 q15, q9, q15 + vqdmulh.s16 q4, q2, XFIX_1_414213562 + vqdmulh.s16 q6, q1, XFIX_2_613125930 + vadd.s16 q3, q1, q1 + vsub.s16 q1, q5, q1 + vadd.s16 q10, q2, q4 + vqdmulh.s16 q4, q1, XFIX_1_847759065 + vsub.s16 q2, q15, q13 + vadd.s16 q3, q3, q6 + vqdmulh.s16 q6, q2, XFIX_1_414213562 + vadd.s16 q1, q1, q4 + vqdmulh.s16 q4, q5, XFIX_1_082392200 vsub.s16 q10, q10, q14 - vadd.s16 q2, q2, q6 - vsub.s16 q6, q8, q12 - vadd.s16 q12, q8, q12 - vadd.s16 q9, q5, q4 - vadd.s16 q5, q6, q10 - vsub.s16 q10, q6, q10 - vadd.s16 q6, q15, q13 - vadd.s16 q8, q12, q14 - vsub.s16 q3, q6, q3 + vadd.s16 q2, q2, q6 + vsub.s16 q6, q8, q12 + vadd.s16 q12, q8, q12 + vadd.s16 q9, q5, q4 + vadd.s16 q5, q6, q10 + vsub.s16 q10, q6, q10 + vadd.s16 q6, q15, q13 + vadd.s16 q8, q12, q14 + vsub.s16 q3, q6, q3 vsub.s16 q12, q12, q14 - vsub.s16 q3, q3, q1 - vsub.s16 q1, q9, q1 - vadd.s16 q2, q3, q2 - vsub.s16 q15, q8, q6 - vadd.s16 q1, q1, q2 - vadd.s16 q8, q8, q6 - vadd.s16 q14, q5, q3 - vsub.s16 q9, q5, q3 + vsub.s16 q3, q3, q1 + vsub.s16 q1, q9, q1 + vadd.s16 q2, q3, q2 + vsub.s16 q15, q8, q6 + vadd.s16 q1, q1, q2 + vadd.s16 q8, q8, q6 + vadd.s16 q14, q5, q3 + vsub.s16 q9, q5, q3 vsub.s16 q13, q10, q2 vadd.s16 q10, q10, q2 /* Transpose */ - vtrn.16 q8, q9 + vtrn.16 q8, q9 vsub.s16 q11, q12, q1 vtrn.16 q14, q15 vadd.s16 q12, q12, q1 vtrn.16 q10, q11 vtrn.16 q12, q13 - vtrn.32 q9, q11 + vtrn.32 q9, q11 vtrn.32 q12, q14 - vtrn.32 q8, q10 + vtrn.32 q8, q10 vtrn.32 q13, q15 vswp d28, d21 vswp d26, d19 /* 1-D IDCT, pass 2 */ - vsub.s16 q2, q10, q14 + vsub.s16 q2, q10, q14 vswp d30, d23 vadd.s16 q14, q10, q14 vswp d24, d17 - vsub.s16 q1, q11, q13 + vsub.s16 q1, q11, q13 vadd.s16 q13, q11, q13 - vsub.s16 q5, q9, q15 - vadd.s16 q15, q9, q15 - vqdmulh.s16 q4, q2, XFIX_1_414213562 - vqdmulh.s16 q6, q1, XFIX_2_613125930 - vadd.s16 q3, q1, q1 - vsub.s16 q1, q5, q1 - vadd.s16 q10, q2, q4 - vqdmulh.s16 q4, q1, XFIX_1_847759065 - vsub.s16 q2, q15, q13 - vadd.s16 q3, q3, q6 - vqdmulh.s16 q6, q2, XFIX_1_414213562 - vadd.s16 q1, q1, q4 - vqdmulh.s16 q4, q5, XFIX_1_082392200 + vsub.s16 q5, q9, q15 + vadd.s16 q15, q9, q15 + vqdmulh.s16 q4, q2, XFIX_1_414213562 + vqdmulh.s16 q6, q1, XFIX_2_613125930 + vadd.s16 q3, q1, q1 + vsub.s16 q1, q5, q1 + vadd.s16 q10, q2, q4 + vqdmulh.s16 q4, q1, XFIX_1_847759065 + vsub.s16 q2, q15, q13 + vadd.s16 q3, q3, q6 + vqdmulh.s16 q6, q2, XFIX_1_414213562 + vadd.s16 q1, q1, q4 + vqdmulh.s16 q4, q5, XFIX_1_082392200 vsub.s16 q10, q10, q14 - vadd.s16 q2, q2, q6 - vsub.s16 q6, q8, q12 - vadd.s16 q12, q8, q12 - vadd.s16 q9, q5, q4 - vadd.s16 q5, q6, q10 - vsub.s16 q10, q6, q10 - vadd.s16 q6, q15, q13 - vadd.s16 q8, q12, q14 - vsub.s16 q3, q6, q3 + vadd.s16 q2, q2, q6 + vsub.s16 q6, q8, q12 + vadd.s16 q12, q8, q12 + vadd.s16 q9, q5, q4 + vadd.s16 q5, q6, q10 + vsub.s16 q10, q6, q10 + vadd.s16 q6, q15, q13 + vadd.s16 q8, q12, q14 + vsub.s16 q3, q6, q3 vsub.s16 q12, q12, q14 - vsub.s16 q3, q3, q1 - vsub.s16 q1, q9, q1 - vadd.s16 q2, q3, q2 - vsub.s16 q15, q8, q6 - vadd.s16 q1, q1, q2 - vadd.s16 q8, q8, q6 - vadd.s16 q14, q5, q3 - vsub.s16 q9, q5, q3 + vsub.s16 q3, q3, q1 + vsub.s16 q1, q9, q1 + vadd.s16 q2, q3, q2 + vsub.s16 q15, q8, q6 + vadd.s16 q1, q1, q2 + vadd.s16 q8, q8, q6 + vadd.s16 q14, q5, q3 + vsub.s16 q9, q5, q3 vsub.s16 q13, q10, q2 - vpop {d8-d13} /* restore NEON registers */ + vpop {d8-d13} /* restore NEON registers */ vadd.s16 q10, q10, q2 vsub.s16 q11, q12, q1 vadd.s16 q12, q12, q1 /* Descale to 8-bit and range limit */ - vmov.u8 q0, #0x80 - vqshrn.s16 d16, q8, #5 - vqshrn.s16 d17, q9, #5 + vmov.u8 q0, #0x80 + vqshrn.s16 d16, q8, #5 + vqshrn.s16 d17, q9, #5 vqshrn.s16 d18, q10, #5 vqshrn.s16 d19, q11, #5 vqshrn.s16 d20, q12, #5 vqshrn.s16 d21, q13, #5 vqshrn.s16 d22, q14, #5 vqshrn.s16 d23, q15, #5 - vadd.u8 q8, q8, q0 - vadd.u8 q9, q9, q0 + vadd.u8 q8, q8, q0 + vadd.u8 q9, q9, q0 vadd.u8 q10, q10, q0 vadd.u8 q11, q11, q0 /* Transpose the final 8-bit samples */ - vtrn.16 q8, q9 + vtrn.16 q8, q9 vtrn.16 q10, q11 - vtrn.32 q8, q10 - vtrn.32 q9, q11 + vtrn.32 q8, q10 + vtrn.32 q9, q11 vtrn.8 d16, d17 vtrn.8 d18, d19 /* Store results to the output buffer */ @@ -892,7 +899,7 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc + /*****************************************************************************/ @@ -916,81 +923,80 @@ #define CONST_BITS 13 -#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ .balign 16 jsimd_idct_4x4_neon_consts: - .short FIX_1_847759065 /* d0[0] */ - .short -FIX_0_765366865 /* d0[1] */ - .short -FIX_0_211164243 /* d0[2] */ - .short FIX_1_451774981 /* d0[3] */ - .short -FIX_2_172734803 /* d1[0] */ - .short FIX_1_061594337 /* d1[1] */ - .short -FIX_0_509795579 /* d1[2] */ - .short -FIX_0_601344887 /* d1[3] */ - .short FIX_0_899976223 /* d2[0] */ - .short FIX_2_562915447 /* d2[1] */ - .short 1 << (CONST_BITS+1) /* d2[2] */ - .short 0 /* d2[3] */ + .short FIX_1_847759065 /* d0[0] */ + .short -FIX_0_765366865 /* d0[1] */ + .short -FIX_0_211164243 /* d0[2] */ + .short FIX_1_451774981 /* d0[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* d2[0] */ + .short FIX_2_562915447 /* d2[1] */ + .short 1 << (CONST_BITS+1) /* d2[2] */ + .short 0 /* d2[3] */ .macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 - vmull.s16 q14, \x4, d2[2] - vmlal.s16 q14, \x8, d0[0] + vmull.s16 q14, \x4, d2[2] + vmlal.s16 q14, \x8, d0[0] vmlal.s16 q14, \x14, d0[1] vmull.s16 q13, \x16, d1[2] vmlal.s16 q13, \x12, d1[3] vmlal.s16 q13, \x10, d2[0] - vmlal.s16 q13, \x6, d2[1] + vmlal.s16 q13, \x6, d2[1] - vmull.s16 q15, \x4, d2[2] - vmlsl.s16 q15, \x8, d0[0] + vmull.s16 q15, \x4, d2[2] + vmlsl.s16 q15, \x8, d0[0] vmlsl.s16 q15, \x14, d0[1] vmull.s16 q12, \x16, d0[2] vmlal.s16 q12, \x12, d0[3] vmlal.s16 q12, \x10, d1[0] - vmlal.s16 q12, \x6, d1[1] + vmlal.s16 q12, \x6, d1[1] vadd.s32 q10, q14, q13 vsub.s32 q14, q14, q13 -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q14, q14, #\shift + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q14, q14, #\shift vmovn.s32 \y26, q10 vmovn.s32 \y29, q14 -.else + .else vrshrn.s32 \y26, q10, #\shift vrshrn.s32 \y29, q14, #\shift -.endif + .endif vadd.s32 q10, q15, q12 vsub.s32 q15, q15, q12 -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q15, q15, #\shift + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q15, q15, #\shift vmovn.s32 \y27, q10 vmovn.s32 \y28, q15 -.else + .else vrshrn.s32 \y27, q10, #\shift vrshrn.s32 \y28, q15, #\shift -.endif - + .endif .endm asm_function jsimd_idct_4x4_neon @@ -1104,10 +1110,10 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc .purgem idct_helper + /*****************************************************************************/ /* @@ -1126,31 +1132,30 @@ .balign 8 jsimd_idct_2x2_neon_consts: - .short -FIX_0_720959822 /* d0[0] */ - .short FIX_0_850430095 /* d0[1] */ - .short -FIX_1_272758580 /* d0[2] */ - .short FIX_3_624509785 /* d0[3] */ + .short -FIX_0_720959822 /* d0[0] */ + .short FIX_0_850430095 /* d0[1] */ + .short -FIX_1_272758580 /* d0[2] */ + .short FIX_3_624509785 /* d0[3] */ .macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 - vshll.s16 q14, \x4, #15 - vmull.s16 q13, \x6, d0[3] - vmlal.s16 q13, \x10, d0[2] - vmlal.s16 q13, \x12, d0[1] - vmlal.s16 q13, \x16, d0[0] - - vadd.s32 q10, q14, q13 - vsub.s32 q14, q14, q13 - -.if \shift > 16 - vrshr.s32 q10, q10, #\shift - vrshr.s32 q14, q14, #\shift - vmovn.s32 \y26, q10 - vmovn.s32 \y27, q14 -.else - vrshrn.s32 \y26, q10, #\shift - vrshrn.s32 \y27, q14, #\shift -.endif + vshll.s16 q14, \x4, #15 + vmull.s16 q13, \x6, d0[3] + vmlal.s16 q13, \x10, d0[2] + vmlal.s16 q13, \x12, d0[1] + vmlal.s16 q13, \x16, d0[0] + vadd.s32 q10, q14, q13 + vsub.s32 q14, q14, q13 + + .if \shift > 16 + vrshr.s32 q10, q10, #\shift + vrshr.s32 q14, q14, #\shift + vmovn.s32 \y26, q10 + vmovn.s32 \y27, q14 + .else + vrshrn.s32 \y26, q10, #\shift + vrshrn.s32 \y27, q14, #\shift + .endif .endm asm_function jsimd_idct_2x2_neon @@ -1204,30 +1209,30 @@ /* Pass 1 */ #if 0 idct_helper d4, d6, d10, d12, d16, 13, d4, d6 - transpose_4x4 d4, d6, d8, d10 + transpose_4x4 d4, d6, d8, d10 idct_helper d5, d7, d11, d13, d17, 13, d5, d7 - transpose_4x4 d5, d7, d9, d11 + transpose_4x4 d5, d7, d9, d11 #else - vmull.s16 q13, d6, d0[3] + vmull.s16 q13, d6, d0[3] vmlal.s16 q13, d10, d0[2] vmlal.s16 q13, d12, d0[1] vmlal.s16 q13, d16, d0[0] - vmull.s16 q12, d7, d0[3] + vmull.s16 q12, d7, d0[3] vmlal.s16 q12, d11, d0[2] vmlal.s16 q12, d13, d0[1] vmlal.s16 q12, d17, d0[0] - vshll.s16 q14, d4, #15 - vshll.s16 q15, d5, #15 + vshll.s16 q14, d4, #15 + vshll.s16 q15, d5, #15 vadd.s32 q10, q14, q13 vsub.s32 q14, q14, q13 - vrshrn.s32 d4, q10, #13 - vrshrn.s32 d6, q14, #13 + vrshrn.s32 d4, q10, #13 + vrshrn.s32 d6, q14, #13 vadd.s32 q10, q15, q12 vsub.s32 q14, q15, q12 - vrshrn.s32 d5, q10, #13 - vrshrn.s32 d7, q14, #13 - vtrn.16 q2, q3 - vtrn.32 q3, q5 + vrshrn.s32 d5, q10, #13 + vrshrn.s32 d7, q14, #13 + vtrn.16 q2, q3 + vtrn.32 q3, q5 #endif /* Pass 2 */ @@ -1258,10 +1263,10 @@ .unreq OUTPUT_COL .unreq TMP1 .unreq TMP2 -.endfunc .purgem idct_helper + /*****************************************************************************/ /* @@ -1277,97 +1282,110 @@ .macro do_load size + .if \size == 8 + vld1.8 {d4}, [U, :64]! + vld1.8 {d5}, [V, :64]! + vld1.8 {d0}, [Y, :64]! + pld [U, #64] + pld [V, #64] + pld [Y, #64] + .elseif \size == 4 + vld1.8 {d4[0]}, [U]! + vld1.8 {d4[1]}, [U]! + vld1.8 {d4[2]}, [U]! + vld1.8 {d4[3]}, [U]! + vld1.8 {d5[0]}, [V]! + vld1.8 {d5[1]}, [V]! + vld1.8 {d5[2]}, [V]! + vld1.8 {d5[3]}, [V]! + vld1.8 {d0[0]}, [Y]! + vld1.8 {d0[1]}, [Y]! + vld1.8 {d0[2]}, [Y]! + vld1.8 {d0[3]}, [Y]! + .elseif \size == 2 + vld1.8 {d4[4]}, [U]! + vld1.8 {d4[5]}, [U]! + vld1.8 {d5[4]}, [V]! + vld1.8 {d5[5]}, [V]! + vld1.8 {d0[4]}, [Y]! + vld1.8 {d0[5]}, [Y]! + .elseif \size == 1 + vld1.8 {d4[6]}, [U]! + vld1.8 {d5[6]}, [V]! + vld1.8 {d0[6]}, [Y]! + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_store bpp, size + .if \bpp == 24 .if \size == 8 - vld1.8 {d4}, [U, :64]! - vld1.8 {d5}, [V, :64]! - vld1.8 {d0}, [Y, :64]! - pld [U, #64] - pld [V, #64] - pld [Y, #64] + vst3.8 {d10, d11, d12}, [RGB]! .elseif \size == 4 - vld1.8 {d4[0]}, [U]! - vld1.8 {d4[1]}, [U]! - vld1.8 {d4[2]}, [U]! - vld1.8 {d4[3]}, [U]! - vld1.8 {d5[0]}, [V]! - vld1.8 {d5[1]}, [V]! - vld1.8 {d5[2]}, [V]! - vld1.8 {d5[3]}, [V]! - vld1.8 {d0[0]}, [Y]! - vld1.8 {d0[1]}, [Y]! - vld1.8 {d0[2]}, [Y]! - vld1.8 {d0[3]}, [Y]! + vst3.8 {d10[0], d11[0], d12[0]}, [RGB]! + vst3.8 {d10[1], d11[1], d12[1]}, [RGB]! + vst3.8 {d10[2], d11[2], d12[2]}, [RGB]! + vst3.8 {d10[3], d11[3], d12[3]}, [RGB]! .elseif \size == 2 - vld1.8 {d4[4]}, [U]! - vld1.8 {d4[5]}, [U]! - vld1.8 {d5[4]}, [V]! - vld1.8 {d5[5]}, [V]! - vld1.8 {d0[4]}, [Y]! - vld1.8 {d0[5]}, [Y]! + vst3.8 {d10[4], d11[4], d12[4]}, [RGB]! + vst3.8 {d10[5], d11[5], d12[5]}, [RGB]! .elseif \size == 1 - vld1.8 {d4[6]}, [U]! - vld1.8 {d5[6]}, [V]! - vld1.8 {d0[6]}, [Y]! + vst3.8 {d10[6], d11[6], d12[6]}, [RGB]! .else - .error unsupported macroblock size + .error unsupported macroblock size .endif -.endm - -.macro do_store bpp, size - .if \bpp == 24 - .if \size == 8 - vst3.8 {d10, d11, d12}, [RGB]! - .elseif \size == 4 - vst3.8 {d10[0], d11[0], d12[0]}, [RGB]! - vst3.8 {d10[1], d11[1], d12[1]}, [RGB]! - vst3.8 {d10[2], d11[2], d12[2]}, [RGB]! - vst3.8 {d10[3], d11[3], d12[3]}, [RGB]! - .elseif \size == 2 - vst3.8 {d10[4], d11[4], d12[4]}, [RGB]! - vst3.8 {d10[5], d11[5], d12[5]}, [RGB]! - .elseif \size == 1 - vst3.8 {d10[6], d11[6], d12[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif - .elseif \bpp == 32 - .if \size == 8 - vst4.8 {d10, d11, d12, d13}, [RGB]! - .elseif \size == 4 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! - .elseif \size == 2 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! - vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! - .elseif \size == 1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif + .elseif \bpp == 32 + .if \size == 8 + vst4.8 {d10, d11, d12, d13}, [RGB]! + .elseif \size == 4 + vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! + vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! + vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! + vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! + .elseif \size == 2 + vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! + vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! + .elseif \size == 1 + vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! .else - .error unsupported bpp + .error unsupported macroblock size .endif + .elseif \bpp == 16 + .if \size == 8 + vst1.16 {q15}, [RGB]! + .elseif \size == 4 + vst1.16 {d30}, [RGB]! + .elseif \size == 2 + vst1.16 {d31[0]}, [RGB]! + vst1.16 {d31[1]}, [RGB]! + .elseif \size == 1 + vst1.16 {d31[2]}, [RGB]! + .else + .error unsupported macroblock size + .endif + .else + .error unsupported bpp + .endif .endm .macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs /* - * 2 stage pipelined YCbCr->RGB conversion + * 2-stage pipelined YCbCr->RGB conversion */ .macro do_yuv_to_rgb_stage1 - vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ - vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ - vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ - vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ - vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ - vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ - vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ - vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ - vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ - vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ + vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ + vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ + vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ + vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ + vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ + vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ + vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ + vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ + vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ .endm .macro do_yuv_to_rgb_stage2 @@ -1377,44 +1395,71 @@ vrshrn.s32 d25, q13, #14 vrshrn.s32 d28, q14, #14 vrshrn.s32 d29, q15, #14 - vaddw.u8 q10, q10, d0 + vaddw.u8 q11, q10, d0 vaddw.u8 q12, q12, d0 vaddw.u8 q14, q14, d0 - vqmovun.s16 d1\g_offs, q10 + .if \bpp != 16 + vqmovun.s16 d1\g_offs, q11 vqmovun.s16 d1\r_offs, q12 vqmovun.s16 d1\b_offs, q14 + .else /* rgb565 */ + vqshlu.s16 q13, q11, #8 + vqshlu.s16 q15, q12, #8 + vqshlu.s16 q14, q14, #8 + vsri.u16 q15, q13, #5 + vsri.u16 q15, q14, #11 + .endif .endm .macro do_yuv_to_rgb_stage2_store_load_stage1 + /* "do_yuv_to_rgb_stage2" and "store" */ + vrshrn.s32 d20, q10, #15 + /* "load" and "do_yuv_to_rgb_stage1" */ + pld [U, #64] + vrshrn.s32 d21, q11, #15 + pld [V, #64] + vrshrn.s32 d24, q12, #14 + vrshrn.s32 d25, q13, #14 vld1.8 {d4}, [U, :64]! - vrshrn.s32 d20, q10, #15 - vrshrn.s32 d21, q11, #15 - vrshrn.s32 d24, q12, #14 - vrshrn.s32 d25, q13, #14 - vrshrn.s32 d28, q14, #14 + vrshrn.s32 d28, q14, #14 vld1.8 {d5}, [V, :64]! - vrshrn.s32 d29, q15, #14 - vaddw.u8 q10, q10, d0 - vaddw.u8 q12, q12, d0 - vaddw.u8 q14, q14, d0 - vqmovun.s16 d1\g_offs, q10 + vrshrn.s32 d29, q15, #14 + vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ + vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ + vaddw.u8 q11, q10, d0 + vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ + vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ + vaddw.u8 q12, q12, d0 + vaddw.u8 q14, q14, d0 + .if \bpp != 16 /**************** rgb24/rgb32 ******************************/ + vqmovun.s16 d1\g_offs, q11 + pld [Y, #64] + vqmovun.s16 d1\r_offs, q12 vld1.8 {d0}, [Y, :64]! - vqmovun.s16 d1\r_offs, q12 - pld [U, #64] - pld [V, #64] + vqmovun.s16 d1\b_offs, q14 + vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ + vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ + do_store \bpp, 8 + vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ + vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ + vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ + vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + .else /**************************** rgb565 ********************************/ + vqshlu.s16 q13, q11, #8 pld [Y, #64] - vqmovun.s16 d1\b_offs, q14 - vaddw.u8 q3, q1, d4 /* q3 = u - 128 */ - vaddw.u8 q4, q1, d5 /* q2 = v - 128 */ - do_store \bpp, 8 - vmull.s16 q10, d6, d1[1] /* multiply by -11277 */ - vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */ - vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ - vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ - vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ - vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ - vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ - vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ + vqshlu.s16 q15, q12, #8 + vqshlu.s16 q14, q14, #8 + vld1.8 {d0}, [Y, :64]! + vmull.s16 q11, d7, d1[1] + vmlal.s16 q11, d9, d1[2] + vsri.u16 q15, q13, #5 + vmull.s16 q12, d8, d1[0] + vsri.u16 q15, q14, #11 + vmull.s16 q13, d9, d1[0] + vmull.s16 q14, d6, d1[3] + do_store \bpp, 8 + vmull.s16 q15, d7, d1[3] + .endif .endm .macro do_yuv_to_rgb @@ -1428,10 +1473,10 @@ .balign 16 jsimd_ycc_\colorid\()_neon_consts: - .short 0, 0, 0, 0 - .short 22971, -11277, -23401, 29033 - .short -128, -128, -128, -128 - .short -128, -128, -128, -128 + .short 0, 0, 0, 0 + .short 22971, -11277, -23401, 29033 + .short -128, -128, -128, -128 + .short -128, -128, -128, -128 asm_function jsimd_ycc_\colorid\()_convert_neon OUTPUT_WIDTH .req r0 @@ -1541,7 +1586,6 @@ .unreq U .unreq V .unreq N -.endfunc .purgem do_yuv_to_rgb .purgem do_yuv_to_rgb_stage1 @@ -1557,10 +1601,12 @@ generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0 generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1 generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3 +generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, 0, 0 .purgem do_load .purgem do_store + /*****************************************************************************/ /* @@ -1575,123 +1621,123 @@ */ .macro do_store size + .if \size == 8 + vst1.8 {d20}, [Y]! + vst1.8 {d21}, [U]! + vst1.8 {d22}, [V]! + .elseif \size == 4 + vst1.8 {d20[0]}, [Y]! + vst1.8 {d20[1]}, [Y]! + vst1.8 {d20[2]}, [Y]! + vst1.8 {d20[3]}, [Y]! + vst1.8 {d21[0]}, [U]! + vst1.8 {d21[1]}, [U]! + vst1.8 {d21[2]}, [U]! + vst1.8 {d21[3]}, [U]! + vst1.8 {d22[0]}, [V]! + vst1.8 {d22[1]}, [V]! + vst1.8 {d22[2]}, [V]! + vst1.8 {d22[3]}, [V]! + .elseif \size == 2 + vst1.8 {d20[4]}, [Y]! + vst1.8 {d20[5]}, [Y]! + vst1.8 {d21[4]}, [U]! + vst1.8 {d21[5]}, [U]! + vst1.8 {d22[4]}, [V]! + vst1.8 {d22[5]}, [V]! + .elseif \size == 1 + vst1.8 {d20[6]}, [Y]! + vst1.8 {d21[6]}, [U]! + vst1.8 {d22[6]}, [V]! + .else + .error unsupported macroblock size + .endif +.endm + +.macro do_load bpp, size + .if \bpp == 24 .if \size == 8 - vst1.8 {d20}, [Y]! - vst1.8 {d21}, [U]! - vst1.8 {d22}, [V]! + vld3.8 {d10, d11, d12}, [RGB]! + pld [RGB, #128] .elseif \size == 4 - vst1.8 {d20[0]}, [Y]! - vst1.8 {d20[1]}, [Y]! - vst1.8 {d20[2]}, [Y]! - vst1.8 {d20[3]}, [Y]! - vst1.8 {d21[0]}, [U]! - vst1.8 {d21[1]}, [U]! - vst1.8 {d21[2]}, [U]! - vst1.8 {d21[3]}, [U]! - vst1.8 {d22[0]}, [V]! - vst1.8 {d22[1]}, [V]! - vst1.8 {d22[2]}, [V]! - vst1.8 {d22[3]}, [V]! + vld3.8 {d10[0], d11[0], d12[0]}, [RGB]! + vld3.8 {d10[1], d11[1], d12[1]}, [RGB]! + vld3.8 {d10[2], d11[2], d12[2]}, [RGB]! + vld3.8 {d10[3], d11[3], d12[3]}, [RGB]! .elseif \size == 2 - vst1.8 {d20[4]}, [Y]! - vst1.8 {d20[5]}, [Y]! - vst1.8 {d21[4]}, [U]! - vst1.8 {d21[5]}, [U]! - vst1.8 {d22[4]}, [V]! - vst1.8 {d22[5]}, [V]! + vld3.8 {d10[4], d11[4], d12[4]}, [RGB]! + vld3.8 {d10[5], d11[5], d12[5]}, [RGB]! .elseif \size == 1 - vst1.8 {d20[6]}, [Y]! - vst1.8 {d21[6]}, [U]! - vst1.8 {d22[6]}, [V]! + vld3.8 {d10[6], d11[6], d12[6]}, [RGB]! .else - .error unsupported macroblock size + .error unsupported macroblock size .endif -.endm - -.macro do_load bpp, size - .if \bpp == 24 - .if \size == 8 - vld3.8 {d10, d11, d12}, [RGB]! - pld [RGB, #128] - .elseif \size == 4 - vld3.8 {d10[0], d11[0], d12[0]}, [RGB]! - vld3.8 {d10[1], d11[1], d12[1]}, [RGB]! - vld3.8 {d10[2], d11[2], d12[2]}, [RGB]! - vld3.8 {d10[3], d11[3], d12[3]}, [RGB]! - .elseif \size == 2 - vld3.8 {d10[4], d11[4], d12[4]}, [RGB]! - vld3.8 {d10[5], d11[5], d12[5]}, [RGB]! - .elseif \size == 1 - vld3.8 {d10[6], d11[6], d12[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif - .elseif \bpp == 32 - .if \size == 8 - vld4.8 {d10, d11, d12, d13}, [RGB]! - pld [RGB, #128] - .elseif \size == 4 - vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! - vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! - vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! - vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! - .elseif \size == 2 - vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! - vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! - .elseif \size == 1 - vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! - .else - .error unsupported macroblock size - .endif + .elseif \bpp == 32 + .if \size == 8 + vld4.8 {d10, d11, d12, d13}, [RGB]! + pld [RGB, #128] + .elseif \size == 4 + vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! + vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! + vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! + vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! + .elseif \size == 2 + vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! + vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! + .elseif \size == 1 + vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! .else - .error unsupported bpp + .error unsupported macroblock size .endif + .else + .error unsupported bpp + .endif .endm .macro generate_jsimd_rgb_ycc_convert_neon colorid, bpp, r_offs, g_offs, b_offs /* - * 2 stage pipelined RGB->YCbCr conversion + * 2-stage pipelined RGB->YCbCr conversion */ .macro do_rgb_to_yuv_stage1 - vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ - vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ - vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ - vmull.u16 q7, d4, d0[0] - vmlal.u16 q7, d6, d0[1] - vmlal.u16 q7, d8, d0[2] - vmull.u16 q8, d5, d0[0] - vmlal.u16 q8, d7, d0[1] - vmlal.u16 q8, d9, d0[2] - vrev64.32 q9, q1 - vrev64.32 q13, q1 - vmlsl.u16 q9, d4, d0[3] - vmlsl.u16 q9, d6, d1[0] - vmlal.u16 q9, d8, d1[1] - vmlsl.u16 q13, d5, d0[3] - vmlsl.u16 q13, d7, d1[0] - vmlal.u16 q13, d9, d1[1] - vrev64.32 q14, q1 - vrev64.32 q15, q1 - vmlal.u16 q14, d4, d1[1] - vmlsl.u16 q14, d6, d1[2] - vmlsl.u16 q14, d8, d1[3] - vmlal.u16 q15, d5, d1[1] - vmlsl.u16 q15, d7, d1[2] - vmlsl.u16 q15, d9, d1[3] + vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ + vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ + vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ + vmull.u16 q7, d4, d0[0] + vmlal.u16 q7, d6, d0[1] + vmlal.u16 q7, d8, d0[2] + vmull.u16 q8, d5, d0[0] + vmlal.u16 q8, d7, d0[1] + vmlal.u16 q8, d9, d0[2] + vrev64.32 q9, q1 + vrev64.32 q13, q1 + vmlsl.u16 q9, d4, d0[3] + vmlsl.u16 q9, d6, d1[0] + vmlal.u16 q9, d8, d1[1] + vmlsl.u16 q13, d5, d0[3] + vmlsl.u16 q13, d7, d1[0] + vmlal.u16 q13, d9, d1[1] + vrev64.32 q14, q1 + vrev64.32 q15, q1 + vmlal.u16 q14, d4, d1[1] + vmlsl.u16 q14, d6, d1[2] + vmlsl.u16 q14, d8, d1[3] + vmlal.u16 q15, d5, d1[1] + vmlsl.u16 q15, d7, d1[2] + vmlsl.u16 q15, d9, d1[3] .endm .macro do_rgb_to_yuv_stage2 - vrshrn.u32 d20, q7, #16 - vrshrn.u32 d21, q8, #16 - vshrn.u32 d22, q9, #16 - vshrn.u32 d23, q13, #16 - vshrn.u32 d24, q14, #16 - vshrn.u32 d25, q15, #16 - vmovn.u16 d20, q10 /* d20 = y */ - vmovn.u16 d21, q11 /* d21 = u */ - vmovn.u16 d22, q12 /* d22 = v */ + vrshrn.u32 d20, q7, #16 + vrshrn.u32 d21, q8, #16 + vshrn.u32 d22, q9, #16 + vshrn.u32 d23, q13, #16 + vshrn.u32 d24, q14, #16 + vshrn.u32 d25, q15, #16 + vmovn.u16 d20, q10 /* d20 = y */ + vmovn.u16 d21, q11 /* d21 = u */ + vmovn.u16 d22, q12 /* d22 = v */ .endm .macro do_rgb_to_yuv @@ -1700,52 +1746,52 @@ .endm .macro do_rgb_to_yuv_stage2_store_load_stage1 - vrshrn.u32 d20, q7, #16 - vrshrn.u32 d21, q8, #16 - vshrn.u32 d22, q9, #16 - vrev64.32 q9, q1 - vshrn.u32 d23, q13, #16 - vrev64.32 q13, q1 - vshrn.u32 d24, q14, #16 - vshrn.u32 d25, q15, #16 - do_load \bpp, 8 - vmovn.u16 d20, q10 /* d20 = y */ - vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ - vmovn.u16 d21, q11 /* d21 = u */ - vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ - vmovn.u16 d22, q12 /* d22 = v */ - vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ - vmull.u16 q7, d4, d0[0] - vmlal.u16 q7, d6, d0[1] - vmlal.u16 q7, d8, d0[2] - vst1.8 {d20}, [Y]! - vmull.u16 q8, d5, d0[0] - vmlal.u16 q8, d7, d0[1] - vmlal.u16 q8, d9, d0[2] - vmlsl.u16 q9, d4, d0[3] - vmlsl.u16 q9, d6, d1[0] - vmlal.u16 q9, d8, d1[1] - vst1.8 {d21}, [U]! - vmlsl.u16 q13, d5, d0[3] - vmlsl.u16 q13, d7, d1[0] - vmlal.u16 q13, d9, d1[1] - vrev64.32 q14, q1 - vrev64.32 q15, q1 - vmlal.u16 q14, d4, d1[1] - vmlsl.u16 q14, d6, d1[2] - vmlsl.u16 q14, d8, d1[3] - vst1.8 {d22}, [V]! - vmlal.u16 q15, d5, d1[1] - vmlsl.u16 q15, d7, d1[2] - vmlsl.u16 q15, d9, d1[3] + vrshrn.u32 d20, q7, #16 + vrshrn.u32 d21, q8, #16 + vshrn.u32 d22, q9, #16 + vrev64.32 q9, q1 + vshrn.u32 d23, q13, #16 + vrev64.32 q13, q1 + vshrn.u32 d24, q14, #16 + vshrn.u32 d25, q15, #16 + do_load \bpp, 8 + vmovn.u16 d20, q10 /* d20 = y */ + vmovl.u8 q2, d1\r_offs /* r = { d4, d5 } */ + vmovn.u16 d21, q11 /* d21 = u */ + vmovl.u8 q3, d1\g_offs /* g = { d6, d7 } */ + vmovn.u16 d22, q12 /* d22 = v */ + vmovl.u8 q4, d1\b_offs /* b = { d8, d9 } */ + vmull.u16 q7, d4, d0[0] + vmlal.u16 q7, d6, d0[1] + vmlal.u16 q7, d8, d0[2] + vst1.8 {d20}, [Y]! + vmull.u16 q8, d5, d0[0] + vmlal.u16 q8, d7, d0[1] + vmlal.u16 q8, d9, d0[2] + vmlsl.u16 q9, d4, d0[3] + vmlsl.u16 q9, d6, d1[0] + vmlal.u16 q9, d8, d1[1] + vst1.8 {d21}, [U]! + vmlsl.u16 q13, d5, d0[3] + vmlsl.u16 q13, d7, d1[0] + vmlal.u16 q13, d9, d1[1] + vrev64.32 q14, q1 + vrev64.32 q15, q1 + vmlal.u16 q14, d4, d1[1] + vmlsl.u16 q14, d6, d1[2] + vmlsl.u16 q14, d8, d1[3] + vst1.8 {d22}, [V]! + vmlal.u16 q15, d5, d1[1] + vmlsl.u16 q15, d7, d1[2] + vmlsl.u16 q15, d9, d1[3] .endm .balign 16 jsimd_\colorid\()_ycc_neon_consts: - .short 19595, 38470, 7471, 11059 - .short 21709, 32768, 27439, 5329 - .short 32767, 128, 32767, 128 - .short 32767, 128, 32767, 128 + .short 19595, 38470, 7471, 11059 + .short 21709, 32768, 27439, 5329 + .short 32767, 128, 32767, 128 + .short 32767, 128, 32767, 128 asm_function jsimd_\colorid\()_ycc_convert_neon OUTPUT_WIDTH .req r0 @@ -1851,7 +1897,6 @@ .unreq U .unreq V .unreq N -.endfunc .purgem do_rgb_to_yuv .purgem do_rgb_to_yuv_stage1 @@ -1871,6 +1916,7 @@ .purgem do_load .purgem do_store + /*****************************************************************************/ /* @@ -1932,7 +1978,7 @@ .unreq TMP2 .unreq TMP3 .unreq TMP4 -.endfunc + /*****************************************************************************/ @@ -1955,10 +2001,10 @@ .balign 16 jsimd_fdct_ifast_neon_consts: - .short (98 * 128) /* XFIX_0_382683433 */ - .short (139 * 128) /* XFIX_0_541196100 */ - .short (181 * 128) /* XFIX_0_707106781 */ - .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ + .short (98 * 128) /* XFIX_0_382683433 */ + .short (139 * 128) /* XFIX_0_541196100 */ + .short (181 * 128) /* XFIX_0_707106781 */ + .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */ asm_function jsimd_fdct_ifast_neon @@ -1995,52 +2041,52 @@ /* Transpose */ vtrn.16 q12, q13 vtrn.16 q10, q11 - vtrn.16 q8, q9 + vtrn.16 q8, q9 vtrn.16 q14, q15 - vtrn.32 q9, q11 + vtrn.32 q9, q11 vtrn.32 q13, q15 - vtrn.32 q8, q10 + vtrn.32 q8, q10 vtrn.32 q12, q14 vswp d30, d23 vswp d24, d17 vswp d26, d19 /* 1-D FDCT */ - vadd.s16 q2, q11, q12 + vadd.s16 q2, q11, q12 vswp d28, d21 vsub.s16 q12, q11, q12 - vsub.s16 q6, q10, q13 + vsub.s16 q6, q10, q13 vadd.s16 q10, q10, q13 - vsub.s16 q7, q9, q14 - vadd.s16 q9, q9, q14 - vsub.s16 q1, q8, q15 - vadd.s16 q8, q8, q15 - vsub.s16 q4, q9, q10 - vsub.s16 q5, q8, q2 - vadd.s16 q3, q9, q10 - vadd.s16 q4, q4, q5 - vadd.s16 q2, q8, q2 - vqdmulh.s16 q4, q4, XFIX_0_707106781 + vsub.s16 q7, q9, q14 + vadd.s16 q9, q9, q14 + vsub.s16 q1, q8, q15 + vadd.s16 q8, q8, q15 + vsub.s16 q4, q9, q10 + vsub.s16 q5, q8, q2 + vadd.s16 q3, q9, q10 + vadd.s16 q4, q4, q5 + vadd.s16 q2, q8, q2 + vqdmulh.s16 q4, q4, XFIX_0_707106781 vadd.s16 q11, q12, q6 - vadd.s16 q8, q2, q3 - vsub.s16 q12, q2, q3 - vadd.s16 q3, q6, q7 - vadd.s16 q7, q7, q1 - vqdmulh.s16 q3, q3, XFIX_0_707106781 - vsub.s16 q6, q11, q7 - vadd.s16 q10, q5, q4 - vqdmulh.s16 q6, q6, XFIX_0_382683433 - vsub.s16 q14, q5, q4 + vadd.s16 q8, q2, q3 + vsub.s16 q12, q2, q3 + vadd.s16 q3, q6, q7 + vadd.s16 q7, q7, q1 + vqdmulh.s16 q3, q3, XFIX_0_707106781 + vsub.s16 q6, q11, q7 + vadd.s16 q10, q5, q4 + vqdmulh.s16 q6, q6, XFIX_0_382683433 + vsub.s16 q14, q5, q4 vqdmulh.s16 q11, q11, XFIX_0_541196100 - vqdmulh.s16 q5, q7, XFIX_1_306562965 - vadd.s16 q4, q1, q3 - vsub.s16 q3, q1, q3 - vadd.s16 q7, q7, q6 + vqdmulh.s16 q5, q7, XFIX_1_306562965 + vadd.s16 q4, q1, q3 + vsub.s16 q3, q1, q3 + vadd.s16 q7, q7, q6 vadd.s16 q11, q11, q6 - vadd.s16 q7, q7, q5 - vadd.s16 q13, q3, q11 - vsub.s16 q11, q3, q11 - vadd.s16 q9, q4, q7 - vsub.s16 q15, q4, q7 + vadd.s16 q7, q7, q5 + vadd.s16 q13, q3, q11 + vsub.s16 q11, q3, q11 + vadd.s16 q9, q4, q7 + vsub.s16 q15, q4, q7 subs TMP, TMP, #1 bne 1b @@ -2055,14 +2101,14 @@ .unreq DATA .unreq TMP -.endfunc + /*****************************************************************************/ /* * GLOBAL(void) - * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM * divisors, - * DCTELEM * workspace); + * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); * * Note: the code uses 2 stage pipelining in order to improve instructions * scheduling and eliminate stalls (this provides ~15% better @@ -2089,22 +2135,22 @@ vld1.16 {d20, d21, d22, d23}, [CORRECTION, :128]! vabs.s16 q13, q1 vld1.16 {d16, d17, d18, d19}, [RECIPROCAL, :128]! - vadd.u16 q12, q12, q10 /* add correction */ + vadd.u16 q12, q12, q10 /* add correction */ vadd.u16 q13, q13, q11 - vmull.u16 q10, d24, d16 /* multiply by reciprocal */ + vmull.u16 q10, d24, d16 /* multiply by reciprocal */ vmull.u16 q11, d25, d17 - vmull.u16 q8, d26, d18 - vmull.u16 q9, d27, d19 + vmull.u16 q8, d26, d18 + vmull.u16 q9, d27, d19 vld1.16 {d24, d25, d26, d27}, [SHIFT, :128]! vshrn.u32 d20, q10, #16 vshrn.u32 d21, q11, #16 - vshrn.u32 d22, q8, #16 - vshrn.u32 d23, q9, #16 + vshrn.u32 d22, q8, #16 + vshrn.u32 d23, q9, #16 vneg.s16 q12, q12 vneg.s16 q13, q13 - vshr.s16 q2, q0, #15 /* extract sign */ - vshr.s16 q3, q1, #15 - vshl.u16 q14, q10, q12 /* shift */ + vshr.s16 q2, q0, #15 /* extract sign */ + vshr.s16 q3, q1, #15 + vshl.u16 q14, q10, q12 /* shift */ vshl.u16 q15, q11, q13 push {r4, r5} @@ -2117,25 +2163,25 @@ vabs.s16 q13, q1 veor.u16 q15, q15, q3 vld1.16 {d16, d17, d18, d19}, [RECIPROCAL, :128]! - vadd.u16 q12, q12, q10 /* add correction */ + vadd.u16 q12, q12, q10 /* add correction */ vadd.u16 q13, q13, q11 - vmull.u16 q10, d24, d16 /* multiply by reciprocal */ + vmull.u16 q10, d24, d16 /* multiply by reciprocal */ vmull.u16 q11, d25, d17 - vmull.u16 q8, d26, d18 - vmull.u16 q9, d27, d19 + vmull.u16 q8, d26, d18 + vmull.u16 q9, d27, d19 vsub.u16 q14, q14, q2 vld1.16 {d24, d25, d26, d27}, [SHIFT, :128]! vsub.u16 q15, q15, q3 vshrn.u32 d20, q10, #16 vshrn.u32 d21, q11, #16 vst1.16 {d28, d29, d30, d31}, [COEF_BLOCK, :128]! - vshrn.u32 d22, q8, #16 - vshrn.u32 d23, q9, #16 + vshrn.u32 d22, q8, #16 + vshrn.u32 d23, q9, #16 vneg.s16 q12, q12 vneg.s16 q13, q13 - vshr.s16 q2, q0, #15 /* extract sign */ - vshr.s16 q3, q1, #15 - vshl.u16 q14, q10, q12 /* shift */ + vshr.s16 q2, q0, #15 /* extract sign */ + vshr.s16 q3, q1, #15 + vshl.u16 q14, q10, q12 /* shift */ vshl.u16 q15, q11, q13 subs LOOP_COUNT, LOOP_COUNT, #1 bne 1b @@ -2147,7 +2193,7 @@ vsub.u16 q15, q15, q3 vst1.16 {d28, d29, d30, d31}, [COEF_BLOCK, :128]! - bx lr /* return */ + bx lr /* return */ .unreq COEF_BLOCK .unreq DIVISORS @@ -2156,4 +2202,677 @@ .unreq CORRECTION .unreq SHIFT .unreq LOOP_COUNT -.endfunc + + +/*****************************************************************************/ + +/* + * GLOBAL(void) + * jsimd_h2v1_fancy_upsample_neon (int max_v_samp_factor, + * JDIMENSION downsampled_width, + * JSAMPARRAY input_data, + * JSAMPARRAY *output_data_ptr); + * + * Note: the use of unaligned writes is the main remaining bottleneck in + * this code, which can be potentially solved to get up to tens + * of percents performance improvement on Cortex-A8/Cortex-A9. + */ + +/* + * Upsample 16 source pixels to 32 destination pixels. The new 16 source + * pixels are loaded to q0. The previous 16 source pixels are in q1. The + * shifted-by-one source pixels are constructed in q2 by using q0 and q1. + * Register d28 is used for multiplication by 3. Register q15 is used + * for adding +1 bias. + */ +.macro upsample16 OUTPTR, INPTR + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vmov q1, q0 /* backup source pixels to q1 */ + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample 32 source pixels to 64 destination pixels. Compared to 'usample16' + * macro, the roles of q0 and q1 registers are reversed for even and odd + * groups of 16 pixels, that's why "vmov q1, q0" instructions are not needed. + * Also this unrolling allows to reorder loads and stores to compensate + * multiplication latency and reduce stalls. + */ +.macro upsample32 OUTPTR, INPTR + /* even 16 pixels group */ + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + /* odd 16 pixels group */ + vld1.8 {q1}, [\INPTR]! + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vmovl.u8 q8, d2 + vext.8 q2, q0, q1, #15 + vmovl.u8 q9, d3 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d2, d28 + vmlal.u8 q11, d3, d28 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample a row of WIDTH pixels from INPTR to OUTPTR. + */ +.macro upsample_row OUTPTR, INPTR, WIDTH, TMP1 + /* special case for the first and last pixels */ + sub \WIDTH, \WIDTH, #1 + add \OUTPTR, \OUTPTR, #1 + ldrb \TMP1, [\INPTR, \WIDTH] + strb \TMP1, [\OUTPTR, \WIDTH, asl #1] + ldrb \TMP1, [\INPTR], #1 + strb \TMP1, [\OUTPTR, #-1] + vmov.8 d3[7], \TMP1 + + subs \WIDTH, \WIDTH, #32 + blt 5f +0: /* process 32 pixels per iteration */ + upsample32 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #32 + bge 0b +5: + adds \WIDTH, \WIDTH, #16 + blt 1f +0: /* process 16 pixels if needed */ + upsample16 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #16 +1: + adds \WIDTH, \WIDTH, #16 + beq 9f + + /* load the remaining 1-15 pixels */ + add \INPTR, \INPTR, \WIDTH + tst \WIDTH, #1 + beq 2f + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #2 + beq 2f + vext.8 d0, d0, d0, #6 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #4 + beq 2f + vrev64.32 d0, d0 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[3]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[2]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #8 + beq 2f + vmov d1, d0 + sub \INPTR, \INPTR, #8 + vld1.8 {d0}, [\INPTR] +2: /* upsample the remaining pixels */ + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vrshrn.u16 d10, q8, #2 + vrshrn.u16 d12, q9, #2 + vshrn.u16 d11, q10, #2 + vshrn.u16 d13, q11, #2 + vzip.8 d10, d11 + vzip.8 d12, d13 + /* store the remaining pixels */ + tst \WIDTH, #8 + beq 2f + vst1.8 {d10, d11}, [\OUTPTR]! + vmov q5, q6 +2: + tst \WIDTH, #4 + beq 2f + vst1.8 {d10}, [\OUTPTR]! + vmov d10, d11 +2: + tst \WIDTH, #2 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! + vst1.8 {d10[2]}, [\OUTPTR]! + vst1.8 {d10[3]}, [\OUTPTR]! + vext.8 d10, d10, d10, #4 +2: + tst \WIDTH, #1 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! +2: +9: +.endm + +asm_function jsimd_h2v1_fancy_upsample_neon + + MAX_V_SAMP_FACTOR .req r0 + DOWNSAMPLED_WIDTH .req r1 + INPUT_DATA .req r2 + OUTPUT_DATA_PTR .req r3 + OUTPUT_DATA .req OUTPUT_DATA_PTR + + OUTPTR .req r4 + INPTR .req r5 + WIDTH .req ip + TMP .req lr + + push {r4, r5, r6, lr} + vpush {d8-d15} + + ldr OUTPUT_DATA, [OUTPUT_DATA_PTR] + cmp MAX_V_SAMP_FACTOR, #0 + ble 99f + + /* initialize constants */ + vmov.u8 d28, #3 + vmov.u16 q15, #1 +11: + ldr INPTR, [INPUT_DATA], #4 + ldr OUTPTR, [OUTPUT_DATA], #4 + mov WIDTH, DOWNSAMPLED_WIDTH + upsample_row OUTPTR, INPTR, WIDTH, TMP + subs MAX_V_SAMP_FACTOR, MAX_V_SAMP_FACTOR, #1 + bgt 11b + +99: + vpop {d8-d15} + pop {r4, r5, r6, pc} + + .unreq MAX_V_SAMP_FACTOR + .unreq DOWNSAMPLED_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA_PTR + .unreq OUTPUT_DATA + + .unreq OUTPTR + .unreq INPTR + .unreq WIDTH + .unreq TMP + +.purgem upsample16 +.purgem upsample32 +.purgem upsample_row + + +/*****************************************************************************/ + +/* + * GLOBAL(JOCTET*) + * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * + */ + +.macro emit_byte BUFFER, PUT_BUFFER, PUT_BITS, ZERO, TMP + sub \PUT_BITS, \PUT_BITS, #0x8 + lsr \TMP, \PUT_BUFFER, \PUT_BITS + uxtb \TMP, \TMP + strb \TMP, [\BUFFER, #1]! + cmp \TMP, #0xff + /*it eq*/ + strbeq \ZERO, [\BUFFER, #1]! +.endm + +.macro put_bits PUT_BUFFER, PUT_BITS, CODE, SIZE + /*lsl \PUT_BUFFER, \PUT_BUFFER, \SIZE*/ + add \PUT_BITS, \SIZE + /*orr \PUT_BUFFER, \PUT_BUFFER, \CODE*/ + orr \PUT_BUFFER, \CODE, \PUT_BUFFER, lsl \SIZE +.endm + +.macro checkbuf15 BUFFER, PUT_BUFFER, PUT_BITS, ZERO, TMP + cmp \PUT_BITS, #0x10 + blt 15f + eor \ZERO, \ZERO, \ZERO + emit_byte \BUFFER, \PUT_BUFFER, \PUT_BITS, \ZERO, \TMP + emit_byte \BUFFER, \PUT_BUFFER, \PUT_BITS, \ZERO, \TMP +15: +.endm + +.balign 16 +jsimd_huff_encode_one_block_neon_consts: + .byte 0x01 + .byte 0x02 + .byte 0x04 + .byte 0x08 + .byte 0x10 + .byte 0x20 + .byte 0x40 + .byte 0x80 + +asm_function jsimd_huff_encode_one_block_neon + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + add r7, sp, #0x1c + sub r4, sp, #0x40 + bfc r4, #0, #5 + mov sp, r4 /* align sp on 32 bytes */ + vst1.64 {d8, d9, d10, d11}, [r4, :128]! + vst1.64 {d12, d13, d14, d15}, [r4, :128] + sub sp, #0x140 /* reserve 320 bytes */ + str r0, [sp, #0x18] /* working state > sp + Ox18 */ + add r4, sp, #0x20 /* r4 = t1 */ + ldr lr, [r7, #0x8] /* lr = dctbl */ + sub r10, r1, #0x1 /* r10=buffer-- */ + ldrsh r1, [r2] + mov r9, #0x10 + mov r8, #0x1 + adr r5, jsimd_huff_encode_one_block_neon_consts + /* prepare data */ + vld1.8 {d26}, [r5, :64] + veor q8, q8, q8 + veor q9, q9, q9 + vdup.16 q14, r9 + vdup.16 q15, r8 + veor q10, q10, q10 + veor q11, q11, q11 + sub r1, r1, r3 + add r9, r2, #0x22 + add r8, r2, #0x18 + add r3, r2, #0x36 + vmov.16 d0[0], r1 + vld1.16 {d2[0]}, [r9, :16] + vld1.16 {d4[0]}, [r8, :16] + vld1.16 {d6[0]}, [r3, :16] + add r1, r2, #0x2 + add r9, r2, #0x30 + add r8, r2, #0x26 + add r3, r2, #0x28 + vld1.16 {d0[1]}, [r1, :16] + vld1.16 {d2[1]}, [r9, :16] + vld1.16 {d4[1]}, [r8, :16] + vld1.16 {d6[1]}, [r3, :16] + add r1, r2, #0x10 + add r9, r2, #0x40 + add r8, r2, #0x34 + add r3, r2, #0x1a + vld1.16 {d0[2]}, [r1, :16] + vld1.16 {d2[2]}, [r9, :16] + vld1.16 {d4[2]}, [r8, :16] + vld1.16 {d6[2]}, [r3, :16] + add r1, r2, #0x20 + add r9, r2, #0x32 + add r8, r2, #0x42 + add r3, r2, #0xc + vld1.16 {d0[3]}, [r1, :16] + vld1.16 {d2[3]}, [r9, :16] + vld1.16 {d4[3]}, [r8, :16] + vld1.16 {d6[3]}, [r3, :16] + add r1, r2, #0x12 + add r9, r2, #0x24 + add r8, r2, #0x50 + add r3, r2, #0xe + vld1.16 {d1[0]}, [r1, :16] + vld1.16 {d3[0]}, [r9, :16] + vld1.16 {d5[0]}, [r8, :16] + vld1.16 {d7[0]}, [r3, :16] + add r1, r2, #0x4 + add r9, r2, #0x16 + add r8, r2, #0x60 + add r3, r2, #0x1c + vld1.16 {d1[1]}, [r1, :16] + vld1.16 {d3[1]}, [r9, :16] + vld1.16 {d5[1]}, [r8, :16] + vld1.16 {d7[1]}, [r3, :16] + add r1, r2, #0x6 + add r9, r2, #0x8 + add r8, r2, #0x52 + add r3, r2, #0x2a + vld1.16 {d1[2]}, [r1, :16] + vld1.16 {d3[2]}, [r9, :16] + vld1.16 {d5[2]}, [r8, :16] + vld1.16 {d7[2]}, [r3, :16] + add r1, r2, #0x14 + add r9, r2, #0xa + add r8, r2, #0x44 + add r3, r2, #0x38 + vld1.16 {d1[3]}, [r1, :16] + vld1.16 {d3[3]}, [r9, :16] + vld1.16 {d5[3]}, [r8, :16] + vld1.16 {d7[3]}, [r3, :16] + vcgt.s16 q8, q8, q0 + vcgt.s16 q9, q9, q1 + vcgt.s16 q10, q10, q2 + vcgt.s16 q11, q11, q3 + vabs.s16 q0, q0 + vabs.s16 q1, q1 + vabs.s16 q2, q2 + vabs.s16 q3, q3 + veor q8, q8, q0 + veor q9, q9, q1 + veor q10, q10, q2 + veor q11, q11, q3 + add r9, r4, #0x20 + add r8, r4, #0x80 + add r3, r4, #0xa0 + vclz.i16 q0, q0 + vclz.i16 q1, q1 + vclz.i16 q2, q2 + vclz.i16 q3, q3 + vsub.i16 q0, q14, q0 + vsub.i16 q1, q14, q1 + vsub.i16 q2, q14, q2 + vsub.i16 q3, q14, q3 + vst1.16 {d0, d1, d2, d3}, [r4, :256] + vst1.16 {d4, d5, d6, d7}, [r9, :256] + vshl.s16 q0, q15, q0 + vshl.s16 q1, q15, q1 + vshl.s16 q2, q15, q2 + vshl.s16 q3, q15, q3 + vsub.i16 q0, q0, q15 + vsub.i16 q1, q1, q15 + vsub.i16 q2, q2, q15 + vsub.i16 q3, q3, q15 + vand q8, q8, q0 + vand q9, q9, q1 + vand q10, q10, q2 + vand q11, q11, q3 + vst1.16 {d16, d17, d18, d19}, [r8, :256] + vst1.16 {d20, d21, d22, d23}, [r3, :256] + add r1, r2, #0x46 + add r9, r2, #0x3a + add r8, r2, #0x74 + add r3, r2, #0x6a + vld1.16 {d8[0]}, [r1, :16] + vld1.16 {d10[0]}, [r9, :16] + vld1.16 {d12[0]}, [r8, :16] + vld1.16 {d14[0]}, [r3, :16] + veor q8, q8, q8 + veor q9, q9, q9 + veor q10, q10, q10 + veor q11, q11, q11 + add r1, r2, #0x54 + add r9, r2, #0x2c + add r8, r2, #0x76 + add r3, r2, #0x78 + vld1.16 {d8[1]}, [r1, :16] + vld1.16 {d10[1]}, [r9, :16] + vld1.16 {d12[1]}, [r8, :16] + vld1.16 {d14[1]}, [r3, :16] + add r1, r2, #0x62 + add r9, r2, #0x1e + add r8, r2, #0x68 + add r3, r2, #0x7a + vld1.16 {d8[2]}, [r1, :16] + vld1.16 {d10[2]}, [r9, :16] + vld1.16 {d12[2]}, [r8, :16] + vld1.16 {d14[2]}, [r3, :16] + add r1, r2, #0x70 + add r9, r2, #0x2e + add r8, r2, #0x5a + add r3, r2, #0x6c + vld1.16 {d8[3]}, [r1, :16] + vld1.16 {d10[3]}, [r9, :16] + vld1.16 {d12[3]}, [r8, :16] + vld1.16 {d14[3]}, [r3, :16] + add r1, r2, #0x72 + add r9, r2, #0x3c + add r8, r2, #0x4c + add r3, r2, #0x5e + vld1.16 {d9[0]}, [r1, :16] + vld1.16 {d11[0]}, [r9, :16] + vld1.16 {d13[0]}, [r8, :16] + vld1.16 {d15[0]}, [r3, :16] + add r1, r2, #0x64 + add r9, r2, #0x4a + add r8, r2, #0x3e + add r3, r2, #0x6e + vld1.16 {d9[1]}, [r1, :16] + vld1.16 {d11[1]}, [r9, :16] + vld1.16 {d13[1]}, [r8, :16] + vld1.16 {d15[1]}, [r3, :16] + add r1, r2, #0x56 + add r9, r2, #0x58 + add r8, r2, #0x4e + add r3, r2, #0x7c + vld1.16 {d9[2]}, [r1, :16] + vld1.16 {d11[2]}, [r9, :16] + vld1.16 {d13[2]}, [r8, :16] + vld1.16 {d15[2]}, [r3, :16] + add r1, r2, #0x48 + add r9, r2, #0x66 + add r8, r2, #0x5c + add r3, r2, #0x7e + vld1.16 {d9[3]}, [r1, :16] + vld1.16 {d11[3]}, [r9, :16] + vld1.16 {d13[3]}, [r8, :16] + vld1.16 {d15[3]}, [r3, :16] + vcgt.s16 q8, q8, q4 + vcgt.s16 q9, q9, q5 + vcgt.s16 q10, q10, q6 + vcgt.s16 q11, q11, q7 + vabs.s16 q4, q4 + vabs.s16 q5, q5 + vabs.s16 q6, q6 + vabs.s16 q7, q7 + veor q8, q8, q4 + veor q9, q9, q5 + veor q10, q10, q6 + veor q11, q11, q7 + add r1, r4, #0x40 + add r9, r4, #0x60 + add r8, r4, #0xc0 + add r3, r4, #0xe0 + vclz.i16 q4, q4 + vclz.i16 q5, q5 + vclz.i16 q6, q6 + vclz.i16 q7, q7 + vsub.i16 q4, q14, q4 + vsub.i16 q5, q14, q5 + vsub.i16 q6, q14, q6 + vsub.i16 q7, q14, q7 + vst1.16 {d8, d9, d10, d11}, [r1, :256] + vst1.16 {d12, d13, d14, d15}, [r9, :256] + vshl.s16 q4, q15, q4 + vshl.s16 q5, q15, q5 + vshl.s16 q6, q15, q6 + vshl.s16 q7, q15, q7 + vsub.i16 q4, q4, q15 + vsub.i16 q5, q5, q15 + vsub.i16 q6, q6, q15 + vsub.i16 q7, q7, q15 + vand q8, q8, q4 + vand q9, q9, q5 + vand q10, q10, q6 + vand q11, q11, q7 + vst1.16 {d16, d17, d18, d19}, [r8, :256] + vst1.16 {d20, d21, d22, d23}, [r3, :256] + ldr r12, [r7, #0xc] /* r12 = actbl */ + add r1, lr, #0x400 /* r1 = dctbl->ehufsi */ + mov r9, r12 /* r9 = actbl */ + add r6, r4, #0x80 /* r6 = t2 */ + ldr r11, [r0, #0x8] /* r11 = put_buffer */ + ldr r4, [r0, #0xc] /* r4 = put_bits */ + ldrh r2, [r6, #-128] /* r2 = nbits */ + ldrh r3, [r6] /* r3 = temp2 & (((JLONG) 1)<ehufsi */ + ldrsb r6, [r5, #0xf0] /* r6 = actbl->ehufsi[0xf0] */ + veor q8, q8, q8 + vceq.i16 q0, q0, q8 + vceq.i16 q1, q1, q8 + vceq.i16 q2, q2, q8 + vceq.i16 q3, q3, q8 + vceq.i16 q4, q4, q8 + vceq.i16 q5, q5, q8 + vceq.i16 q6, q6, q8 + vceq.i16 q7, q7, q8 + vmovn.i16 d0, q0 + vmovn.i16 d2, q1 + vmovn.i16 d4, q2 + vmovn.i16 d6, q3 + vmovn.i16 d8, q4 + vmovn.i16 d10, q5 + vmovn.i16 d12, q6 + vmovn.i16 d14, q7 + vand d0, d0, d26 + vand d2, d2, d26 + vand d4, d4, d26 + vand d6, d6, d26 + vand d8, d8, d26 + vand d10, d10, d26 + vand d12, d12, d26 + vand d14, d14, d26 + vpadd.i8 d0, d0, d2 + vpadd.i8 d4, d4, d6 + vpadd.i8 d8, d8, d10 + vpadd.i8 d12, d12, d14 + vpadd.i8 d0, d0, d4 + vpadd.i8 d8, d8, d12 + vpadd.i8 d0, d0, d8 + vmov.32 r1, d0[1] + vmov.32 r8, d0[0] + mvn r1, r1 + mvn r8, r8 + lsrs r1, r1, #0x1 + rrx r8, r8 /* shift in last r1 bit while shifting out DC bit */ + rbit r1, r1 /* r1 = index1 */ + rbit r8, r8 /* r8 = index0 */ + ldr r0, [r9, #0x3c0] /* r0 = actbl->ehufco[0xf0] */ + str r1, [sp, #0x14] /* index1 > sp + 0x14 */ + cmp r8, #0x0 + beq 6f +1: + clz r2, r8 + add lr, lr, r2, lsl #1 + lsl r8, r8, r2 + ldrh r1, [lr, #-126] +2: + cmp r2, #0x10 + blt 3f + sub r2, r2, #0x10 + put_bits r11, r4, r0, r6 + cmp r4, #0x10 + blt 2b + eor r3, r3, r3 + emit_byte r10, r11, r4, r3, r12 + emit_byte r10, r11, r4, r3, r12 + b 2b +3: + add r2, r1, r2, lsl #4 + ldrh r3, [lr, #2]! + ldr r12, [r9, r2, lsl #2] + ldrb r2, [r5, r2] + put_bits r11, r4, r12, r2 + checkbuf15 r10, r11, r4, r2, r12 + put_bits r11, r4, r3, r1 + checkbuf15 r10, r11, r4, r2, r12 + lsls r8, r8, #0x1 + bne 1b +6: + add r12, sp, #0x20 /* r12 = t1 */ + ldr r8, [sp, #0x14] /* r8 = index1 */ + adds r12, #0xc0 /* r12 = t2 + (DCTSIZE2/2) */ + cmp r8, #0x0 + beq 6f + clz r2, r8 + sub r12, r12, lr + lsl r8, r8, r2 + add r2, r2, r12, lsr #1 + add lr, lr, r2, lsl #1 + b 7f +1: + clz r2, r8 + add lr, lr, r2, lsl #1 + lsl r8, r8, r2 +7: + ldrh r1, [lr, #-126] +2: + cmp r2, #0x10 + blt 3f + sub r2, r2, #0x10 + put_bits r11, r4, r0, r6 + cmp r4, #0x10 + blt 2b + eor r3, r3, r3 + emit_byte r10, r11, r4, r3, r12 + emit_byte r10, r11, r4, r3, r12 + b 2b +3: + add r2, r1, r2, lsl #4 + ldrh r3, [lr, #2]! + ldr r12, [r9, r2, lsl #2] + ldrb r2, [r5, r2] + put_bits r11, r4, r12, r2 + checkbuf15 r10, r11, r4, r2, r12 + put_bits r11, r4, r3, r1 + checkbuf15 r10, r11, r4, r2, r12 + lsls r8, r8, #0x1 + bne 1b +6: + add r0, sp, #0x20 + add r0, #0xfe + cmp lr, r0 + bhs 1f + ldr r1, [r9] + ldrb r0, [r5] + put_bits r11, r4, r1, r0 + checkbuf15 r10, r11, r4, r0, r1 +1: + ldr r12, [sp, #0x18] + str r11, [r12, #0x8] + str r4, [r12, #0xc] + add r0, r10, #0x1 + add r4, sp, #0x140 + vld1.64 {d8, d9, d10, d11}, [r4, :128]! + vld1.64 {d12, d13, d14, d15}, [r4, :128] + sub r4, r7, #0x1c + mov sp, r4 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + +.purgem emit_byte +.purgem put_bits +.purgem checkbuf15 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcfg.inc glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcfg.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcfg.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcfg.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,92 @@ +; +; Automatically generated include file from jsimdcfg.inc.h +; +; +; -- jpeglib.h +; +%define DCTSIZE 8 +%define DCTSIZE2 64 +; +; -- jmorecfg.h +; +%define RGB_RED 0 +%define RGB_GREEN 1 +%define RGB_BLUE 2 +%define RGB_PIXELSIZE 3 +%define EXT_RGB_RED 0 +%define EXT_RGB_GREEN 1 +%define EXT_RGB_BLUE 2 +%define EXT_RGB_PIXELSIZE 3 +%define EXT_RGBX_RED 0 +%define EXT_RGBX_GREEN 1 +%define EXT_RGBX_BLUE 2 +%define EXT_RGBX_PIXELSIZE 4 +%define EXT_BGR_RED 2 +%define EXT_BGR_GREEN 1 +%define EXT_BGR_BLUE 0 +%define EXT_BGR_PIXELSIZE 3 +%define EXT_BGRX_RED 2 +%define EXT_BGRX_GREEN 1 +%define EXT_BGRX_BLUE 0 +%define EXT_BGRX_PIXELSIZE 4 +%define EXT_XBGR_RED 3 +%define EXT_XBGR_GREEN 2 +%define EXT_XBGR_BLUE 1 +%define EXT_XBGR_PIXELSIZE 4 +%define EXT_XRGB_RED 1 +%define EXT_XRGB_GREEN 2 +%define EXT_XRGB_BLUE 3 +%define EXT_XRGB_PIXELSIZE 4 +%define RGBX_FILLER_0XFF 1 +; Representation of a single sample (pixel element value). +; On this SIMD implementation, this must be 'unsigned char'. +; +%define JSAMPLE byte ; unsigned char +%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) +%define CENTERJSAMPLE 128 +; Representation of a DCT frequency coefficient. +; On this SIMD implementation, this must be 'short'. +; +%define JCOEF word ; short +%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) +; Datatype used for image dimensions. +; On this SIMD implementation, this must be 'unsigned int'. +; +%define JDIMENSION dword ; unsigned int +%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) +%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) +%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) +%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) +%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) +%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) +%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) +%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) +%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) +; +; -- jdct.h +; +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) +%define float FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float) +; To maximize parallelism, Type short is changed to short. +; +%define ISLOW_MULT_TYPE word ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) +%define IFAST_MULT_TYPE word ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors +%define FLOAT_MULT_TYPE FP32 ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) +; +; -- jsimd.h +; +%define JSIMD_NONE 0x00 +%define JSIMD_MMX 0x01 +%define JSIMD_3DNOW 0x02 +%define JSIMD_SSE 0x04 +%define JSIMD_SSE2 0x08 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcfg.inc.h glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcfg.inc.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcfg.inc.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcfg.inc.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,130 @@ +// This file generates the include file for the assembly +// implementations by abusing the C preprocessor. +// +// Note: Some things are manually defined as they need to +// be mapped to NASM types. + +; +; Automatically generated include file from jsimdcfg.inc.h +; + +#define JPEG_INTERNALS + +#include "../jpeglib.h" +#include "../jconfig.h" +#include "../jmorecfg.h" +#include "jsimd.h" + +; +; -- jpeglib.h +; + +%define _cpp_protection_DCTSIZE DCTSIZE +%define _cpp_protection_DCTSIZE2 DCTSIZE2 + +; +; -- jmorecfg.h +; + +%define _cpp_protection_RGB_RED RGB_RED +%define _cpp_protection_RGB_GREEN RGB_GREEN +%define _cpp_protection_RGB_BLUE RGB_BLUE +%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED +%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN +%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE +%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED +%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN +%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE +%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE + +%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED +%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN +%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE +%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE + +%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED +%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN +%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE +%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE + +%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED +%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN +%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE +%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE + +%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED +%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN +%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE +%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE + +%define RGBX_FILLER_0XFF 1 + +; Representation of a single sample (pixel element value). +; On this SIMD implementation, this must be 'unsigned char'. +; + +%define JSAMPLE byte ; unsigned char +%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) + +%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE + +; Representation of a DCT frequency coefficient. +; On this SIMD implementation, this must be 'short'. +; +%define JCOEF word ; short +%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) + +; Datatype used for image dimensions. +; On this SIMD implementation, this must be 'unsigned int'. +; +%define JDIMENSION dword ; unsigned int +%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) + +%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) +%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) +%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) +%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) +%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) +%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) +%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) +%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) + +; +; -- jdct.h +; + +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) + +%define FAST_FLOAT FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT) + +; To maximize parallelism, Type MULTIPLIER is changed to short. +; +%define ISLOW_MULT_TYPE word ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) + +%define IFAST_MULT_TYPE word ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors + +%define FLOAT_MULT_TYPE FP32 ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) + +; +; -- jsimd.h +; + +%define _cpp_protection_JSIMD_NONE JSIMD_NONE +%define _cpp_protection_JSIMD_MMX JSIMD_MMX +%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW +%define _cpp_protection_JSIMD_SSE JSIMD_SSE +%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcpu.asm glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcpu.asm --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdcpu.asm 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimdcpu.asm 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,105 @@ +; +; jsimdcpu.asm - SIMD instruction support check +; +; Copyright 2009 Pierre Ossman for Cendio AB +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Check if the CPU supports SIMD instructions +; +; GLOBAL(unsigned int) +; jpeg_simd_cpu_support (void) +; + + align 16 + global EXTN(jpeg_simd_cpu_support) PRIVATE + +EXTN(jpeg_simd_cpu_support): + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused + push edi + + xor edi,edi ; simd support flag + + pushfd + pop eax + mov edx,eax + xor eax, 1<<21 ; flip ID bit in EFLAGS + push eax + popfd + pushfd + pop eax + xor eax,edx + jz short .return ; CPUID is not supported + + ; Check for MMX instruction support + xor eax,eax + cpuid + test eax,eax + jz short .return + + xor eax,eax + inc eax + cpuid + mov eax,edx ; eax = Standard feature flags + + test eax, 1<<23 ; bit23:MMX + jz short .no_mmx + or edi, byte JSIMD_MMX +.no_mmx: + test eax, 1<<25 ; bit25:SSE + jz short .no_sse + or edi, byte JSIMD_SSE +.no_sse: + test eax, 1<<26 ; bit26:SSE2 + jz short .no_sse2 + or edi, byte JSIMD_SSE2 +.no_sse2: + + ; Check for 3DNow! instruction support + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000000 + jbe short .return + + mov eax, 0x80000001 + cpuid + mov eax,edx ; eax = Extended feature flags + + test eax, 1<<31 ; bit31:3DNow!(vendor independent) + jz short .no_3dnow + or edi, byte JSIMD_3DNOW +.no_3dnow: + +.return: + mov eax,edi + + pop edi +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 16 diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdext.inc glmark2-2021.02/src/libjpeg-turbo/simd/jsimdext.inc --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimdext.inc 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimdext.inc 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,386 @@ +; +; jsimdext.inc - common declarations +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright 2010 D. R. Commander +; +; Based on +; x86 SIMD extension for IJG JPEG library - version 1.02 +; +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. +; +; [TAB8] + +; ========================================================================== +; System-dependent configurations + +%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- +; * Microsoft Visual C++ +; * MinGW (Minimalist GNU for Windows) +; * CygWin +; * LCC-Win32 + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=16 +%define SEG_CONST .rdata align=16 +%else +%define SEG_TEXT .text align=16 public use32 class=CODE +%define SEG_CONST .rdata align=16 public use32 class=CONST +%endif + +%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- +; * Microsoft Visual C++ + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=16 +%define SEG_CONST .rdata align=16 +%else +%define SEG_TEXT .text align=16 public use64 class=CODE +%define SEG_CONST .rdata align=16 public use64 class=CONST +%endif +%define EXTN(name) name ; foo() -> foo + +%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- +; * Borland C++ (Win32) + +; -- segment definition -- +; +%define SEG_TEXT _text align=16 public use32 class=CODE +%define SEG_CONST _data align=16 public use32 class=DATA + +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ +; * Linux +; * *BSD family Unix using elf format +; * Unix System V, including Solaris x86, UnixWare and SCO Unix + +; mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits + +; -- segment definition -- +; +%ifdef __x86_64__ +%define SEG_TEXT .text progbits align=16 +%define SEG_CONST .rodata progbits align=16 +%else +%define SEG_TEXT .text progbits alloc exec nowrite align=16 +%define SEG_CONST .rodata progbits alloc noexec nowrite align=16 +%endif + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC +%define EXTN(name) name ; foo() -> foo + +%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- +; * Older Linux using a.out format (nasm -f aout -DAOUT ...) +; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC + +%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) + +; -- segment definition -- +; +%define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? +%define SEG_CONST .rodata align=16 + +; The generation of position-independent code (PIC) is the default on Darwin. +; +%define PIC +%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing + +%else ; ----(Other case)---------------------- + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +%endif ; ---------------------------------------------- + +; ========================================================================== + +; -------------------------------------------------------------------------- +; Common types +; +%ifdef __x86_64__ +%define POINTER qword ; general pointer type +%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) +%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%else +%define POINTER dword ; general pointer type +%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) +%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%endif + +%define INT dword ; signed integer type +%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) +%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT + +%define FP32 dword ; IEEE754 single +%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) +%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT + +%define MMWORD qword ; int64 (MMX register) +%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) +%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT + +; NASM is buggy and doesn't properly handle operand sizes for SSE +; instructions, so for now we have to define XMMWORD as blank. +%define XMMWORD ; int128 (SSE register) +%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) +%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT + +; Similar hacks for when we load a dword or MMWORD into an xmm# register +%define XMM_DWORD +%define XMM_MMWORD + +%define SIZEOF_BYTE 1 ; sizeof(BYTE) +%define SIZEOF_WORD 2 ; sizeof(WORD) +%define SIZEOF_DWORD 4 ; sizeof(DWORD) +%define SIZEOF_QWORD 8 ; sizeof(QWORD) +%define SIZEOF_OWORD 16 ; sizeof(OWORD) + +%define BYTE_BIT 8 ; CHAR_BIT in C +%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT +%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT +%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT +%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT + +; -------------------------------------------------------------------------- +; External Symbol Name +; +%ifndef EXTN +%define EXTN(name) _ %+ name ; foo() -> _foo +%endif + +; -------------------------------------------------------------------------- +; Macros for position-independent code (PIC) support +; +%ifndef GOT_SYMBOL +%undef PIC +%endif + +%ifdef PIC ; ------------------------------------------- + +%ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- + +; At present, nasm doesn't seem to support PIC generation for Mach-O. +; The PIC support code below is a little tricky. + + SECTION SEG_CONST +const_base: + +%define GOTOFF(got,sym) (got) + (sym) - const_base + +%imacro get_GOT 1 + ; NOTE: this macro destroys ecx resister. + call %%geteip + add ecx, byte (%%ref - $) + jmp short %%adjust +%%geteip: + mov ecx, POINTER [esp] + ret +%%adjust: + push ebp + xor ebp,ebp ; ebp = 0 +%ifidni %1,ebx ; (%1 == ebx) + ; db 0x8D,0x9C + jmp near const_base = + ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) + db 0x8D,0x9C ; 8D,9C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: +%else ; (%1 != ebx) + ; db 0x8D,0x8C + jmp near const_base = + ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) + db 0x8D,0x8C ; 8D,8C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: mov %1, ecx +%endif ; (%1 == ebx) + pop ebp +%endmacro + +%else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- + +%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff + +%imacro get_GOT 1 + extern GOT_SYMBOL + call %%geteip + add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc + jmp short %%done +%%geteip: + mov %1, POINTER [esp] + ret +%%done: +%endmacro + +%endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- + +%imacro pushpic 1.nolist + push %1 +%endmacro +%imacro poppic 1.nolist + pop %1 +%endmacro +%imacro movpic 2.nolist + mov %1,%2 +%endmacro + +%else ; !PIC ----------------------------------------- + +%define GOTOFF(got,sym) (sym) + +%imacro get_GOT 1.nolist +%endmacro +%imacro pushpic 1.nolist +%endmacro +%imacro poppic 1.nolist +%endmacro +%imacro movpic 2.nolist +%endmacro + +%endif ; PIC ----------------------------------------- + +; -------------------------------------------------------------------------- +; Align the next instruction on {2,4,8,16,..}-byte boundary. +; ".balign n,,m" in GNU as +; +%define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) +%define FILLB(b,n) (($$-(b)) & ((n)-1)) + +%imacro alignx 1-2.nolist 0xFFFF +%%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ + db 0x90 ; nop + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ + db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ + db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ + db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ + db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ + db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ + db 0x8B,0xED ; mov ebp,ebp + times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ + db 0x90 ; nop +%endmacro + +; Align the next data on {2,4,8,16,..}-byte boundary. +; +%imacro alignz 1.nolist + align %1, db 0 ; filling zeros +%endmacro + +%ifdef __x86_64__ + +%ifdef WIN64 + +%imacro collect_args 0 + push r12 + push r13 + push r14 + push r15 + mov r10, rcx + mov r11, rdx + mov r12, r8 + mov r13, r9 + mov r14, [rax+48] + mov r15, [rax+56] + push rsi + push rdi + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm6 + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm7 +%endmacro + +%imacro uncollect_args 0 + movaps xmm7, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD + movaps xmm6, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD + pop rdi + pop rsi + pop r15 + pop r14 + pop r13 + pop r12 +%endmacro + +%else + +%imacro collect_args 0 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + mov r10, rdi + mov r11, rsi + mov r12, rdx + mov r13, rcx + mov r14, r8 + mov r15, r9 +%endmacro + +%imacro uncollect_args 0 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 +%endmacro + +%endif + +%endif + +; Begin chromium edits +%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +%define PRIVATE :private_extern +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ +%define PRIVATE :hidden +%else +%define PRIVATE +%endif +; End chromium edits + +; -------------------------------------------------------------------------- +; Defines picked up from the C headers +; +%include "jsimdcfg.inc" + +; -------------------------------------------------------------------------- diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd.h glmark2-2021.02/src/libjpeg-turbo/simd/jsimd.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd.h 2021-04-25 01:47:22.000000000 +0800 @@ -2,8 +2,11 @@ * simd/jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2011 D. R. Commander - * + * Copyright (C) 2011, 2014-2016 D. R. Commander + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California + * Copyright (C) 2014 Linaro Limited + * Copyright (C) 2015-2016 Matthieu Darbois + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc @@ -18,649 +21,851 @@ #define JSIMD_SSE 0x04 #define JSIMD_SSE2 0x08 #define JSIMD_ARM_NEON 0x10 - -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_simd_cpu_support jSiCpuSupport -#define jsimd_rgb_ycc_convert_mmx jSRGBYCCM -#define jsimd_extrgb_ycc_convert_mmx jSEXTRGBYCCM -#define jsimd_extrgbx_ycc_convert_mmx jSEXTRGBXYCCM -#define jsimd_extbgr_ycc_convert_mmx jSEXTBGRYCCM -#define jsimd_extbgrx_ycc_convert_mmx jSEXTBGRXYCCM -#define jsimd_extxbgr_ycc_convert_mmx jSEXTXBGRYCCM -#define jsimd_extxrgb_ycc_convert_mmx jSEXTXRGBYCCM -#define jsimd_rgb_gray_convert_mmx jSRGBGRYM -#define jsimd_extrgb_gray_convert_mmx jSEXTRGBGRYM -#define jsimd_extrgbx_gray_convert_mmx jSEXTRGBXGRYM -#define jsimd_extbgr_gray_convert_mmx jSEXTBGRGRYM -#define jsimd_extbgrx_gray_convert_mmx jSEXTBGRXGRYM -#define jsimd_extxbgr_gray_convert_mmx jSEXTXBGRGRYM -#define jsimd_extxrgb_gray_convert_mmx jSEXTXRGBGRYM -#define jsimd_ycc_rgb_convert_mmx jSYCCRGBM -#define jsimd_ycc_extrgb_convert_mmx jSYCCEXTRGBM -#define jsimd_ycc_extrgbx_convert_mmx jSYCCEXTRGBXM -#define jsimd_ycc_extbgr_convert_mmx jSYCCEXTBGRM -#define jsimd_ycc_extbgrx_convert_mmx jSYCCEXTBGRXM -#define jsimd_ycc_extxbgr_convert_mmx jSYCCEXTXBGRM -#define jsimd_ycc_extxrgb_convert_mmx jSYCCEXTXRGBM -#define jconst_rgb_ycc_convert_sse2 jSCRGBYCCS2 -#define jsimd_rgb_ycc_convert_sse2 jSRGBYCCS2 -#define jsimd_extrgb_ycc_convert_sse2 jSEXTRGBYCCS2 -#define jsimd_extrgbx_ycc_convert_sse2 jSEXTRGBXYCCS2 -#define jsimd_extbgr_ycc_convert_sse2 jSEXTBGRYCCS2 -#define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2 -#define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2 -#define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2 -#define jconst_rgb_gray_convert_sse2 jSCRGBGRYS2 -#define jsimd_rgb_gray_convert_sse2 jSRGBGRYS2 -#define jsimd_extrgb_gray_convert_sse2 jSEXTRGBGRYS2 -#define jsimd_extrgbx_gray_convert_sse2 jSEXTRGBXGRYS2 -#define jsimd_extbgr_gray_convert_sse2 jSEXTBGRGRYS2 -#define jsimd_extbgrx_gray_convert_sse2 jSEXTBGRXGRYS2 -#define jsimd_extxbgr_gray_convert_sse2 jSEXTXBGRGRYS2 -#define jsimd_extxrgb_gray_convert_sse2 jSEXTXRGBGRYS2 -#define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2 -#define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2 -#define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2 -#define jsimd_ycc_extrgbx_convert_sse2 jSYCCEXTRGBXS2 -#define jsimd_ycc_extbgr_convert_sse2 jSYCCEXTBGRS2 -#define jsimd_ycc_extbgrx_convert_sse2 jSYCCEXTBGRXS2 -#define jsimd_ycc_extxbgr_convert_sse2 jSYCCEXTXBGRS2 -#define jsimd_ycc_extxrgb_convert_sse2 jSYCCEXTXRGBS2 -#define jsimd_h2v2_downsample_mmx jSDnH2V2M -#define jsimd_h2v1_downsample_mmx jSDnH2V1M -#define jsimd_h2v2_downsample_sse2 jSDnH2V2S2 -#define jsimd_h2v1_downsample_sse2 jSDnH2V1S2 -#define jsimd_h2v2_upsample_mmx jSUpH2V2M -#define jsimd_h2v1_upsample_mmx jSUpH2V1M -#define jsimd_h2v2_fancy_upsample_mmx jSFUpH2V2M -#define jsimd_h2v1_fancy_upsample_mmx jSFUpH2V1M -#define jsimd_h2v2_merged_upsample_mmx jSMUpH2V2M -#define jsimd_h2v2_extrgb_merged_upsample_mmx jSMUpH2V2EXTRGBM -#define jsimd_h2v2_extrgbx_merged_upsample_mmx jSMUpH2V2EXTRGBXM -#define jsimd_h2v2_extbgr_merged_upsample_mmx jSMUpH2V2EXTBGRM -#define jsimd_h2v2_extbgrx_merged_upsample_mmx jSMUpH2V2EXTBGRXM -#define jsimd_h2v2_extxbgr_merged_upsample_mmx jSMUpH2V2EXTXBGRM -#define jsimd_h2v2_extxrgb_merged_upsample_mmx jSMUpH2V2EXTXRGBM -#define jsimd_h2v1_merged_upsample_mmx jSMUpH2V1M -#define jsimd_h2v1_extrgb_merged_upsample_mmx jSMUpH2V1EXTRGBM -#define jsimd_h2v1_extrgbx_merged_upsample_mmx jSMUpH2V1EXTRGBXM -#define jsimd_h2v1_extbgr_merged_upsample_mmx jSMUpH2V1EXTBGRM -#define jsimd_h2v1_extbgrx_merged_upsample_mmx jSMUpH2V1EXTBGRXM -#define jsimd_h2v1_extxbgr_merged_upsample_mmx jSMUpH2V1EXTXBGRM -#define jsimd_h2v1_extxrgb_merged_upsample_mmx jSMUpH2V1EXTXRGBM -#define jsimd_h2v2_upsample_sse2 jSUpH2V2S2 -#define jsimd_h2v1_upsample_sse2 jSUpH2V1S2 -#define jconst_fancy_upsample_sse2 jSCFUpS2 -#define jsimd_h2v2_fancy_upsample_sse2 jSFUpH2V2S2 -#define jsimd_h2v1_fancy_upsample_sse2 jSFUpH2V1S2 -#define jconst_merged_upsample_sse2 jSCMUpS2 -#define jsimd_h2v2_merged_upsample_sse2 jSMUpH2V2S2 -#define jsimd_h2v2_extrgb_merged_upsample_sse2 jSMUpH2V2EXTRGBS2 -#define jsimd_h2v2_extrgbx_merged_upsample_sse2 jSMUpH2V2EXTRGBXS2 -#define jsimd_h2v2_extbgr_merged_upsample_sse2 jSMUpH2V2EXTBGRS2 -#define jsimd_h2v2_extbgrx_merged_upsample_sse2 jSMUpH2V2EXTBGRXS2 -#define jsimd_h2v2_extxbgr_merged_upsample_sse2 jSMUpH2V2EXTXBGRS2 -#define jsimd_h2v2_extxrgb_merged_upsample_sse2 jSMUpH2V2EXTXRGBS2 -#define jsimd_h2v1_merged_upsample_sse2 jSMUpH2V1S2 -#define jsimd_h2v1_extrgb_merged_upsample_sse2 jSMUpH2V1EXTRGBS2 -#define jsimd_h2v1_extrgbx_merged_upsample_sse2 jSMUpH2V1EXTRGBXS2 -#define jsimd_h2v1_extbgr_merged_upsample_sse2 jSMUpH2V1EXTBGRS2 -#define jsimd_h2v1_extbgrx_merged_upsample_sse2 jSMUpH2V1EXTBGRXS2 -#define jsimd_h2v1_extxbgr_merged_upsample_sse2 jSMUpH2V1EXTXBGRS2 -#define jsimd_h2v1_extxrgb_merged_upsample_sse2 jSMUpH2V1EXTXRGBS2 -#define jsimd_convsamp_mmx jSConvM -#define jsimd_convsamp_sse2 jSConvS2 -#define jsimd_convsamp_float_3dnow jSConvF3D -#define jsimd_convsamp_float_sse jSConvFS -#define jsimd_convsamp_float_sse2 jSConvFS2 -#define jsimd_fdct_islow_mmx jSFDMIS -#define jsimd_fdct_ifast_mmx jSFDMIF -#define jconst_fdct_islow_sse2 jSCFDS2IS -#define jsimd_fdct_islow_sse2 jSFDS2IS -#define jconst_fdct_ifast_sse2 jSCFDS2IF -#define jsimd_fdct_ifast_sse2 jSFDS2IF -#define jsimd_fdct_float_3dnow jSFD3DF -#define jconst_fdct_float_sse jSCFDSF -#define jsimd_fdct_float_sse jSFDSF -#define jsimd_quantize_mmx jSQuantM -#define jsimd_quantize_sse2 jSQuantS2 -#define jsimd_quantize_float_3dnow jSQuantF3D -#define jsimd_quantize_float_sse jSQuantFS -#define jsimd_quantize_float_sse2 jSQuantFS2 -#define jsimd_idct_2x2_mmx jSIDM22 -#define jsimd_idct_4x4_mmx jSIDM44 -#define jconst_idct_red_sse2 jSCIDS2R -#define jsimd_idct_2x2_sse2 jSIDS222 -#define jsimd_idct_4x4_sse2 jSIDS244 -#define jsimd_idct_islow_mmx jSIDMIS -#define jsimd_idct_ifast_mmx jSIDMIF -#define jconst_idct_islow_sse2 jSCIDS2IS -#define jsimd_idct_islow_sse2 jSIDS2IS -#define jconst_idct_ifast_sse2 jSCIDS2IF -#define jsimd_idct_ifast_sse2 jSIDS2IF -#define jsimd_idct_float_3dnow jSID3DF -#define jconst_fdct_float_sse jSCIDSF -#define jsimd_idct_float_sse jSIDSF -#define jconst_fdct_float_sse2 jSCIDS2F -#define jsimd_idct_float_sse2 jSIDS2F -#endif /* NEED_SHORT_EXTERNAL_NAMES */ +#define JSIMD_MIPS_DSPR2 0x20 +#define JSIMD_ALTIVEC 0x40 /* SIMD Ext: retrieve SIMD/CPU information */ -EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void)); +EXTERN(unsigned int) jpeg_simd_cpu_support (void); -/* SIMD Color Space Conversion */ +/* RGB & extended RGB --> YCC Colorspace Conversion */ EXTERN(void) jsimd_rgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); - -EXTERN(void) jsimd_rgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgbx_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgr_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgrx_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxbgr_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxrgb_gray_convert_mmx - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); - -EXTERN(void) jsimd_ycc_rgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extrgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extrgbx_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extbgr_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extbgrx_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extxbgr_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); -EXTERN(void) jsimd_ycc_extxrgb_convert_mmx - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_ycc_convert_sse2[]; EXTERN(void) jsimd_rgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_neon + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_extrgb_ycc_convert_neon_slowld3 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_neon_slowld3 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +/* RGB & extended RGB --> Grayscale Colorspace Conversion */ +EXTERN(void) jsimd_rgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_gray_convert_sse2[]; EXTERN(void) jsimd_rgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_gray_convert_sse2 - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +/* YCC --> RGB & extended RGB Colorspace Conversion */ +EXTERN(void) jsimd_ycc_rgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); extern const int jconst_ycc_rgb_convert_sse2[]; EXTERN(void) jsimd_ycc_rgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_sse2 - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); - -EXTERN(void) jsimd_rgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extrgbx_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgr_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extbgrx_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxbgr_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); -EXTERN(void) jsimd_extxrgb_ycc_convert_neon - JPP((JDIMENSION img_width, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_neon - JPP((JDIMENSION out_width, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows)); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_rgb565_convert_neon + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_extrgb_convert_neon_slowst3 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_neon_slowst3 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +/* NULL Colorspace Conversion */ +EXTERN(void) jsimd_c_null_convert_mips_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows, int num_components); -/* SIMD Downsample */ -EXTERN(void) jsimd_h2v2_downsample_mmx - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); +/* h2v1 Downsampling */ EXTERN(void) jsimd_h2v1_downsample_mmx - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(void) jsimd_h2v2_downsample_sse2 - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); EXTERN(void) jsimd_h2v1_downsample_sse2 - JPP((JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data)); + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_neon + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_mips_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); -/* SIMD Upsample */ -EXTERN(void) jsimd_h2v2_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +/* h2v2 Downsampling */ +EXTERN(void) jsimd_h2v2_downsample_mmx + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_sse2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_neon + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_mips_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, JDIMENSION width_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data); + +/* h2v2 Smooth Downsampling */ +EXTERN(void) jsimd_h2v2_smooth_downsample_mips_dspr2 + (JSAMPARRAY input_data, JSAMPARRAY output_data, + JDIMENSION v_samp_factor, int max_v_samp_factor, + int smoothing_factor, JDIMENSION width_blocks, + JDIMENSION image_width); + + +/* Upsampling */ EXTERN(void) jsimd_h2v1_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_mmx + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_fancy_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jsimd_h2v1_upsample_sse2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_sse2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_int_upsample_mips_dspr2 + (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr, JDIMENSION output_width, + int max_v_samp_factor); + +EXTERN(void) jsimd_h2v1_upsample_altivec + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_altivec + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +/* Fancy Upsampling */ EXTERN(void) jsimd_h2v1_fancy_upsample_mmx - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_mmx + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +extern const int jconst_fancy_upsample_sse2[]; +EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_neon + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_fancy_upsample_altivec + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_altivec + (int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + +/* Merged Upsampling */ EXTERN(void) jsimd_h2v1_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mmx - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); - -EXTERN(void) jsimd_h2v2_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); -EXTERN(void) jsimd_h2v1_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION output_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); -extern const int jconst_fancy_upsample_sse2[]; -EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); -EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 - JPP((int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jsimd_h2v2_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); extern const int jconst_merged_upsample_sse2[]; -EXTERN(void) jsimd_h2v2_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); -EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); EXTERN(void) jsimd_h2v1_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_sse2 - JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v2_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); -/* SIMD Sample Conversion */ -EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_sse2 JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - DCTELEM * workspace)); - -EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_convsamp_float_sse2 JPP((JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT * workspace)); - -/* SIMD Forward DCT */ -EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data)); -EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data)); +EXTERN(void) jsimd_h2v1_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); + +EXTERN(void) jsimd_h2v2_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); + +EXTERN(void) jsimd_h2v1_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v2_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + +/* Sample Conversion */ +EXTERN(void) jsimd_convsamp_mmx + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_sse2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_neon + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_mips_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_altivec + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +/* Floating Point Sample Conversion */ +EXTERN(void) jsimd_convsamp_float_3dnow + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_sse + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_sse2 + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_convsamp_float_mips_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + +/* Slow Integer Forward DCT */ +EXTERN(void) jsimd_fdct_islow_mmx (DCTELEM *data); -extern const int jconst_fdct_ifast_sse2[]; -EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM * data)); extern const int jconst_fdct_islow_sse2[]; -EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data)); +EXTERN(void) jsimd_fdct_islow_sse2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_neon (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_altivec (DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data)); +/* Fast Integer Forward DCT */ +EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM *data); -EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data)); +extern const int jconst_fdct_ifast_sse2[]; +EXTERN(void) jsimd_fdct_ifast_sse2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_neon (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_mips_dspr2 (DCTELEM *data); + +EXTERN(void) jsimd_fdct_ifast_altivec (DCTELEM *data); + +/* Floating Point Forward DCT */ +EXTERN(void) jsimd_fdct_float_3dnow (FAST_FLOAT *data); extern const int jconst_fdct_float_sse[]; -EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data)); +EXTERN(void) jsimd_fdct_float_sse (FAST_FLOAT *data); + +/* Quantization */ +EXTERN(void) jsimd_quantize_mmx + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_sse2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_neon + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); -/* SIMD Quantization */ -EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_sse2 JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_neon JPP((JCOEFPTR coef_block, - DCTELEM * divisors, - DCTELEM * workspace)); - -EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -EXTERN(void) jsimd_quantize_float_sse2 JPP((JCOEFPTR coef_block, - FAST_FLOAT * divisors, - FAST_FLOAT * workspace)); - -/* SIMD Reduced Inverse DCT */ -EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_quantize_mips_dspr2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_altivec + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +/* Floating Point Quantization */ +EXTERN(void) jsimd_quantize_float_3dnow + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_sse + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_sse2 + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +EXTERN(void) jsimd_quantize_float_mips_dspr2 + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + +/* Scaled Inverse DCT */ +EXTERN(void) jsimd_idct_2x2_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_red_sse2[]; -EXTERN(void) jsimd_idct_2x2_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_2x2_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_4x4_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -/* SIMD Inverse DCT */ -EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast_mmx JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_2x2_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_2x2_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_2x2_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col, int *workspace); +EXTERN(void) jsimd_idct_6x6_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12_pass1_mips_dspr2 + (JCOEFPTR coef_block, void *dct_table, int *workspace); +EXTERN(void) jsimd_idct_12x12_pass2_mips_dspr2 + (int *workspace, int *output); + +/* Slow Integer Inverse DCT */ +EXTERN(void) jsimd_idct_islow_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_islow_sse2[]; -EXTERN(void) jsimd_idct_islow_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_islow_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_islow_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_islow_mips_dspr2 + (void *dct_table, JCOEFPTR coef_block, int *output_buf, + JSAMPLE *output_col); + +EXTERN(void) jsimd_idct_islow_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Fast Integer Inverse DCT */ +EXTERN(void) jsimd_idct_ifast_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + extern const int jconst_idct_ifast_sse2[]; -EXTERN(void) jsimd_idct_ifast_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_islow_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); -EXTERN(void) jsimd_idct_ifast_neon JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - -EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_ifast_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_ifast_neon + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_ifast_cols_mips_dspr2 + (JCOEF *inptr, IFAST_MULT_TYPE *quantptr, DCTELEM *wsptr, + const int *idct_coefs); +EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2 + (DCTELEM *wsptr, JSAMPARRAY output_buf, JDIMENSION output_col, + const int *idct_coefs); + +EXTERN(void) jsimd_idct_ifast_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Floating Point Inverse DCT */ +EXTERN(void) jsimd_idct_float_3dnow + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse[]; -EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); +EXTERN(void) jsimd_idct_float_sse + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse2[]; -EXTERN(void) jsimd_idct_float_sse2 JPP((void * dct_table, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, - JDIMENSION output_col)); - +EXTERN(void) jsimd_idct_float_sse2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +/* Huffman coding */ +extern const int jconst_huff_encode_one_block[]; +EXTERN(JOCTET*) jsimd_huff_encode_one_block_sse2 + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon_slowtbl + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_i386.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_i386.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_i386.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_i386.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1091 @@ +/* + * jsimd_i386.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 32-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. This macro allows us to verify it at runtime. + */ +#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = jpeg_simd_cpu_support(); + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEMMX"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_MMX; + env = getenv("JSIMD_FORCE3DNOW"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_3DNOW|JSIMD_MMX; + env = getenv("JSIMD_FORCESSE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE|JSIMD_MMX; + env = getenv("JSIMD_FORCESSE2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_ycc_convert_sse2; + mmxfct=jsimd_extrgb_ycc_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_ycc_convert_sse2; + mmxfct=jsimd_extrgbx_ycc_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_ycc_convert_sse2; + mmxfct=jsimd_extbgr_ycc_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_ycc_convert_sse2; + mmxfct=jsimd_extbgrx_ycc_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_ycc_convert_sse2; + mmxfct=jsimd_extxbgr_ycc_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_ycc_convert_sse2; + mmxfct=jsimd_extxrgb_ycc_convert_mmx; + break; + default: + sse2fct=jsimd_rgb_ycc_convert_sse2; + mmxfct=jsimd_rgb_ycc_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_gray_convert_sse2; + mmxfct=jsimd_extrgb_gray_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_gray_convert_sse2; + mmxfct=jsimd_extrgbx_gray_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_gray_convert_sse2; + mmxfct=jsimd_extbgr_gray_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_gray_convert_sse2; + mmxfct=jsimd_extbgrx_gray_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_gray_convert_sse2; + mmxfct=jsimd_extxbgr_gray_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_gray_convert_sse2; + mmxfct=jsimd_extxrgb_gray_convert_mmx; + break; + default: + sse2fct=jsimd_rgb_gray_convert_sse2; + mmxfct=jsimd_rgb_gray_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_ycc_extrgb_convert_sse2; + mmxfct=jsimd_ycc_extrgb_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_ycc_extrgbx_convert_sse2; + mmxfct=jsimd_ycc_extrgbx_convert_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_ycc_extbgr_convert_sse2; + mmxfct=jsimd_ycc_extbgr_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_ycc_extbgrx_convert_sse2; + mmxfct=jsimd_ycc_extbgrx_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_ycc_extxbgr_convert_sse2; + mmxfct=jsimd_ycc_extxbgr_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_ycc_extxrgb_convert_sse2; + mmxfct=jsimd_ycc_extxrgb_convert_mmx; + break; + default: + sse2fct=jsimd_ycc_rgb_convert_sse2; + mmxfct=jsimd_ycc_rgb_convert_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_SSE2) + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_MMX) + jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; + break; + default: + sse2fct=jsimd_h2v2_merged_upsample_sse2; + mmxfct=jsimd_h2v2_merged_upsample_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; + mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; + break; + default: + sse2fct=jsimd_h2v1_merged_upsample_sse2; + mmxfct=jsimd_h2v1_merged_upsample_mmx; + break; + } + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_MMX) + mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_MMX) + jsimd_convsamp_mmx(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_convsamp_float_sse(sample_data, start_col, workspace); + else if (simd_support & JSIMD_3DNOW) + jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + jsimd_fdct_islow_sse2(data); + else if (simd_support & JSIMD_MMX) + jsimd_fdct_islow_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + jsimd_fdct_ifast_sse2(data); + else if (simd_support & JSIMD_MMX) + jsimd_fdct_ifast_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + jsimd_fdct_float_sse(data); + else if (simd_support & JSIMD_3DNOW) + jsimd_fdct_float_3dnow(data); +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_MMX) + jsimd_quantize_mmx(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_float_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_quantize_float_sse(coef_block, divisors, workspace); + else if (simd_support & JSIMD_3DNOW) + jsimd_quantize_float_3dnow(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + if (sizeof(FLOAT_MULT_TYPE) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_MMX) + jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_3DNOW) + jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1130 @@ +/* + * jsimd_mips.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014 D. R. Commander + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * MIPS architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +#include +#include +#include + +static unsigned int simd_support = ~0; + +#if defined(__linux__) + +LOCAL(int) +parse_proc_cpuinfo(const char* search_string) +{ + const char* file_name = "/proc/cpuinfo"; + char cpuinfo_line[256]; + FILE* f = NULL; + simd_support = 0; + + if ((f = fopen(file_name, "r")) != NULL) { + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) { + if (strstr(cpuinfo_line, search_string) != NULL) { + fclose(f); + simd_support |= JSIMD_MIPS_DSPR2; + return 1; + } + } + fclose(f); + } + /* Did not find string in the proc file, or not Linux ELF. */ + return 0; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + if (simd_support != ~0U) + return; + + simd_support = 0; + +#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) + simd_support |= JSIMD_MIPS_DSPR2; +#elif defined(__linux__) + /* We still have a chance to use MIPS DSPR2 regardless of globally used + * -mdspr2 options passed to gcc by performing runtime detection via + * /proc/cpuinfo parsing on linux */ + if (!parse_proc_cpuinfo("MIPS 74K")) + return; +#endif +} + +static const int mips_idct_ifast_coefs[4] = { + 0x45404540, // FIX( 1.082392200 / 2) = 17734 = 0x4546 + 0x5A805A80, // FIX( 1.414213562 / 2) = 23170 = 0x5A82 + 0x76407640, // FIX( 1.847759065 / 2) = 30274 = 0x7642 + 0xAC60AC60 // FIX(-2.613125930 / 4) = -21407 = 0xAC61 +}; + +/* The following struct is borrowed from jdsample.c */ +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +typedef struct { + struct jpeg_upsampler pub; + JSAMPARRAY color_buf[MAX_COMPONENTS]; + upsample1_ptr methods[MAX_COMPONENTS]; + int next_row_out; + JDIMENSION rows_to_go; + int rowgroup_height[MAX_COMPONENTS]; + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; +} my_upsampler; + +typedef my_upsampler *my_upsample_ptr; + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2; + + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, + num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, + num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->output_width, input_buf, input_row, output_buf, + num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_c_null_convert_mips_dspr2(cinfo->image_width, input_buf, + output_buf, output_row, num_rows, + cinfo->num_components); +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if(DCTSIZE != 8) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width, + cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_smooth_downsample_mips_dspr2(input_data, output_data, + compptr->v_samp_factor, + cinfo->max_v_samp_factor, + cinfo->smoothing_factor, + compptr->width_in_blocks, + cinfo->image_width); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width, + cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor, + cinfo->output_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor, + cinfo->output_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + + jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index], + upsample->v_expand[compptr->component_index], + input_data, output_data_ptr, + cinfo->output_width, + cinfo->max_v_samp_factor); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, + compptr->downsampled_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, + compptr->downsampled_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, + JSAMPLE *); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_h2v2_extbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; + break; + } + + mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, + JSAMPLE *); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_h2v1_extbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; + break; + } + + mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + if ((simd_support & JSIMD_MIPS_DSPR2)) + jsimd_convsamp_float_mips_dspr2(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_fdct_islow_mips_dspr2(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_fdct_ifast_mips_dspr2(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_quantize_mips_dspr2(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_quantize_float_mips_dspr2(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12 (void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int workspace[DCTSIZE*4]; /* buffers data between passes */ + jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col, workspace); + } +} + +GLOBAL(void) +jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int workspace[96]; + int output[12] = { + (int)(output_buf[0] + output_col), + (int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col), + (int)(output_buf[8] + output_col), + (int)(output_buf[9] + output_col), + (int)(output_buf[10] + output_col), + (int)(output_buf[11] + output_col), + }; + jsimd_idct_12x12_pass1_mips_dspr2(coef_block, compptr->dct_table, + workspace); + jsimd_idct_12x12_pass2_mips_dspr2(workspace, output); + } +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + int output[8] = { + (int)(output_buf[0] + output_col), + (int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col), + }; + + jsimd_idct_islow_mips_dspr2(coef_block, compptr->dct_table, + output, IDCT_range_limit(cinfo)); + } +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_MIPS_DSPR2) { + JCOEFPTR inptr; + IFAST_MULT_TYPE *quantptr; + DCTELEM workspace[DCTSIZE2]; /* buffers data between passes */ + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; + + jsimd_idct_ifast_cols_mips_dspr2(inptr, quantptr, + workspace, mips_idct_ifast_coefs); + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + jsimd_idct_ifast_rows_mips_dspr2(workspace, output_buf, + output_col, mips_idct_ifast_coefs); + } +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips_dspr2_asm.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,285 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013, MIPS Technologies, Inc., California. + * All rights reserved. + * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) + * Darko Laus (darko.laus@imgtec.com) + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +#define f0 $f0 +#define f1 $f1 +#define f2 $f2 +#define f3 $f3 +#define f4 $f4 +#define f5 $f5 +#define f6 $f6 +#define f7 $f7 +#define f8 $f8 +#define f9 $f9 +#define f10 $f10 +#define f11 $f11 +#define f12 $f12 +#define f13 $f13 +#define f14 $f14 +#define f15 $f15 +#define f16 $f16 +#define f17 $f17 +#define f18 $f18 +#define f19 $f19 +#define f20 $f20 +#define f21 $f21 +#define f22 $f22 +#define f23 $f23 +#define f24 $f24 +#define f25 $f25 +#define f26 $f26 +#define f27 $f27 +#define f28 $f28 +#define f29 $f29 +#define f30 $f30 +#define f31 $f31 + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 + */ +#define LEAF_MIPS32R2(symbol) \ + .globl symbol; \ + .align 2; \ + .type symbol, @function; \ + .ent symbol, 0; \ +symbol: .frame sp, 0, ra; \ + .set push; \ + .set arch=mips32r2; \ + .set noreorder; \ + .set noat; + +/* + * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2 + */ +#define LEAF_MIPS_DSPR2(symbol) \ +LEAF_MIPS32R2(symbol) \ + .set dspr2; + +/* + * END - mark end of function + */ +#define END(function) \ + .set pop; \ + .end function; \ + .size function,.-function + +/* + * Checks if stack offset is big enough for storing/restoring regs_num + * number of register to/from stack. Stack offset must be greater than + * or equal to the number of bytes needed for storing registers (regs_num*4). + * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is + * preserved for input arguments of the functions, already stored in a0-a3), + * stack size can be further optimized by utilizing this space. + */ +.macro CHECK_STACK_OFFSET regs_num, stack_offset +.if \stack_offset < \regs_num * 4 - 16 +.error "Stack offset too small." +.endif +.endm + +/* + * Saves set of registers on stack. Maximum number of registers that + * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * before registers are pushed in order to provide enough space on stack + * (offset must be multiple of 4, and must be big enough, as described by + * CHECK_STACK_OFFSET macro). This macro is intended to be used in + * combination with RESTORE_REGS_FROM_STACK macro. Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + .if \stack_offset != 0 + addiu sp, sp, -\stack_offset + .endif + sw \r1, 0(sp) + .if \r2 != 0 + sw \r2, 4(sp) + .endif + .if \r3 != 0 + sw \r3, 8(sp) + .endif + .if \r4 != 0 + sw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + sw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + sw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + sw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + sw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + sw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + sw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + sw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + sw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + sw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + sw \r14, 52(sp) + .endif +.endm + +/* + * Restores set of registers from stack. Maximum number of registers that + * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * after registers are restored (offset must be multiple of 4, and must + * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is + * intended to be used in combination with RESTORE_REGS_FROM_STACK macro. + * Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + lw \r1, 0(sp) + .if \r2 != 0 + lw \r2, 4(sp) + .endif + .if \r3 != 0 + lw \r3, 8(sp) + .endif + .if \r4 != 0 + lw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + lw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + lw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + lw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + lw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + lw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + lw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + lw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + lw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + lw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + lw \r14, 52(sp) + .endif + .if \stack_offset != 0 + addiu sp, sp, \stack_offset + .endif +.endm + + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_mips_dspr2.S 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,4487 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. + * All rights reserved. + * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) + * Darko Laus (darko.laus@imgtec.com) + * Copyright (C) 2015, D. R. Commander. All Rights Reserved. + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#include "jsimd_mips_dspr2_asm.h" + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + * 20(sp) - cinfo->num_components + * + * Null conversion for compression + */ + + SAVE_REGS_ON_STACK 8, s0, s1 + + lw t9, 24(sp) // t9 = num_rows + lw s0, 28(sp) // s0 = cinfo->num_components + andi t0, a0, 3 // t0 = cinfo->image_width & 3 + beqz t0, 4f // no residual + nop +0: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +1: + sll t3, t1, 2 + lwx t5, t3(a2) // t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +2: + lbu t3, 0(t2) + addiu t5, t5, 1 + sb t3, -1(t5) + bne t6, t5, 2b + addu t2, t2, s0 +3: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 3b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 1b + nop + addiu a1, a1, 4 + bgez t9, 0b + addiu a3, a3, 1 + b 7f + nop +4: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +5: + sll t3, t1, 2 + lwx t5, t3(a2) // t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +6: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 6b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 5b + nop + addiu a1, a1, 4 + bgez t9, 4b + addiu a3, a3, 1 +7: + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop + +END(jsimd_c_null_convert_mips_dspr2) + +/*****************************************************************************/ +/* + * jsimd_extrgb_ycc_convert_mips_dspr2 + * jsimd_extbgr_ycc_convert_mips_dspr2 + * jsimd_extrgbx_ycc_convert_mips_dspr2 + * jsimd_extbgrx_ycc_convert_mips_dspr2 + * jsimd_extxbgr_ycc_convert_mips_dspr2 + * jsimd_extxrgb_ycc_convert_mips_dspr2 + * + * Colorspace conversion RGB -> YCbCr + */ + +.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs + +.macro DO_RGB_TO_YCC r, \ + g, \ + b, \ + inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw t7, 48(sp) // t7 = num_rows + li s0, 0x4c8b // FIX(0.29900) + li s1, 0x9646 // FIX(0.58700) + li s2, 0x1d2f // FIX(0.11400) + li s3, 0xffffd4cd // -FIX(0.16874) + li s4, 0xffffab33 // -FIX(0.33126) + li s5, 0x8000 // FIX(0.50000) + li s6, 0xffff94d1 // -FIX(0.41869) + li s7, 0xffffeb2f // -FIX(0.08131) + li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1 + +0: + addiu t7, -1 // --num_rows + lw t6, 0(a1) // t6 = input_buf[0] + lw t0, 0(a2) + lw t1, 4(a2) + lw t2, 8(a2) + sll t3, a3, 2 + lwx t0, t3(t0) // t0 = output_buf[0][output_row] + lwx t1, t3(t1) // t1 = output_buf[1][output_row] + lwx t2, t3(t2) // t2 = output_buf[2][output_row] + + addu t9, t2, a0 // t9 = end address + addiu a3, 1 + +1: + DO_RGB_TO_YCC t3, t4, t5, t6 + + mtlo s5, $ac0 + mtlo t8, $ac1 + mtlo t8, $ac2 + maddu $ac0, s2, t5 + maddu $ac1, s5, t5 + maddu $ac2, s5, t3 + maddu $ac0, s0, t3 + maddu $ac1, s3, t3 + maddu $ac2, s6, t4 + maddu $ac0, s1, t4 + maddu $ac1, s4, t4 + maddu $ac2, s7, t5 + extr.w t3, $ac0, 16 + extr.w t4, $ac1, 16 + extr.w t5, $ac2, 16 + sb t3, 0(t0) + sb t4, 0(t1) + sb t5, 0(t2) + addiu t0, 1 + addiu t2, 1 + bne t2, t9, 1b + addiu t1, 1 + bgtz t7, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_ycc_convert_mips_dspr2) + +.purgem DO_RGB_TO_YCC + +.endm + +/*------------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 + +/*****************************************************************************/ +/* + * jsimd_ycc_extrgb_convert_mips_dspr2 + * jsimd_ycc_extbgr_convert_mips_dspr2 + * jsimd_ycc_extrgbx_convert_mips_dspr2 + * jsimd_ycc_extbgrx_convert_mips_dspr2 + * jsimd_ycc_extxbgr_convert_mips_dspr2 + * jsimd_ycc_extxrgb_convert_mips_dspr2 + * + * Colorspace conversion YCbCr -> RGB + */ + +.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs + +.macro STORE_YCC_TO_RGB scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r_offs(\outptr) + sb \scratch1, \g_offs(\outptr) + sb \scratch2, \b_offs(\outptr) +.if (\pixel_size == 4) + li t0, 0xFF + sb t0, \a_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - input_row + * a3 - output_buf + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s1, 48(sp) + li t3, 0x8000 + li t4, 0x166e9 // FIX(1.40200) + li t5, 0x1c5a2 // FIX(1.77200) + li t6, 0xffff492e // -FIX(0.71414) + li t7, 0xffffa7e6 // -FIX(0.34414) + repl.ph t8, 128 + +0: + lw s0, 0(a3) + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + sll s5, a2, 2 + addiu s1, -1 + lwx s2, s5(t0) + lwx s3, s5(t1) + lwx s4, s5(t2) + addu t9, s2, a0 + addiu a2, 1 + +1: + lbu s7, 0(s4) // cr + lbu s6, 0(s3) // cb + lbu s5, 0(s2) // y + addiu s2, 1 + addiu s4, 1 + addiu s7, -128 + addiu s6, -128 + mul t2, t7, s6 + mul t0, t6, s7 // Crgtab[cr] + sll s7, 15 + mulq_rs.w t1, t4, s7 // Crrtab[cr] + sll s6, 15 + addu t2, t3 // Cbgtab[cb] + addu t2, t0 + + mulq_rs.w t0, t5, s6 // Cbbtab[cb] + sra t2, 16 + addu t1, s5 + addu t2, s5 // add y + ins t2, t1, 16, 16 + subu.ph t2, t2, t8 + addu t0, s5 + shll_s.ph t2, t2, 8 + subu t0, 128 + shra.ph t2, t2, 8 + shll_s.w t0, t0, 24 + addu.ph t2, t2, t8 // clip & store + sra t0, t0, 24 + sra t1, t2, 16 + addiu t0, 128 + + STORE_YCC_TO_RGB t1, t2, t0, s0 + + bne s2, t9, 1b + addiu s3, 1 + bgtz s1, 0b + addiu a3, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_ycc_\colorid\()_convert_mips_dspr2) + +.purgem STORE_YCC_TO_RGB + +.endm + +/*------------------------------------------id -- pix R G B A */ +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0 +GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0 + +/*****************************************************************************/ +/* + * jsimd_extrgb_gray_convert_mips_dspr2 + * jsimd_extbgr_gray_convert_mips_dspr2 + * jsimd_extrgbx_gray_convert_mips_dspr2 + * jsimd_extbgrx_gray_convert_mips_dspr2 + * jsimd_extxbgr_gray_convert_mips_dspr2 + * jsimd_extxrgb_gray_convert_mips_dspr2 + * + * Colorspace conversion RGB -> GRAY + */ + +.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs + +.macro DO_RGB_TO_GRAY r, \ + g, \ + b, \ + inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + li s0, 0x4c8b // s0 = FIX(0.29900) + li s1, 0x9646 // s1 = FIX(0.58700) + li s2, 0x1d2f // s2 = FIX(0.11400) + li s7, 0x8000 // s7 = FIX(0.50000) + lw s6, 48(sp) + andi t7, a0, 3 + +0: + addiu s6, -1 // s6 = num_rows + lw t0, 0(a1) + lw t1, 0(a2) + sll t3, a3, 2 + lwx t1, t3(t1) + addiu a3, 1 + addu t9, t1, a0 + subu t8, t9, t7 + beq t1, t8, 2f + nop + +1: + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t6, $ac0, 16 + + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + extr.w t2, $ac1, 16 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t5, $ac0, 16 + sb t6, 0(t1) + sb t2, 1(t1) + extr.w t3, $ac1, 16 + addiu t1, 4 + sb t5, -2(t1) + sb t3, -1(t1) + bne t1, t8, 1b + nop + +2: + beqz t7, 4f + nop + +3: + DO_RGB_TO_GRAY t3, t4, t5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + extr.w t6, $ac0, 16 + sb t6, 0(t1) + addiu t1, 1 + bne t1, t9, 3b + nop + +4: + bgtz s6, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_gray_convert_mips_dspr2) + +.purgem DO_RGB_TO_GRAY + +.endm + +/*------------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 +/*****************************************************************************/ +/* + * jsimd_h2v2_merged_upsample_mips_dspr2 + * jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 + * jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 + * jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 + * jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 + * jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 + * jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 + * + * Merged h2v2 upsample routines + */ +.macro GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ + pixel_size, \ + r1_offs, \ + g1_offs, \ + b1_offs, \ + a1_offs, \ + r2_offs, \ + g2_offs, \ + b2_offs, \ + a2_offs + +.macro STORE_H2V2_2_PIXELS scratch0 \ + scratch1 \ + scratch2 \ + scratch3 \ + scratch4 \ + scratch5 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li \scratch0, 0xFF + sb \scratch0, \a1_offs(\outptr) + sb \scratch0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V2_1_PIXEL scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_MIPS_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) +/* + * a0 - cinfo->output_width + * a1 - input_buf + * a2 - in_row_group_ctr + * a3 - output_buf + * 16(sp) - cinfo->sample_range_limit + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + lw t9, 56(sp) // cinfo->sample_range_limit + lw v0, 0(a1) + lw v1, 4(a1) + lw t0, 8(a1) + sll t1, a2, 3 + addiu t2, t1, 4 + sll t3, a2, 2 + lw t4, 0(a3) // t4 = output_buf[0] + lwx t1, t1(v0) // t1 = input_buf[0][in_row_group_ctr*2] + lwx t2, t2(v0) // t2 = input_buf[0][in_row_group_ctr*2 + 1] + lwx t5, t3(v1) // t5 = input_buf[1][in_row_group_ctr] + lwx t6, t3(t0) // t6 = input_buf[2][in_row_group_ctr] + lw t7, 4(a3) // t7 = output_buf[1] + li s1, 0xe6ea + addiu t8, s1, 0x7fff // t8 = 0x166e9 [FIX(1.40200)] + addiu s0, t8, 0x5eb9 // s0 = 0x1c5a2 [FIX(1.77200)] + addiu s1, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] + xori s2, s1, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] + srl t3, a0, 1 + blez t3, 2f + addu t0, t5, t3 // t0 = end address + 1: + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t5, t5, 1 + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + extr_r.w s5, $ac1, 16 + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + lbu v0, 0(t1) + addiu t6, t6, 1 + addiu t1, t1, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, -1(t1) + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2) + + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, 1(t2) + addiu t2, t2, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7 + + bne t0, t5, 1b + nop +2: + andi t0, a0, 1 + beqz t0, 4f + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + lbu v0, 0(t1) + extr_r.w s5, $ac1, 16 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2) + + STORE_H2V2_1_PIXEL t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_1_PIXEL t3, s3, v1, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) + +.purgem STORE_H2V2_1_PIXEL +.purgem STORE_H2V2_2_PIXELS +.endm + +/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 +/*****************************************************************************/ +/* + * jsimd_h2v1_merged_upsample_mips_dspr2 + * jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 + * jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 + * jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 + * jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 + * jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 + * jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 + * + * Merged h2v1 upsample routines + */ + +.macro GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ + pixel_size, \ + r1_offs, \ + g1_offs, \ + b1_offs, \ + a1_offs, \ + r2_offs, \ + g2_offs, \ + b2_offs, \ + a2_offs + +.macro STORE_H2V1_2_PIXELS scratch0 \ + scratch1 \ + scratch2 \ + scratch3 \ + scratch4 \ + scratch5 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) + sb t0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V1_1_PIXEL scratch0 \ + scratch1 \ + scratch2 \ + outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_MIPS_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) +/* + * a0 - cinfo->output_width + * a1 - input_buf + * a2 - in_row_group_ctr + * a3 - output_buf + * 16(sp) - range_limit + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + li t0, 0xe6ea + lw t1, 0(a1) // t1 = input_buf[0] + lw t2, 4(a1) // t2 = input_buf[1] + lw t3, 8(a1) // t3 = input_buf[2] + lw t8, 56(sp) // t8 = range_limit + addiu s1, t0, 0x7fff // s1 = 0x166e9 [FIX(1.40200)] + addiu s2, s1, 0x5eb9 // s2 = 0x1c5a2 [FIX(1.77200)] + addiu s0, t0, 0x9916 // s0 = 0x8000 + addiu s4, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] + xori s3, s4, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] + srl t0, a0, 1 + sll t4, a2, 2 + lwx s5, t4(t1) // s5 = inptr0 + lwx s6, t4(t2) // s6 = inptr1 + lwx s7, t4(t3) // s7 = inptr2 + lw t7, 0(a3) // t7 = outptr + blez t0, 2f + addu t9, s6, t0 // t9 = end address +1: + lbu t2, 0(s6) // t2 = cb + lbu t0, 0(s7) // t0 = cr + lbu t1, 0(s5) // t1 = y + addiu t2, t2, -128 // t2 = cb - 128 + addiu t0, t0, -128 // t0 = cr - 128 + mult $ac1, s4, t2 + madd $ac1, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // t0 = (C1*cr + ONE_HALF)>> SCALEBITS + extr_r.w t5, $ac1, 16 + mulq_rs.w t6, s2, t2 // t6 = (C2*cb + ONE_HALF)>> SCALEBITS + addiu s7, s7, 1 + addiu s6, s6, 1 + addu t2, t1, t0 // t2 = y + cred + addu t3, t1, t5 // t3 = y + cgreen + addu t4, t1, t6 // t4 = y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t1, 1(s5) + lbu v0, 0(t2) + lbu v1, 0(t3) + lbu ra, 0(t4) + addu t2, t1, t0 + addu t3, t1, t5 + addu t4, t1, t6 + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7 + + bne t9, s6, 1b + addiu s5, s5, 2 +2: + andi t0, a0, 1 + beqz t0, 4f + nop +3: + lbu t2, 0(s6) + lbu t0, 0(s7) + lbu t1, 0(s5) + addiu t2, t2, -128 //(cb - 128) + addiu t0, t0, -128 //(cr - 128) + mul t3, s4, t2 + mul t4, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // (C1*cr + ONE_HALF)>> SCALEBITS + mulq_rs.w t6, s2, t2 // (C2*cb + ONE_HALF)>> SCALEBITS + addu t3, t3, s0 + addu t3, t4, t3 + sra t5, t3, 16 // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS + addu t2, t1, t0 // y + cred + addu t3, t1, t5 // y + cgreen + addu t4, t1, t6 // y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_1_PIXEL t2, t3, t4, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) + +.purgem STORE_H2V1_1_PIXEL +.purgem STORE_H2V1_2_PIXELS +.endm + +/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 +/*****************************************************************************/ +/* + * jsimd_h2v2_fancy_upsample_mips_dspr2 + * + * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. + */ +LEAF_MIPS_DSPR2(jsimd_h2v2_fancy_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - downsampled_width + * a2 - input_data + * a3 - output_data_ptr + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + li s4, 0 + lw s2, 0(a3) // s2 = *output_data_ptr +0: + li t9, 2 + lw s1, -4(a2) // s1 = inptr1 + +1: + lw s0, 0(a2) // s0 = inptr0 + lwx s3, s4(s2) + addiu s5, a1, -2 // s5 = downsampled_width - 2 + srl t4, s5, 1 + sll t4, t4, 1 + lbu t0, 0(s0) + lbu t1, 1(s0) + lbu t2, 0(s1) + lbu t3, 1(s1) + addiu s0, 2 + addiu s1, 2 + addu t8, s0, t4 // t8 = end address + andi s5, s5, 1 // s5 = residual + sll t4, t0, 1 + sll t6, t1, 1 + addu t0, t0, t4 // t0 = (*inptr0++) * 3 + addu t1, t1, t6 // t1 = (*inptr0++) * 3 + addu t7, t0, t2 // t7 = thiscolsum + addu t6, t1, t3 // t5 = nextcolsum + sll t0, t7, 2 // t0 = thiscolsum * 4 + subu t1, t0, t7 // t1 = thiscolsum * 3 + shra_r.w t0, t0, 4 + addiu t1, 7 + addu t1, t1, t6 + srl t1, t1, 4 + sb t0, 0(s3) + sb t1, 1(s3) + beq t8, s0, 22f // skip to final iteration if width == 3 + addiu s3, 2 +2: + lh t0, 0(s0) // t0 = A3|A2 + lh t2, 0(s1) // t2 = B3|B2 + addiu s0, 2 + addiu s1, 2 + preceu.ph.qbr t0, t0 // t0 = 0|A3|0|A2 + preceu.ph.qbr t2, t2 // t2 = 0|B3|0|B2 + shll.ph t1, t0, 1 + sll t3, t6, 1 + addu.ph t0, t1, t0 // t0 = A3*3|A2*3 + addu t3, t3, t6 // t3 = this * 3 + addu.ph t0, t0, t2 // t0 = next2|next1 + addu t1, t3, t7 + andi t7, t0, 0xFFFF // t7 = next1 + sll t2, t7, 1 + addu t2, t7, t2 // t2 = next1*3 + addu t4, t2, t6 + srl t6, t0, 16 // t6 = next2 + shra_r.w t1, t1, 4 // t1 = (this*3 + last + 8) >> 4 + addu t0, t3, t7 + addiu t0, 7 + srl t0, t0, 4 // t0 = (this*3 + next1 + 7) >> 4 + shra_r.w t4, t4, 4 // t3 = (next1*3 + this + 8) >> 4 + addu t2, t2, t6 + addiu t2, 7 + srl t2, t2, 4 // t2 = (next1*3 + next2 + 7) >> 4 + sb t1, 0(s3) + sb t0, 1(s3) + sb t4, 2(s3) + sb t2, 3(s3) + bne t8, s0, 2b + addiu s3, 4 +22: + beqz s5, 4f + addu t8, s0, s5 +3: + lbu t0, 0(s0) + lbu t2, 0(s1) + addiu s0, 1 + addiu s1, 1 + sll t3, t6, 1 + sll t1, t0, 1 + addu t1, t0, t1 // t1 = inptr0 * 3 + addu t3, t3, t6 // t3 = thiscolsum * 3 + addu t5, t1, t2 + addu t1, t3, t7 + shra_r.w t1, t1, 4 + addu t0, t3, t5 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu s3, 2 + move t7, t6 + bne t8, s0, 3b + move t6, t5 +4: + sll t0, t6, 2 // t0 = thiscolsum * 4 + subu t1, t0, t6 // t1 = thiscolsum * 3 + addu t1, t1, t7 + addiu s4, 4 + shra_r.w t1, t1, 4 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu t9, -1 + addiu s3, 2 + bnez t9, 1b + lw s1, 4(a2) + srl t0, s4, 2 + subu t0, a0, t0 + bgtz t0, 0b + addiu a2, 4 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop +END(jsimd_h2v2_fancy_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_fancy_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - downsampled_width + * a2 - input_data + * a3 - output_data_ptr + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + .set at + + beqz a0, 3f + sll t0, a0, 2 + lw s1, 0(a3) + li s3, 0x10001 + addu s0, s1, t0 +0: + addiu t8, a1, -2 + srl t9, t8, 2 + lw t7, 0(a2) + lw s2, 0(s1) + lbu t0, 0(t7) + lbu t1, 1(t7) // t1 = inptr[1] + sll t2, t0, 1 + addu t2, t2, t0 // t2 = invalue*3 + addu t2, t2, t1 + shra_r.w t2, t2, 2 + sb t0, 0(s2) + sb t2, 1(s2) + beqz t9, 11f + addiu s2, 2 +1: + ulw t0, 0(t7) // t0 = |P3|P2|P1|P0| + ulw t1, 1(t7) + ulh t2, 4(t7) // t2 = |0|0|P5|P4| + preceu.ph.qbl t3, t0 // t3 = |0|P3|0|P2| + preceu.ph.qbr t0, t0 // t0 = |0|P1|0|P0| + preceu.ph.qbr t2, t2 // t2 = |0|P5|0|P4| + preceu.ph.qbl t4, t1 // t4 = |0|P4|0|P3| + preceu.ph.qbr t1, t1 // t1 = |0|P2|0|P1| + shll.ph t5, t4, 1 + shll.ph t6, t1, 1 + addu.ph t5, t5, t4 // t5 = |P4*3|P3*3| + addu.ph t6, t6, t1 // t6 = |P2*3|P1*3| + addu.ph t4, t3, s3 + addu.ph t0, t0, s3 + addu.ph t4, t4, t5 + addu.ph t0, t0, t6 + shrl.ph t4, t4, 2 // t4 = |0|P3|0|P2| + shrl.ph t0, t0, 2 // t0 = |0|P1|0|P0| + addu.ph t2, t2, t5 + addu.ph t3, t3, t6 + shra_r.ph t2, t2, 2 // t2 = |0|P5|0|P4| + shra_r.ph t3, t3, 2 // t3 = |0|P3|0|P2| + shll.ph t2, t2, 8 + shll.ph t3, t3, 8 + or t2, t4, t2 + or t3, t3, t0 + addiu t9, -1 + usw t3, 0(s2) + usw t2, 4(s2) + addiu s2, 8 + bgtz t9, 1b + addiu t7, 4 +11: + andi t8, 3 + beqz t8, 22f + addiu t7, 1 + +2: + lbu t0, 0(t7) + addiu t7, 1 + sll t1, t0, 1 + addu t2, t0, t1 // t2 = invalue + lbu t3, -2(t7) + lbu t4, 0(t7) + addiu t3, 1 + addiu t4, 2 + addu t3, t3, t2 + addu t4, t4, t2 + srl t3, 2 + srl t4, 2 + sb t3, 0(s2) + sb t4, 1(s2) + addiu t8, -1 + bgtz t8, 2b + addiu s2, 2 + +22: + lbu t0, 0(t7) + lbu t2, -1(t7) + sll t1, t0, 1 + addu t1, t1, t0 // t1 = invalue * 3 + addu t1, t1, t2 + addiu t1, 1 + srl t1, t1, 2 + sb t1, 0(s2) + sb t0, 1(s2) + addiu s1, 4 + bne s1, s0, 0b + addiu a2, 4 +3: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_h2v1_fancy_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_downsample_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - cinfo->max_v_samp_factor + * a2 - compptr->v_samp_factor + * a3 - compptr->width_in_blocks + * 16(sp) - input_data + * 20(sp) - output_data + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4 + + beqz a2, 7f + lw s1, 44(sp) // s1 = output_data + lw s0, 40(sp) // s0 = input_data + srl s2, a0, 2 + andi t9, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // t0 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + li s3, 0 // s3 = bias + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 +1: + ulhu t0, 0(t5) + ulhu t1, 2(t5) + ulhu t2, 4(t5) + ulhu t3, 6(t5) + raddu.w.qb t0, t0 + raddu.w.qb t1, t1 + raddu.w.qb t2, t2 + raddu.w.qb t3, t3 + shra.ph t0, t0, 1 + shra_r.ph t1, t1, 1 + shra.ph t2, t2, 1 + shra_r.ph t3, t3, 1 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t3, 3(t4) + addiu s4, -1 + addiu t4, 4 + bgtz s4, 1b + addiu t5, 8 + beqz t8, 3f + addu s4, t4, t8 +2: + ulhu t0, 0(t5) + raddu.w.qb t0, t0 + addqh.w t0, t0, s3 + xori s3, s3, 1 + sb t0, 0(t4) + addiu t4, 1 + bne t4, s4, 2b + addiu t5, 2 +3: + lbux t1, t6(t5) + sll t1, 1 + addqh.w t2, t1, s3 // t2 = pixval1 + xori s3, s3, 1 + addqh.w t3, t1, s3 // t3 = pixval2 + blez s2, 5f + append t3, t2, 8 + addu t5, t4, s2 // t5 = loop_end2 +4: + ush t3, 0(t4) + addiu s2, -1 + bgtz s2, 4b + addiu t4, 2 +5: + beqz t9, 6f + nop + sb t2, 0(t4) +6: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 4 +7: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4 + + j ra + nop +END(jsimd_h2v1_downsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2) + +/* + * a0 - cinfo->image_width + * a1 - cinfo->max_v_samp_factor + * a2 - compptr->v_samp_factor + * a3 - compptr->width_in_blocks + * 16(sp) - input_data + * 20(sp) - output_data + */ + .set at + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + beqz a2, 8f + lw s1, 52(sp) // s1 = output_data + lw s0, 48(sp) // s0 = input_data + + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 + andi t9, a0, 2 + srl s2, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // s2 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + lw s7, 4(s0) // s7 = inptr1 + li s6, 1 // s6 = bias +2: + ulw t0, 0(t5) // t0 = |P3|P2|P1|P0| + ulw t1, 0(s7) // t1 = |Q3|Q2|Q1|Q0| + ulw t2, 4(t5) + ulw t3, 4(s7) + precrq.ph.w t7, t0, t1 // t2 = |P3|P2|Q3|Q2| + ins t0, t1, 16, 16 // t0 = |Q1|Q0|P1|P0| + raddu.w.qb t1, t7 + raddu.w.qb t0, t0 + shra_r.w t1, t1, 2 + addiu t0, 1 + srl t0, 2 + precrq.ph.w t7, t2, t3 + ins t2, t3, 16, 16 + raddu.w.qb t7, t7 + raddu.w.qb t2, t2 + shra_r.w t7, t7, 2 + addiu t2, 1 + srl t2, 2 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t7, 3(t4) + addiu t4, 4 + addiu t5, 8 + addiu s4, s4, -1 + bgtz s4, 2b + addiu s7, 8 + beqz t8, 4f + addu t8, t4, t8 +3: + ulhu t0, 0(t5) + ulhu t1, 0(s7) + ins t0, t1, 16, 16 + raddu.w.qb t0, t0 + addu t0, t0, s6 + srl t0, 2 + xori s6, s6, 3 + sb t0, 0(t4) + addiu t5, 2 + addiu t4, 1 + bne t8, t4, 3b + addiu s7, 2 +4: + lbux t1, t6(t5) + sll t1, 1 + lbux t0, t6(s7) + sll t0, 1 + addu t1, t1, t0 + addu t3, t1, s6 + srl t0, t3, 2 // t2 = pixval1 + xori s6, s6, 3 + addu t2, t1, s6 + srl t1, t2, 2 // t3 = pixval2 + blez s2, 6f + append t1, t0, 8 +5: + ush t1, 0(t4) + addiu s2, -1 + bgtz s2, 5b + addiu t4, 2 +6: + beqz t9, 7f + nop + sb t0, 0(t4) +7: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 8 +8: + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_h2v2_downsample_mips_dspr2) +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2) +/* + * a0 - input_data + * a1 - output_data + * a2 - compptr->v_samp_factor + * a3 - cinfo->max_v_samp_factor + * 16(sp) - cinfo->smoothing_factor + * 20(sp) - compptr->width_in_blocks + * 24(sp) - cinfo->image_width + */ + + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s7, 52(sp) // compptr->width_in_blocks + lw s0, 56(sp) // cinfo->image_width + lw s6, 48(sp) // cinfo->smoothing_factor + sll s7, 3 // output_cols = width_in_blocks * DCTSIZE + sll v0, s7, 1 + subu v0, v0, s0 + blez v0, 2f + move v1, zero + addiu t0, a3, 2 // t0 = cinfo->max_v_samp_factor + 2 +0: + addiu t1, a0, -4 + sll t2, v1, 2 + lwx t1, t2(t1) + move t3, v0 + addu t1, t1, s0 + lbu t2, -1(t1) +1: + addiu t3, t3, -1 + sb t2, 0(t1) + bgtz t3, 1b + addiu t1, t1, 1 + addiu v1, v1, 1 + bne v1, t0, 0b + nop +2: + li v0, 80 + mul v0, s6, v0 + li v1, 16384 + move t4, zero + move t5, zero + subu t6, v1, v0 // t6 = 16384 - tmp_smoot_f * 80 + sll t7, s6, 4 // t7 = tmp_smoot_f * 16 +3: +/* Special case for first column: pretend column -1 is same as column 0 */ + sll v0, t4, 2 + lwx t8, v0(a1) // outptr = output_data[outrow] + sll v1, t5, 2 + addiu t9, v1, 4 + addiu s0, v1, -4 + addiu s1, v1, 8 + lwx s2, v1(a0) // inptr0 = input_data[inrow] + lwx t9, t9(a0) // inptr1 = input_data[inrow+1] + lwx s0, s0(a0) // above_ptr = input_data[inrow-1] + lwx s1, s1(a0) // below_ptr = input_data[inrow+2] + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, 0(s2) + lbu v1, 2(s2) + lbu t0, 0(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1,t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, 0(s0) + lbu t0, 0(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1,s3, t7 + extr_r.w v0, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + addiu s1, s1, 2 + sb v0, -1(t8) + addiu s4, s7, -2 + and s4, s4, 3 + addu s5, s4, t8 //end adress +4: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + sb t2, -1(t8) + bne s5, t8, 4b + addiu s1, s1, 2 + addiu s5, s7, -2 + subu s5, s5, s4 + addu s5, s5, t8 //end adress +5: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + lh v1, 2(t9) + addu t0, t0, v0 + lh v0, 2(s2) + addu s3, t0, s3 + lh t0, 2(s0) + lh t1, 2(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 4(s2) + lbu t0, 1(t9) + lbu t1, 4(t9) + sb t2, 0(t8) + raddu.w.qb t3, v0 + lbu v0, 1(s2) + addu t0, t0, t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 4(s0) + addu t0, t0, v0 + lbu v0, 1(s0) + addu s3, t0, s3 + lbu t0, 1(s1) + lbu t3, 4(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 4(t9) + addu t0, t0, v0 + lh v0, 4(s2) + addu s3, t0, s3 + lh t0, 4(s0) + lh t1, 4(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 6(s2) + lbu t0, 3(t9) + lbu t1, 6(t9) + sb t2, 1(t8) + raddu.w.qb t3, v0 + lbu v0, 3(s2) + addu t0, t0,t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 6(s0) + addu t0, t0, v0 + lbu v0, 3(s0) + addu s3, t0, s3 + lbu t0, 3(s1) + lbu t3, 6(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 6(t9) + addu t0, t0, v0 + lh v0, 6(s2) + addu s3, t0, s3 + lh t0, 6(s0) + lh t1, 6(s1) + madd $ac1, s3, t7 + extr_r.w t3, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 8(s2) + lbu t0, 5(t9) + lbu t1, 8(t9) + sb t3, 2(t8) + raddu.w.qb t2, v0 + lbu v0, 5(s2) + addu t0, t0, t1 + mult $ac1, t2, t6 + addu v0, v0, v1 + lbu t2, 8(s0) + addu t0, t0, v0 + lbu v0, 5(s0) + addu s3, t0, s3 + lbu t0, 5(s1) + lbu t3, 8(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + addiu t8, t8, 4 + addu t0, t0, v0 + addiu s2, s2, 8 + addu s3, t0, s3 + addiu t9, t9, 8 + madd $ac1, s3, t7 + extr_r.w t1, $ac1, 16 + addiu s0, s0, 8 + addiu s1, s1, 8 + bne s5, t8, 5b + sb t1, -1(t8) +/* Special case for last column */ + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 1(s2) + lbu t0, -1(t9) + lbu t1, 1(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 1(s0) + addu t0, t0, v0 + lbu t3, 1(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t0, $ac1, 16 + addiu t5, t5, 2 + sb t0, 0(t8) + addiu t4, t4, 1 + bne t4, a2, 3b + addiu t5, t5, 2 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_h2v2_smooth_downsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2) +/* + * a0 - upsample->h_expand[compptr->component_index] + * a1 - upsample->v_expand[compptr->component_index] + * a2 - input_data + * a3 - output_data_ptr + * 16(sp) - cinfo->output_width + * 20(sp) - cinfo->max_v_samp_factor + */ + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + lw s0, 0(a3) // s0 = output_data + lw s1, 32(sp) // s1 = cinfo->output_width + lw s2, 36(sp) // s2 = cinfo->max_v_samp_factor + li t6, 0 // t6 = inrow + beqz s2, 10f + li s3, 0 // s3 = outrow +0: + addu t0, a2, t6 + addu t7, s0, s3 + lw t3, 0(t0) // t3 = inptr + lw t8, 0(t7) // t8 = outptr + beqz s1, 4f + addu t5, t8, s1 // t5 = outend +1: + lb t2, 0(t3) // t2 = invalue = *inptr++ + addiu t3, 1 + beqz a0, 3f + move t0, a0 // t0 = h_expand +2: + sb t2, 0(t8) + addiu t0, -1 + bgtz t0, 2b + addiu t8, 1 +3: + bgt t5, t8, 1b + nop +4: + addiu t9, a1, -1 // t9 = v_expand - 1 + blez t9, 9f + nop +5: + lw t3, 0(s0) + lw t4, 4(s0) + subu t0, s1, 0xF + blez t0, 7f + addu t5, t3, s1 // t5 = end address + andi t7, s1, 0xF // t7 = residual + subu t8, t5, t7 +6: + ulw t0, 0(t3) + ulw t1, 4(t3) + ulw t2, 8(t3) + usw t0, 0(t4) + ulw t0, 12(t3) + usw t1, 4(t4) + usw t2, 8(t4) + usw t0, 12(t4) + addiu t3, 16 + bne t3, t8, 6b + addiu t4, 16 + beqz t7, 8f + nop +7: + lbu t0, 0(t3) + sb t0, 0(t4) + addiu t3, 1 + bne t3, t5, 7b + addiu t4, 1 +8: + addiu t9, -1 + bgtz t9, 5b + addiu s0, 8 +9: + addu s3, s3, a1 + bne s3, s2, 0b + addiu t6, 1 +10: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_int_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - cinfo->output_width + * a2 - input_data + * a3 - output_data_ptr + */ + lw t7, 0(a3) // t7 = output_data + andi t8, a1, 0xf // t8 = residual + sll t0, a0, 2 + blez a0, 4f + addu t9, t7, t0 // t9 = output_data end address +0: + lw t5, 0(t7) // t5 = outptr + lw t6, 0(a2) // t6 = inptr + addu t3, t5, a1 // t3 = outptr + output_width (end address) + subu t3, t8 // t3 = end address - residual + beq t5, t3, 2f + move t4, t8 +1: + ulw t0, 0(t6) // t0 = |P3|P2|P1|P0| + ulw t2, 4(t6) // t2 = |P7|P6|P5|P4| + srl t1, t0, 16 // t1 = |X|X|P3|P2| + ins t0, t0, 16, 16 // t0 = |P1|P0|P1|P0| + ins t1, t1, 16, 16 // t1 = |P3|P2|P3|P2| + ins t0, t0, 8, 16 // t0 = |P1|P1|P0|P0| + ins t1, t1, 8, 16 // t1 = |P3|P3|P2|P2| + usw t0, 0(t5) + usw t1, 4(t5) + srl t0, t2, 16 // t0 = |X|X|P7|P6| + ins t2, t2, 16, 16 // t2 = |P5|P4|P5|P4| + ins t0, t0, 16, 16 // t0 = |P7|P6|P7|P6| + ins t2, t2, 8, 16 // t2 = |P5|P5|P4|P4| + ins t0, t0, 8, 16 // t0 = |P7|P7|P6|P6| + usw t2, 8(t5) + usw t0, 12(t5) + addiu t5, 16 + bne t5, t3, 1b + addiu t6, 8 + beqz t8, 3f + move t4, t8 +2: + lbu t1, 0(t6) + sb t1, 0(t5) + sb t1, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + addiu t7, 4 + bne t9, t7, 0b + addiu a2, 4 +4: + j ra + nop +END(jsimd_h2v1_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2) +/* + * a0 - cinfo->max_v_samp_factor + * a1 - cinfo->output_width + * a2 - input_data + * a3 - output_data_ptr + */ + lw t7, 0(a3) + blez a0, 7f + andi t9, a1, 0xf // t9 = residual +0: + lw t6, 0(a2) // t6 = inptr + lw t5, 0(t7) // t5 = outptr + addu t8, t5, a1 // t8 = outptr end address + subu t8, t9 // t8 = end address - residual + beq t5, t8, 2f + move t4, t9 +1: + ulw t0, 0(t6) + srl t1, t0, 16 + ins t0, t0, 16, 16 + ins t0, t0, 8, 16 + ins t1, t1, 16, 16 + ins t1, t1, 8, 16 + ulw t2, 4(t6) + usw t0, 0(t5) + usw t1, 4(t5) + srl t3, t2, 16 + ins t2, t2, 16, 16 + ins t2, t2, 8, 16 + ins t3, t3, 16, 16 + ins t3, t3, 8, 16 + usw t2, 8(t5) + usw t3, 12(t5) + addiu t5, 16 + bne t5, t8, 1b + addiu t6, 8 + beqz t9, 3f + move t4, t9 +2: + lbu t0, 0(t6) + sb t0, 0(t5) + sb t0, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + lw t6, 0(t7) // t6 = outptr[0] + lw t5, 4(t7) // t5 = outptr[1] + addu t4, t6, a1 // t4 = new end address + beq a1, t9, 5f + subu t8, t4, t9 +4: + ulw t0, 0(t6) + ulw t1, 4(t6) + ulw t2, 8(t6) + usw t0, 0(t5) + ulw t0, 12(t6) + usw t1, 4(t5) + usw t2, 8(t5) + usw t0, 12(t5) + addiu t6, 16 + bne t6, t8, 4b + addiu t5, 16 + beqz t9, 6f + nop +5: + lbu t0, 0(t6) + sb t0, 0(t5) + addiu t6, 1 + bne t6, t4, 5b + addiu t5, 1 +6: + addiu t7, 8 + addiu a0, -2 + bgtz a0, 0b + addiu a2, 4 +7: + j ra + nop +END(jsimd_h2v2_upsample_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_islow_mips_dspr2) +/* + * a0 - coef_block + * a1 - compptr->dcttable + * a2 - output + * a3 - range_limit + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -256 + move v0, sp + addiu v1, zero, 8 // v1 = DCTSIZE = 8 +1: + lh s4, 32(a0) // s4 = inptr[16] + lh s5, 64(a0) // s5 = inptr[32] + lh s6, 96(a0) // s6 = inptr[48] + lh t1, 112(a0) // t1 = inptr[56] + lh t7, 16(a0) // t7 = inptr[8] + lh t5, 80(a0) // t5 = inptr[40] + lh t3, 48(a0) // t3 = inptr[24] + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, t5 + or s4, s4, t7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 2f + addiu v1, v1, -1 + lh s5, 0(a1) // quantptr[DCTSIZE*0] + lh s6, 0(a0) // inptr[DCTSIZE*0] + mul s5, s5, s6 // DEQUANTIZE(inptr[0], quantptr[0]) + sll s5, s5, 2 + sw s5, 0(v0) + sw s5, 32(v0) + sw s5, 64(v0) + sw s5, 96(v0) + sw s5, 128(v0) + sw s5, 160(v0) + sw s5, 192(v0) + b 3f + sw s5, 224(v0) +2: + lh t0, 112(a1) + lh t2, 48(a1) + lh t4, 80(a1) + lh t6, 16(a1) + mul t0, t0, t1 // DEQUANTIZE(inptr[DCTSIZE*7],quant[DCTSIZE*7]) + mul t1, t2, t3 // DEQUANTIZE(inptr[DCTSIZE*3],quant[DCTSIZE*3]) + mul t2, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*5],quant[DCTSIZE*5]) + mul t3, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*1],quant[DCTSIZE*1]) + lh t4, 32(a1) + lh t5, 32(a0) + lh t6, 96(a1) + lh t7, 96(a0) + addu s0, t0, t1 // z3 = tmp0 + tmp2 + addu s1, t1, t2 // z2 = tmp1 + tmp2 + addu s2, t2, t3 // z4 = tmp1 + tmp3 + addu s3, s0, s2 // z3 + z4 + addiu t9, zero, 9633 // FIX_1_175875602 + mul s3, s3, t9 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu t8, t0, t3 // z1 = tmp0 + tmp3 + addiu t9, zero, 2446 // FIX_0_298631336 + mul t0, t0, t9 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t9, zero, 16819 // FIX_2_053119869 + mul t2, t2, t9 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu t9, zero, 25172 // FIX_3_072711026 + mul t1, t1, t9 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t9, zero, 12299 // FIX_1_501321110 + mul t3, t3, t9 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + addiu t9, zero, 16069 // FIX_1_961570560 + mul s0, s0, t9 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t9, zero, 3196 // FIX_0_390180644 + mul s2, s2, t9 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t9, zero, 7373 // FIX_0_899976223 + mul t8, t8, t9 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t9, zero, 20995 // FIX_2_562915447 + mul s1, s1, t9 // -z2 = MULTIPLY(z2, FIX_2_562915447) + subu s0, s3, s0 // z3 += z5 + addu t0, t0, s0 // tmp0 += z3 + addu t1, t1, s0 // tmp2 += z3 + subu s2, s3, s2 // z4 += z5 + addu t2, t2, s2 // tmp1 += z4 + addu t3, t3, s2 // tmp3 += z4 + subu t0, t0, t8 // tmp0 += z1 + subu t1, t1, s1 // tmp2 += z2 + subu t2, t2, s1 // tmp1 += z2 + subu t3, t3, t8 // tmp3 += z1 + mul s0, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*2],quant[DCTSIZE*2]) + addiu t9, zero, 6270 // FIX_0_765366865 + mul s1, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*6],quant[DCTSIZE*6]) + lh t4, 0(a1) + lh t5, 0(a0) + lh t6, 64(a1) + lh t7, 64(a0) + mul s2, t9, s0 // MULTIPLY(z2, FIX_0_765366865) + mul t5, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*0],quant[DCTSIZE*0]) + mul t6, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*4],quant[DCTSIZE*4]) + addiu t9, zero, 4433 // FIX_0_541196100 + addu s3, s0, s1 // z2 + z3 + mul s3, s3, t9 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t9, zero, 15137 // FIX_1_847759065 + mul t8, s1, t9 // MULTIPLY(z3, FIX_1_847759065) + addu t4, t5, t6 + subu t5, t5, t6 + sll t4, t4, 13 // tmp0 = (z2 + z3) << CONST_BITS + sll t5, t5, 13 // tmp1 = (z2 - z3) << CONST_BITS + addu t7, s3, s2 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t6, s3, t8 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) + addu s0, t4, t7 + subu s1, t4, t7 + addu s2, t5, t6 + subu s3, t5, t6 + addu t4, s0, t3 + subu s0, s0, t3 + addu t3, s2, t1 + subu s2, s2, t1 + addu t1, s3, t2 + subu s3, s3, t2 + addu t2, s1, t0 + subu s1, s1, t0 + shra_r.w t4, t4, 11 + shra_r.w t3, t3, 11 + shra_r.w t1, t1, 11 + shra_r.w t2, t2, 11 + shra_r.w s1, s1, 11 + shra_r.w s3, s3, 11 + shra_r.w s2, s2, 11 + shra_r.w s0, s0, 11 + sw t4, 0(v0) + sw t3, 32(v0) + sw t1, 64(v0) + sw t2, 96(v0) + sw s1, 128(v0) + sw s3, 160(v0) + sw s2, 192(v0) + sw s0, 224(v0) +3: + addiu a1, a1, 2 + addiu a0, a0, 2 + bgtz v1, 1b + addiu v0, v0, 4 + move v0, sp + addiu v1, zero, 8 +4: + lw t0, 8(v0) // z2 = (JLONG) wsptr[2] + lw t1, 24(v0) // z3 = (JLONG) wsptr[6] + lw t2, 0(v0) // (JLONG) wsptr[0] + lw t3, 16(v0) // (JLONG) wsptr[4] + lw s4, 4(v0) // (JLONG) wsptr[1] + lw s5, 12(v0) // (JLONG) wsptr[3] + lw s6, 20(v0) // (JLONG) wsptr[5] + lw s7, 28(v0) // (JLONG) wsptr[7] + or s4, s4, t0 + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, s7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 5f + addiu v1, v1, -1 + shra_r.w s5, t2, 5 + andi s5, s5, 0x3ff + lbux s5, s5(a3) + lw s1, 0(a2) + replv.qb s5, s5 + usw s5, 0(s1) + usw s5, 4(s1) + b 6f + nop +5: + addu t4, t0, t1 // z2 + z3 + addiu t8, zero, 4433 // FIX_0_541196100 + mul t5, t4, t8 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t8, zero, 15137 // FIX_1_847759065 + mul t1, t1, t8 // MULTIPLY(z3, FIX_1_847759065) + addiu t8, zero, 6270 // FIX_0_765366865 + mul t0, t0, t8 // MULTIPLY(z2, FIX_0_765366865) + addu t4, t2, t3 // (JLONG) wsptr[0] + (JLONG) wsptr[4] + subu t2, t2, t3 // (JLONG) wsptr[0] - (JLONG) wsptr[4] + sll t4, t4, 13 // tmp0 = ((wsptr[0] + wsptr[4]) << CONST_BITS + sll t2, t2, 13 // tmp1 = ((wsptr[0] - wsptr[4]) << CONST_BITS + subu t1, t5, t1 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) + subu t3, t2, t1 // tmp12 = tmp1 - tmp2 + addu t2, t2, t1 // tmp11 = tmp1 + tmp2 + addu t5, t5, t0 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t1, t4, t5 // tmp13 = tmp0 - tmp3 + addu t0, t4, t5 // tmp10 = tmp0 + tmp3 + lw t4, 28(v0) // tmp0 = (JLONG) wsptr[7] + lw t6, 12(v0) // tmp2 = (JLONG) wsptr[3] + lw t5, 20(v0) // tmp1 = (JLONG) wsptr[5] + lw t7, 4(v0) // tmp3 = (JLONG) wsptr[1] + addu s0, t4, t6 // z3 = tmp0 + tmp2 + addiu t8, zero, 9633 // FIX_1_175875602 + addu s1, t5, t7 // z4 = tmp1 + tmp3 + addu s2, s0, s1 // z3 + z4 + mul s2, s2, t8 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu s3, t4, t7 // z1 = tmp0 + tmp3 + addu t9, t5, t6 // z2 = tmp1 + tmp2 + addiu t8, zero, 16069 // FIX_1_961570560 + mul s0, s0, t8 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t8, zero, 3196 // FIX_0_390180644 + mul s1, s1, t8 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t8, zero, 2446 // FIX_0_298631336 + mul t4, t4, t8 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t8, zero, 7373 // FIX_0_899976223 + mul s3, s3, t8 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t8, zero, 16819 // FIX_2_053119869 + mul t5, t5, t8 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu t8, zero, 20995 // FIX_2_562915447 + mul t9, t9, t8 // -z2 = MULTIPLY(z2, FIX_2_562915447) + addiu t8, zero, 25172 // FIX_3_072711026 + mul t6, t6, t8 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t8, zero, 12299 // FIX_1_501321110 + mul t7, t7, t8 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + subu s0, s2, s0 // z3 += z5 + subu s1, s2, s1 // z4 += z5 + addu t4, t4, s0 + subu t4, t4, s3 // tmp0 + addu t5, t5, s1 + subu t5, t5, t9 // tmp1 + addu t6, t6, s0 + subu t6, t6, t9 // tmp2 + addu t7, t7, s1 + subu t7, t7, s3 // tmp3 + addu s0, t0, t7 + subu t0, t0, t7 + addu t7, t2, t6 + subu t2, t2, t6 + addu t6, t3, t5 + subu t3, t3, t5 + addu t5, t1, t4 + subu t1, t1, t4 + shra_r.w s0, s0, 18 + shra_r.w t7, t7, 18 + shra_r.w t6, t6, 18 + shra_r.w t5, t5, 18 + shra_r.w t1, t1, 18 + shra_r.w t3, t3, 18 + shra_r.w t2, t2, 18 + shra_r.w t0, t0, 18 + andi s0, s0, 0x3ff + andi t7, t7, 0x3ff + andi t6, t6, 0x3ff + andi t5, t5, 0x3ff + andi t1, t1, 0x3ff + andi t3, t3, 0x3ff + andi t2, t2, 0x3ff + andi t0, t0, 0x3ff + lw s1, 0(a2) + lbux s0, s0(a3) + lbux t7, t7(a3) + lbux t6, t6(a3) + lbux t5, t5(a3) + lbux t1, t1(a3) + lbux t3, t3(a3) + lbux t2, t2(a3) + lbux t0, t0(a3) + sb s0, 0(s1) + sb t7, 1(s1) + sb t6, 2(s1) + sb t5, 3(s1) + sb t1, 4(s1) + sb t3, 5(s1) + sb t2, 6(s1) + sb t0, 7(s1) +6: + addiu v0, v0, 32 + bgtz v1, 4b + addiu a2, a2, 4 + addiu sp, sp, 256 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_islow_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_ifast_cols_mips_dspr2) +/* + * a0 - inptr + * a1 - quantptr + * a2 - wsptr + * a3 - mips_idct_ifast_coefs + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu t9, a0, 16 // end address + or AT, a3, zero + +0: + lw s0, 0(a1) // quantptr[DCTSIZE*0] + lw t0, 0(a0) // inptr[DCTSIZE*0] + lw t1, 16(a0) // inptr[DCTSIZE*1] + muleq_s.w.phl v0, t0, s0 // tmp0 ... + lw t2, 32(a0) // inptr[DCTSIZE*2] + lw t3, 48(a0) // inptr[DCTSIZE*3] + lw t4, 64(a0) // inptr[DCTSIZE*4] + lw t5, 80(a0) // inptr[DCTSIZE*5] + muleq_s.w.phr t0, t0, s0 // ... tmp0 ... + lw t6, 96(a0) // inptr[DCTSIZE*6] + lw t7, 112(a0) // inptr[DCTSIZE*7] + or s4, t1, t2 + or s5, t3, t4 + bnez s4, 1f + ins t0, v0, 16, 16 // ... tmp0 + bnez s5, 1f + or s6, t5, t6 + or s6, s6, t7 + bnez s6, 1f + sw t0, 0(a2) // wsptr[DCTSIZE*0] + sw t0, 16(a2) // wsptr[DCTSIZE*1] + sw t0, 32(a2) // wsptr[DCTSIZE*2] + sw t0, 48(a2) // wsptr[DCTSIZE*3] + sw t0, 64(a2) // wsptr[DCTSIZE*4] + sw t0, 80(a2) // wsptr[DCTSIZE*5] + sw t0, 96(a2) // wsptr[DCTSIZE*6] + sw t0, 112(a2) // wsptr[DCTSIZE*7] + addiu a0, a0, 4 + b 2f + addiu a1, a1, 4 + +1: + lw s1, 32(a1) // quantptr[DCTSIZE*2] + lw s2, 64(a1) // quantptr[DCTSIZE*4] + muleq_s.w.phl v0, t2, s1 // tmp1 ... + muleq_s.w.phr t2, t2, s1 // ... tmp1 ... + lw s0, 16(a1) // quantptr[DCTSIZE*1] + lw s1, 48(a1) // quantptr[DCTSIZE*3] + lw s3, 96(a1) // quantptr[DCTSIZE*6] + muleq_s.w.phl v1, t4, s2 // tmp2 ... + muleq_s.w.phr t4, t4, s2 // ... tmp2 ... + lw s2, 80(a1) // quantptr[DCTSIZE*5] + lw t8, 4(AT) // FIX(1.414213562) + ins t2, v0, 16, 16 // ... tmp1 + muleq_s.w.phl v0, t6, s3 // tmp3 ... + muleq_s.w.phr t6, t6, s3 // ... tmp3 ... + ins t4, v1, 16, 16 // ... tmp2 + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + ins t6, v0, 16, 16 // ... tmp3 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + muleq_s.w.phl v0, t1, s0 // tmp4 ... + muleq_s.w.phr t1, t1, s0 // ... tmp4 ... + shll_s.ph s6, s6, 1 // x2 + lw s3, 112(a1) // quantptr[DCTSIZE*7] + subq.ph s6, s6, s7 // ... tmp12 + muleq_s.w.phl v1, t7, s3 // tmp7 ... + muleq_s.w.phr t7, t7, s3 // ... tmp7 ... + ins t1, v0, 16, 16 // ... tmp4 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + muleq_s.w.phl v0, t5, s2 // tmp6 ... + muleq_s.w.phr t5, t5, s2 // ... tmp6 ... + ins t7, v1, 16, 16 // ... tmp7 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + muleq_s.w.phl v1, t3, s1 // tmp5 ... + muleq_s.w.phr t3, t3, s1 // ... tmp5 ... + ins t5, v0, 16, 16 // ... tmp6 + ins t3, v1, 16, 16 // ... tmp5 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 + subq.ph s1, t0, t7 + mulq_s.ph v1, v1, t8 // ... z5 + shll_s.ph s5, s5, 1 // x2 + lw t8, 12(AT) // FIX(-2.613125930) + sw s0, 0(a2) // wsptr[DCTSIZE*0] + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 4 + addiu a1, a1, 4 + sw s1, 112(a2) // wsptr[DCTSIZE*7] + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... tmp10 + subq.ph t3, s5, t5 // tmp5 + addq.ph s2, t2, t5 + addq.ph t1, s4, t3 // tmp4 + subq.ph s3, t2, t5 + sw s2, 16(a2) // wsptr[DCTSIZE*1] + sw s3, 96(a2) // wsptr[DCTSIZE*6] + addq.ph v0, t4, t3 + subq.ph v1, t4, t3 + sw v0, 32(a2) // wsptr[DCTSIZE*2] + sw v1, 80(a2) // wsptr[DCTSIZE*5] + addq.ph v0, t6, t1 + subq.ph v1, t6, t1 + sw v0, 64(a2) // wsptr[DCTSIZE*4] + sw v1, 48(a2) // wsptr[DCTSIZE*3] + +2: + bne a0, t9, 0b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_ifast_cols_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_ifast_rows_mips_dspr2) +/* + * a0 - wsptr + * a1 - output_buf + * a2 - output_col + * a3 - mips_idct_ifast_coefs + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + addiu t9, a0, 128 // end address + lui s8, 0x8080 + ori s8, s8, 0x8080 + +0: + lw AT, 36(sp) // restore $a3 (mips_idct_ifast_coefs) + lw t0, 0(a0) // wsptr[DCTSIZE*0+0/1] b a + lw s0, 16(a0) // wsptr[DCTSIZE*1+0/1] B A + lw t2, 4(a0) // wsptr[DCTSIZE*0+2/3] d c + lw s2, 20(a0) // wsptr[DCTSIZE*1+2/3] D C + lw t4, 8(a0) // wsptr[DCTSIZE*0+4/5] f e + lw s4, 24(a0) // wsptr[DCTSIZE*1+4/5] F E + lw t6, 12(a0) // wsptr[DCTSIZE*0+6/7] h g + lw s6, 28(a0) // wsptr[DCTSIZE*1+6/7] H G + precrq.ph.w t1, s0, t0 // B b + ins t0, s0, 16, 16 // A a + bnez t1, 1f + or s0, t2, s2 + bnez s0, 1f + or s0, t4, s4 + bnez s0, 1f + or s0, t6, s6 + bnez s0, 1f + shll_s.ph s0, t0, 2 // A a + lw a3, 0(a1) + lw AT, 4(a1) + precrq.ph.w t0, s0, s0 // A A + ins s0, s0, 16, 16 // a a + addu a3, a3, a2 + addu AT, AT, a2 + precrq.qb.ph t0, t0, t0 // A A A A + precrq.qb.ph s0, s0, s0 // a a a a + addu.qb s0, s0, s8 + addu.qb t0, t0, s8 + sw s0, 0(a3) + sw s0, 4(a3) + sw t0, 0(AT) + sw t0, 4(AT) + addiu a0, a0, 32 + bne a0, t9, 0b + addiu a1, a1, 8 + b 2f + nop + +1: + precrq.ph.w t3, s2, t2 + ins t2, s2, 16, 16 + precrq.ph.w t5, s4, t4 + ins t4, s4, 16, 16 + precrq.ph.w t7, s6, t6 + ins t6, s6, 16, 16 + lw t8, 4(AT) // FIX(1.414213562) + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + shll_s.ph s6, s6, 1 // x2 + subq.ph s6, s6, s7 // ... tmp12 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 // tmp0 + tmp7 + subq.ph s7, t0, t7 // tmp0 - tmp7 + mulq_s.ph v1, v1, t8 // ... z5 + lw a3, 0(a1) + lw t8, 12(AT) // FIX(-2.613125930) + shll_s.ph s5, s5, 1 // x2 + addu a3, a3, a2 + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 32 + addiu a1, a1, 8 + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + shll_s.ph s0, s0, 2 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... tmp10 + subq.ph t3, s5, t5 // tmp5 + shll_s.ph s7, s7, 2 + addq.ph t1, s4, t3 // tmp4 + addq.ph s1, t2, t5 // tmp1 + tmp6 + subq.ph s6, t2, t5 // tmp1 - tmp6 + addq.ph s2, t4, t3 // tmp2 + tmp5 + subq.ph s5, t4, t3 // tmp2 - tmp5 + addq.ph s4, t6, t1 // tmp3 + tmp4 + subq.ph s3, t6, t1 // tmp3 - tmp4 + shll_s.ph s1, s1, 2 + shll_s.ph s2, s2, 2 + shll_s.ph s3, s3, 2 + shll_s.ph s4, s4, 2 + shll_s.ph s5, s5, 2 + shll_s.ph s6, s6, 2 + precrq.ph.w t0, s1, s0 // B A + ins s0, s1, 16, 16 // b a + precrq.ph.w t2, s3, s2 // D C + ins s2, s3, 16, 16 // d c + precrq.ph.w t4, s5, s4 // F E + ins s4, s5, 16, 16 // f e + precrq.ph.w t6, s7, s6 // H G + ins s6, s7, 16, 16 // h g + precrq.qb.ph t0, t2, t0 // D C B A + precrq.qb.ph s0, s2, s0 // d c b a + precrq.qb.ph t4, t6, t4 // H G F E + precrq.qb.ph s4, s6, s4 // h g f e + addu.qb s0, s0, s8 + addu.qb s4, s4, s8 + sw s0, 0(a3) // outptr[0/1/2/3] d c b a + sw s4, 4(a3) // outptr[4/5/6/7] h g f e + lw a3, -4(a1) + addu.qb t0, t0, s8 + addu a3, a3, a2 + addu.qb t4, t4, s8 + sw t0, 0(a3) // outptr[0/1/2/3] D C B A + bne a0, t9, 0b + sw t4, 4(a3) // outptr[4/5/6/7] H G F E + +2: + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + j ra + nop + +END(jsimd_idct_ifast_rows_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_fdct_islow_mips_dspr2) +/* + * a0 - data + */ + + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lui t0, 6437 + ori t0, 2260 + lui t1, 9633 + ori t1, 11363 + lui t2, 0xd39e + ori t2, 0xe6dc + lui t3, 0xf72d + ori t3, 9633 + lui t4, 2261 + ori t4, 9633 + lui t5, 0xd39e + ori t5, 6437 + lui t6, 9633 + ori t6, 0xd39d + lui t7, 0xe6dc + ori t7, 2260 + lui t8, 4433 + ori t8, 10703 + lui t9, 0xd630 + ori t9, 4433 + li s8, 8 + move a1, a0 +1: + lw s0, 0(a1) // tmp0 = 1|0 + lw s1, 4(a1) // tmp1 = 3|2 + lw s2, 8(a1) // tmp2 = 5|4 + lw s3, 12(a1) // tmp3 = 7|6 + packrl.ph s1, s1, s1 // tmp1 = 2|3 + packrl.ph s3, s3, s3 // tmp3 = 6|7 + subq.ph s7, s1, s2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph s5, s0, s3 // tmp5 = 1-6|0-7 = t6|t7 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t0 // ac0 += t5* 6437 + t4* 2260 + dpa.w.ph $ac0, s5, t1 // ac0 += t6* 9633 + t7* 11363 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t2 // ac1 += t5*-11362 + t4* -6436 + dpa.w.ph $ac1, s5, t3 // ac1 += t6* -2259 + t7* 9633 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, s7, t4 // ac2 += t5* 2261 + t4* 9633 + dpa.w.ph $ac2, s5, t5 // ac2 += t6*-11362 + t7* 6437 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, s7, t6 // ac3 += t5* 9633 + t4*-11363 + dpa.w.ph $ac3, s5, t7 // ac3 += t6* -6436 + t7* 2260 + addq.ph s6, s1, s2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph s4, s0, s3 // tmp4 = 1+6|0+7 = t1|t0 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + extr_r.w s2, $ac2, 11 // tmp2 = (ac2 + 1024) >> 11 + extr_r.w s3, $ac3, 11 // tmp3 = (ac3 + 1024) >> 11 + addq.ph s5, s4, s6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph s7, s4, s6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sh s0, 2(a1) + sh s1, 6(a1) + sh s2, 10(a1) + sh s3, 14(a1) + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t8 // ac0 += t12* 4433 + t13* 10703 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t9 // ac1 += t12*-10704 + t13* 4433 + sra s4, s5, 16 // tmp4 = t11 + addiu a1, a1, 16 + addiu s8, s8, -1 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + addu s2, s5, s4 // tmp2 = t10 + t11 + subu s3, s5, s4 // tmp3 = t10 - t11 + sll s2, s2, 2 // tmp2 = (t10 + t11) << 2 + sll s3, s3, 2 // tmp3 = (t10 - t11) << 2 + sh s2, -16(a1) + sh s3, -8(a1) + sh s0, -12(a1) + bgtz s8, 1b + sh s1, -4(a1) + li t0, 2260 + li t1, 11363 + li t2, 9633 + li t3, 6436 + li t4, 6437 + li t5, 2261 + li t6, 11362 + li t7, 2259 + li t8, 4433 + li t9, 10703 + li a1, 10704 + li s8, 8 + +2: + lh a2, 0(a0) // 0 + lh a3, 16(a0) // 8 + lh v0, 32(a0) // 16 + lh v1, 48(a0) // 24 + lh s4, 64(a0) // 32 + lh s5, 80(a0) // 40 + lh s6, 96(a0) // 48 + lh s7, 112(a0) // 56 + addu s2, v0, s5 // tmp2 = 16 + 40 + subu s5, v0, s5 // tmp5 = 16 - 40 + addu s3, v1, s4 // tmp3 = 24 + 32 + subu s4, v1, s4 // tmp4 = 24 - 32 + addu s0, a2, s7 // tmp0 = 0 + 56 + subu s7, a2, s7 // tmp7 = 0 - 56 + addu s1, a3, s6 // tmp1 = 8 + 48 + subu s6, a3, s6 // tmp6 = 8 - 48 + addu a2, s0, s3 // tmp10 = tmp0 + tmp3 + subu v1, s0, s3 // tmp13 = tmp0 - tmp3 + addu a3, s1, s2 // tmp11 = tmp1 + tmp2 + subu v0, s1, s2 // tmp12 = tmp1 - tmp2 + mult s7, t1 // ac0 = tmp7 * c1 + madd s4, t0 // ac0 += tmp4 * c0 + madd s5, t4 // ac0 += tmp5 * c4 + madd s6, t2 // ac0 += tmp6 * c2 + mult $ac1, s7, t2 // ac1 = tmp7 * c2 + msub $ac1, s4, t3 // ac1 -= tmp4 * c3 + msub $ac1, s5, t6 // ac1 -= tmp5 * c6 + msub $ac1, s6, t7 // ac1 -= tmp6 * c7 + mult $ac2, s7, t4 // ac2 = tmp7 * c4 + madd $ac2, s4, t2 // ac2 += tmp4 * c2 + madd $ac2, s5, t5 // ac2 += tmp5 * c5 + msub $ac2, s6, t6 // ac2 -= tmp6 * c6 + mult $ac3, s7, t0 // ac3 = tmp7 * c0 + msub $ac3, s4, t1 // ac3 -= tmp4 * c1 + madd $ac3, s5, t2 // ac3 += tmp5 * c2 + msub $ac3, s6, t3 // ac3 -= tmp6 * c3 + extr_r.w s0, $ac0, 15 // tmp0 = (ac0 + 16384) >> 15 + extr_r.w s1, $ac1, 15 // tmp1 = (ac1 + 16384) >> 15 + extr_r.w s2, $ac2, 15 // tmp2 = (ac2 + 16384) >> 15 + extr_r.w s3, $ac3, 15 // tmp3 = (ac3 + 16384) >> 15 + addiu s8, s8, -1 + addu s4, a2, a3 // tmp4 = tmp10 + tmp11 + subu s5, a2, a3 // tmp5 = tmp10 - tmp11 + sh s0, 16(a0) + sh s1, 48(a0) + sh s2, 80(a0) + sh s3, 112(a0) + mult v0, t8 // ac0 = tmp12 * c8 + madd v1, t9 // ac0 += tmp13 * c9 + mult $ac1, v1, t8 // ac1 = tmp13 * c8 + msub $ac1, v0, a1 // ac1 -= tmp12 * c10 + addiu a0, a0, 2 + extr_r.w s6, $ac0, 15 // tmp6 = (ac0 + 16384) >> 15 + extr_r.w s7, $ac1, 15 // tmp7 = (ac1 + 16384) >> 15 + shra_r.w s4, s4, 2 // tmp4 = (tmp4 + 2) >> 2 + shra_r.w s5, s5, 2 // tmp5 = (tmp5 + 2) >> 2 + sh s4, -2(a0) + sh s5, 62(a0) + sh s6, 30(a0) + bgtz s8, 2b + sh s7, 94(a0) + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + jr ra + nop + +END(jsimd_fdct_islow_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_fdct_ifast_mips_dspr2) +/* + * a0 - data + */ + .set at + SAVE_REGS_ON_STACK 8, s0, s1 + li a1, 0x014e014e // FIX_1_306562965 (334 << 16)|(334 & 0xffff) + li a2, 0x008b008b // FIX_0_541196100 (139 << 16)|(139 & 0xffff) + li a3, 0x00620062 // FIX_0_382683433 (98 << 16) |(98 & 0xffff) + li s1, 0x00b500b5 // FIX_0_707106781 (181 << 16)|(181 & 0xffff) + + move v0, a0 + addiu v1, v0, 128 // end address + +0: + lw t0, 0(v0) // tmp0 = 1|0 + lw t1, 4(v0) // tmp1 = 3|2 + lw t2, 8(v0) // tmp2 = 5|4 + lw t3, 12(v0) // tmp3 = 7|6 + packrl.ph t1, t1, t1 // tmp1 = 2|3 + packrl.ph t3, t3, t3 // tmp3 = 6|7 + subq.ph t7, t1, t2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph t5, t0, t3 // tmp5 = 1-6|0-7 = t6|t7 + addq.ph t6, t1, t2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph t4, t0, t3 // tmp4 = 1+6|0+7 = t1|t0 + addq.ph t8, t4, t6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph t9, t4, t6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sra t4, t8, 16 // tmp4 = t11 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t9, s1 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, t7, a3 // ac1 += t4*98 + t5*98 + dpsx.w.ph $ac1, t5, a3 // ac1 += t6*98 + t7*98 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, t7, a2 // ac2 += t4*139 + t5*139 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, t5, a1 // ac3 += t6*334 + t7*334 + precrq.ph.w t0, t5, t7 // t0 = t5|t6 + addq.ph t2, t8, t4 // tmp2 = t10 + t11 + subq.ph t3, t8, t4 // tmp3 = t10 - t11 + extr.w t4, $ac0, 8 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t0, s1 // ac0 += t5*181 + t6*181 + extr.w t0, $ac1, 8 // t0 = z5 + extr.w t1, $ac2, 8 // t1 = MULTIPLY(tmp10, 139) + extr.w t7, $ac3, 8 // t2 = MULTIPLY(tmp12, 334) + extr.w t8, $ac0, 8 // t8 = z3 = MULTIPLY(tmp11, 181) + add t6, t1, t0 // t6 = z2 + add t7, t7, t0 // t7 = z4 + subq.ph t0, t5, t8 // t0 = z13 = tmp7 - z3 + addq.ph t8, t5, t8 // t9 = z11 = tmp7 + z3 + addq.ph t1, t0, t6 // t1 = z13 + z2 + subq.ph t6, t0, t6 // t6 = z13 - z2 + addq.ph t0, t8, t7 // t0 = z11 + z4 + subq.ph t7, t8, t7 // t7 = z11 - z4 + addq.ph t5, t4, t9 + subq.ph t4, t9, t4 + sh t2, 0(v0) + sh t5, 4(v0) + sh t3, 8(v0) + sh t4, 12(v0) + sh t1, 10(v0) + sh t6, 6(v0) + sh t0, 2(v0) + sh t7, 14(v0) + addiu v0, 16 + bne v1, v0, 0b + nop + move v0, a0 + addiu v1, v0, 16 + +1: + lh t0, 0(v0) // 0 + lh t1, 16(v0) // 8 + lh t2, 32(v0) // 16 + lh t3, 48(v0) // 24 + lh t4, 64(v0) // 32 + lh t5, 80(v0) // 40 + lh t6, 96(v0) // 48 + lh t7, 112(v0) // 56 + add t8, t0, t7 // t8 = tmp0 + sub t7, t0, t7 // t7 = tmp7 + add t0, t1, t6 // t0 = tmp1 + sub t1, t1, t6 // t1 = tmp6 + add t6, t2, t5 // t6 = tmp2 + sub t5, t2, t5 // t5 = tmp5 + add t2, t3, t4 // t2 = tmp3 + sub t3, t3, t4 // t3 = tmp4 + add t4, t8, t2 // t4 = tmp10 = tmp0 + tmp3 + sub t8, t8, t2 // t8 = tmp13 = tmp0 - tmp3 + sub s0, t0, t6 // s0 = tmp12 = tmp1 - tmp2 + ins t8, s0, 16, 16 // t8 = tmp12|tmp13 + add t2, t0, t6 // t2 = tmp11 = tmp1 + tmp2 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t8, s1 // ac0 += t12*181 + t13*181 + add s0, t4, t2 // t8 = tmp10+tmp11 + sub t4, t4, t2 // t4 = tmp10-tmp11 + sh s0, 0(v0) + sh t4, 64(v0) + extr.w t2, $ac0, 8 // z1 = MULTIPLY(tmp12+tmp13,FIX_0_707106781) + addq.ph t4, t8, t2 // t9 = tmp13 + z1 + subq.ph t8, t8, t2 // t2 = tmp13 - z1 + sh t4, 32(v0) + sh t8, 96(v0) + add t3, t3, t5 // t3 = tmp10 = tmp4 + tmp5 + add t0, t5, t1 // t0 = tmp11 = tmp5 + tmp6 + add t1, t1, t7 // t1 = tmp12 = tmp6 + tmp7 + andi t4, a1, 0xffff + mul s0, t1, t4 + sra s0, s0, 8 // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965) + ins t1, t3, 16, 16 // t1 = tmp10|tmp12 + mult $0, $0 // ac0 = 0 + mulsa.w.ph $ac0, t1, a3 // ac0 += t10*98 - t12*98 + extr.w t8, $ac0, 8 // z5 = MULTIPLY(tmp10-tmp12,FIX_0_382683433) + add t2, t7, t8 // t2 = tmp7 + z5 + sub t7, t7, t8 // t7 = tmp7 - z5 + andi t4, a2, 0xffff + mul t8, t3, t4 + sra t8, t8, 8 // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100) + andi t4, s1, 0xffff + mul t6, t0, t4 + sra t6, t6, 8 // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781) + add t0, t6, t8 // t0 = z3 + z2 + sub t1, t6, t8 // t1 = z3 - z2 + add t3, t6, s0 // t3 = z3 + z4 + sub t4, t6, s0 // t4 = z3 - z4 + sub t5, t2, t1 // t5 = dataptr[5] + sub t6, t7, t0 // t6 = dataptr[3] + add t3, t2, t3 // t3 = dataptr[1] + add t4, t7, t4 // t4 = dataptr[7] + sh t5, 80(v0) + sh t6, 48(v0) + sh t3, 16(v0) + sh t4, 112(v0) + addiu v0, 2 + bne v0, v1, 1b + nop + + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop +END(jsimd_fdct_ifast_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_quantize_mips_dspr2) +/* + * a0 - coef_block + * a1 - divisors + * a2 - workspace + */ + + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2 + + addiu v0, a2, 124 // v0 = workspace_end + lh t0, 0(a2) + lh t1, 0(a1) + lh t2, 128(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t4, 384(a1) + lh t5, 130(a1) + lh t6, 2(a2) + lh t7, 2(a1) + lh t8, 386(a1) + +1: + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + addiu a2, a2, 4 + addiu a1, a1, 4 + add s1, t6, t5 + andi s1, 0xffff + sh v1, 0(a0) + + mul s2, s1, t7 + addiu s1, t8, 16 + srav s2, s2, s1 + mul s2,s2, s0 + lh t0, 0(a2) + lh t1, 0(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t2, 128(a1) + lh t4, 384(a1) + lh t5, 130(a1) + lh t8, 386(a1) + lh t6, 2(a2) + lh t7, 2(a1) + sh s2, 2(a0) + lh t0, 0(a2) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0,t3 + bne a2, v0, 1b + addiu a0, a0, 4 + + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + sh v1, 0(a0) + add s1, t6, t5 + andi s1, 0xffff + mul s2, s1, t7 + addiu s1, t8, 16 + addiu a2, a2, 4 + addiu a1, a1, 4 + srav s2, s2, s1 + mul s2, s2, s0 + sh s2, 2(a0) + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2 + + j ra + nop + +END(jsimd_quantize_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_quantize_float_mips_dspr2) +/* + * a0 - coef_block + * a1 - divisors + * a2 - workspace + */ + + .set at + + li t1, 0x46800100 //integer representation 16384.5 + mtc1 t1, f0 + li t0, 63 +0: + lwc1 f2, 0(a2) + lwc1 f10, 0(a1) + lwc1 f4, 4(a2) + lwc1 f12, 4(a1) + lwc1 f6, 8(a2) + lwc1 f14, 8(a1) + lwc1 f8, 12(a2) + lwc1 f16, 12(a1) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + lwc1 f10, 16(a1) + lwc1 f12, 20(a1) + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + lwc1 f14, 24(a1) + lwc1 f16, 28(a1) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + lwc1 f2, 16(a2) + lwc1 f4, 20(a2) + lwc1 f6, 24(a2) + lwc1 f8, 28(a2) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + sh t1, 0(a0) + sh t2, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + addiu t0, t0, -8 + addiu a2, a2, 32 + addiu a1, a1, 32 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + sh t1, 8(a0) + sh t2, 10(a0) + sh t3, 12(a0) + sh t4, 14(a0) + bgez t0, 0b + addiu a0, a0, 16 + + j ra + nop + +END(jsimd_quantize_float_mips_dspr2) +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_2x2_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + addiu sp, sp, -40 + move v0, sp + addiu s2, zero, 29692 + addiu s3, zero, -10426 + addiu s4, zero, 6967 + addiu s5, zero, -5906 + lh t0, 0(a1) // t0 = inptr[DCTSIZE*0] + lh t5, 0(a0) // t5 = quantptr[DCTSIZE*0] + lh t1, 48(a1) // t1 = inptr[DCTSIZE*3] + lh t6, 48(a0) // t6 = quantptr[DCTSIZE*3] + mul t4, t5, t0 + lh t0, 16(a1) // t0 = inptr[DCTSIZE*1] + lh t5, 16(a0) // t5 = quantptr[DCTSIZE*1] + mul t6, t6, t1 + mul t5, t5, t0 + lh t2, 80(a1) // t2 = inptr[DCTSIZE*5] + lh t7, 80(a0) // t7 = quantptr[DCTSIZE*5] + lh t3, 112(a1) // t3 = inptr[DCTSIZE*7] + lh t8, 112(a0) // t8 = quantptr[DCTSIZE*7] + mul t7, t7, t2 + mult zero, zero + mul t8, t8, t3 + li s0, 0x73FCD746 // s0 = (29692 << 16) | (-10426 & 0xffff) + li s1, 0x1B37E8EE // s1 = (6967 << 16) | (-5906 & 0xffff) + ins t6, t5, 16, 16 // t6 = t5|t6 + sll t4, t4, 15 + dpa.w.ph $ac0, t6, s0 + lh t1, 2(a1) + lh t6, 2(a0) + ins t8, t7, 16, 16 // t8 = t7|t8 + dpa.w.ph $ac0, t8, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 18(a1) + lh t6, 18(a0) + lh t2, 50(a1) + lh t7, 50(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 82(a1) + lh t2, 82(a0) + lh t3, 114(a1) + lh t4, 114(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 0(v0) + sw t8, 20(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 6(a1) + lh t6, 6(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 22(a1) + lh t6, 22(a0) + lh t2, 54(a1) + lh t7, 54(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 86(a1) + lh t2, 86(a0) + lh t3, 118(a1) + lh t4, 118(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 4(v0) + sw t8, 24(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 10(a1) + lh t6, 10(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 26(a1) + lh t6, 26(a0) + lh t2, 58(a1) + lh t7, 58(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 90(a1) + lh t2, 90(a0) + lh t3, 122(a1) + lh t4, 122(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 8(v0) + sw t8, 28(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 14(a1) + lh t6, 14(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 30(a1) + lh t6, 30(a0) + lh t2, 62(a1) + lh t7, 62(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 94(a1) + lh t2, 94(a0) + lh t3, 126(a1) + lh t4, 126(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 12(v0) + sw t8, 32(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + lw t9, 0(a2) + lw t3, 0(v0) + lw t7, 4(v0) + lw t1, 8(v0) + addu t9, t9, a3 + sll t3, t3, 15 + subu t8, t4, t0 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + shra_r.w t8, t8, 13 + sw t0, 16(v0) + sw t8, 36(v0) + lw t5, 12(v0) + lw t6, 16(v0) + mult t7, s2 + madd t1, s3 + madd t5, s4 + madd t6, s5 + lw t5, 24(v0) + lw t7, 28(v0) + mflo t0, $ac0 + lw t8, 32(v0) + lw t2, 36(v0) + mult $ac1, t5, s2 + madd $ac1, t7, s3 + madd $ac1, t8, s4 + madd $ac1, t2, s5 + addu t1, t3, t0 + subu t6, t3, t0 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + mflo t4, $ac1 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + lw t0, 20(v0) + sb t1, 0(t9) + sb t6, 1(t9) + sll t0, t0, 15 + lw t9, 4(a2) + addu t1, t0, t4 + subu t6, t0, t4 + addu t9, t9, a3 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + sb t1, 0(t9) + sb t6, 1(t9) + addiu sp, sp, 40 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop + +END(jsimd_idct_2x2_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_4x4_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + * 16(sp) - workspace[DCTSIZE*4]; // buffers data between passes + */ + + .set at + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw v1, 48(sp) + move t0, a1 + move t1, v1 + li t9, 4 + li s0, 0x2e75f93e + li s1, 0x21f9ba79 + li s2, 0xecc2efb0 + li s3, 0x52031ccd + +0: + lh s6, 32(t0) // inptr[DCTSIZE*2] + lh t6, 32(a0) // quantptr[DCTSIZE*2] + lh s7, 96(t0) // inptr[DCTSIZE*6] + lh t7, 96(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 0(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 0(a0) // quantptr[0] + li s6, 15137 + li s7, 6270 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh t5, 112(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s4, 112(a0) // quantptr[DCTSIZE*7] + lh v0, 80(t0) // inptr[DCTSIZE*5] + lh s5, 80(a0) // quantptr[DCTSIZE*5] + lh s6, 48(a0) // quantptr[DCTSIZE*3] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s7, 16(a0) // quantptr[DCTSIZE*1] + lh t8, 16(t0) // inptr[DCTSIZE*1] + subu t6, t6, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) + lh t7, 48(t0) // inptr[DCTSIZE*3] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul v0, s5, v0 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, t6 // tmp10 = tmp0 + z2 + subu t4, t2, t6 // tmp10 = tmp0 - z2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, v0, 16, 16 + ins t7, t8, 16, 16 + addiu t9, t9, -1 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo s4, $ac0 + mflo s5, $ac1 + addiu a0, a0, 2 + addiu t1, t1, 4 + addiu t0, t0, 2 + addu t6, t4, s4 + subu t5, t4, s4 + addu s6, t3, s5 + subu s7, t3, s5 + shra_r.w t6, t6, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w t5, t5, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw t6, 28(t1) + sw t5, 60(t1) + sw s6, -4(t1) + bgtz t9, 0b + sw s7, 92(t1) + // second loop three pass + li t9, 3 +1: + lh s6, 34(t0) // inptr[DCTSIZE*2] + lh t6, 34(a0) // quantptr[DCTSIZE*2] + lh s7, 98(t0) // inptr[DCTSIZE*6] + lh t7, 98(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 2(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 2(a0) // quantptr[DCTSIZE*0] + li s6, 15137 + li s7, 6270 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul v0, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh t5, 114(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s4, 114(a0) // quantptr[DCTSIZE*7] + lh s5, 82(a0) // quantptr[DCTSIZE*5] + lh t6, 82(t0) // inptr[DCTSIZE*5] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s6, 50(a0) // quantptr[DCTSIZE*3] + lh t8, 18(t0) // inptr[DCTSIZE*1] + subu v0, v0, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) + lh t7, 50(t0) // inptr[DCTSIZE*3] + lh s7, 18(a0) // quantptr[DCTSIZE*1] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul t6, s5, t6 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, v0 // tmp10 = tmp0 + z2 + subu t4, t2, v0 // tmp10 = tmp0 - z2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo t5, $ac0 + mflo t6, $ac1 + addiu t9, t9, -1 + addiu t0, t0, 2 + addiu a0, a0, 2 + addiu t1, t1, 4 + addu s5, t4, t5 + subu s4, t4, t5 + addu s6, t3, t6 + subu s7, t3, t6 + shra_r.w s5, s5, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w s4, s4, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw s5, 32(t1) + sw s4, 64(t1) + sw s6, 0(t1) + bgtz t9, 1b + sw s7, 96(t1) + move t1, v1 + li s4, 15137 + lw s6, 8(t1) // wsptr[2] + li s5, 6270 + lw s7, 24(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 0(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 28(t1) // wsptr[7] + lh t6, 20(t1) // wsptr[5] + lh t7, 12(t1) // wsptr[3] + lh t8, 4(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 0(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + // 2 + li s4, 15137 + lw s6, 40(t1) // wsptr[2] + li s5, 6270 + lw s7, 56(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 32(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 60(t1) // wsptr[7] + lh t6, 52(t1) // wsptr[5] + lh t7, 44(t1) // wsptr[3] + lh t8, 36(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, CONST_BITS-PASS1_BITS+1) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, CONST_BITS-PASS1_BITS+1) + sll s4, t9, 2 + lw v0, 4(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + // 3 + li s4, 15137 + lw s6, 72(t1) // wsptr[2] + li s5, 6270 + lw s7, 88(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 64(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) + lh t5, 92(t1) // wsptr[7] + lh t6, 84(t1) // wsptr[5] + lh t7, 76(t1) // wsptr[3] + lh t8, 68(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 8(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + li s4, 15137 + lw s6, 104(t1) // wsptr[2] + li s5, 6270 + lw s7, 120(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) + lw t2, 96(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], -FIX_0_765366865) + lh t5, 124(t1) // wsptr[7] + lh t6, 116(t1) // wsptr[5] + lh t7, 108(t1) // wsptr[3] + lh t8, 100(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2; + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2; + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 12(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_idct_4x4_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_6x6_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - output_buf + * a3 - output_col + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -144 + move v0, sp + addiu v1, v0, 24 + addiu t9, zero, 5793 + addiu s0, zero, 10033 + addiu s1, zero, 2998 + +1: + lh s2, 0(a0) // q0 = quantptr[ 0] + lh s3, 32(a0) // q1 = quantptr[16] + lh s4, 64(a0) // q2 = quantptr[32] + lh t2, 64(a1) // tmp2 = inptr[32] + lh t1, 32(a1) // tmp1 = inptr[16] + lh t0, 0(a1) // tmp0 = inptr[ 0] + mul t2, t2, s4 // tmp2 = tmp2 * q2 + mul t1, t1, s3 // tmp1 = tmp1 * q1 + mul t0, t0, s2 // tmp0 = tmp0 * q0 + lh t6, 16(a1) // z1 = inptr[ 8] + lh t8, 80(a1) // z3 = inptr[40] + lh t7, 48(a1) // z2 = inptr[24] + lh s2, 16(a0) // q0 = quantptr[ 8] + lh s4, 80(a0) // q2 = quantptr[40] + lh s3, 48(a0) // q1 = quantptr[24] + mul t2, t2, t9 // tmp2 = tmp2 * 5793 + mul t1, t1, s0 // tmp1 = tmp1 * 10033 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + mul t6, t6, s2 // z1 = z1 * q0 + mul t8, t8, s4 // z3 = z3 * q2 + mul t7, t7, s3 // z2 = z2 * q1 + addu t3, t0, t2 // tmp10 = tmp0 + tmp2 + sll t2, t2, 1 // tmp2 = tmp2 << 2 + subu t4, t0, t2 // tmp11 = tmp0 - tmp2; + subu t5, t3, t1 // tmp12 = tmp10 - tmp1 + addu t3, t3, t1 // tmp10 = tmp10 + tmp1 + addu t1, t6, t8 // tmp1 = z1 + z3 + mul t1, t1, s1 // tmp1 = tmp1 * 2998 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + subu t2, t6, t8 // tmp2 = z1 - z3 + subu t2, t2, t7 // tmp2 = tmp2 - z2 + sll t2, t2, 2 // tmp2 = tmp2 << 2 + addu t0, t6, t7 // tmp0 = z1 + z2 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + subu s2, t8, t7 // q0 = z3 - z2 + sll s2, s2, 13 // q0 = q0 << 13 + addu t0, t0, t1 // tmp0 = tmp0 + tmp1 + addu t1, s2, t1 // tmp1 = q0 + tmp1 + addu s2, t4, t2 // q0 = tmp11 + tmp2 + subu s3, t4, t2 // q1 = tmp11 - tmp2 + addu t6, t3, t0 // z1 = tmp10 + tmp0 + subu t7, t3, t0 // z2 = tmp10 - tmp0 + addu t4, t5, t1 // tmp11 = tmp12 + tmp1 + subu t5, t5, t1 // tmp12 = tmp12 - tmp1 + shra_r.w t6, t6, 11 // z1 = (z1 + 1024) >> 11 + shra_r.w t7, t7, 11 // z2 = (z2 + 1024) >> 11 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + shra_r.w t5, t5, 11 // tmp12 = (tmp12 + 1024) >> 11 + sw s2, 24(v0) + sw s3, 96(v0) + sw t6, 0(v0) + sw t7, 120(v0) + sw t4, 48(v0) + sw t5, 72(v0) + addiu v0, v0, 4 + addiu a1, a1, 2 + bne v0, v1, 1b + addiu a0, a0, 2 + + /* Pass 2: process 6 rows from work array, store into output array. */ + move v0, sp + addiu v1, v0, 144 + +2: + lw t0, 0(v0) + lw t2, 16(v0) + lw s5, 0(a2) + addiu t0, t0, 16 + sll t0, t0, 13 + mul t3, t2, t9 + lw t6, 4(v0) + lw t8, 20(v0) + lw t7, 12(v0) + addu s5, s5, a3 + addu s6, t6, t8 + mul s6, s6, s1 + addu t1, t0, t3 + subu t4, t0, t3 + subu t4, t4, t3 + lw t3, 8(v0) + mul t0, t3, s0 + addu s7, t6, t7 + sll s7, s7, 13 + addu s7, s6, s7 + subu t2, t8, t7 + sll t2, t2, 13 + addu t2, s6, t2 + subu s6, t6, t7 + subu s6, s6, t8 + sll s6, s6, 13 + addu t3, t1, t0 + subu t5, t1, t0 + addu t6, t3, s7 + subu t3, t3, s7 + addu t7, t4, s6 + subu t4, t4, s6 + addu t8, t5, t2 + subu t5, t5, t2 + shll_s.w t6, t6, 6 + shll_s.w t3, t3, 6 + shll_s.w t7, t7, 6 + shll_s.w t4, t4, 6 + shll_s.w t8, t8, 6 + shll_s.w t5, t5, 6 + sra t6, t6, 24 + addiu t6, t6, 128 + sra t3, t3, 24 + addiu t3, t3, 128 + sb t6, 0(s5) + sra t7, t7, 24 + addiu t7, t7, 128 + sb t3, 5(s5) + sra t4, t4, 24 + addiu t4, t4, 128 + sb t7, 1(s5) + sra t8, t8, 24 + addiu t8, t8, 128 + sb t4, 4(s5) + addiu v0, v0, 24 + sra t5, t5, 24 + addiu t5, t5, 128 + sb t8, 2(s5) + addiu a2, a2, 4 + bne v0, v1, 2b + sb t5, 3(s5) + + addiu sp, sp, 144 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_6x6_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass1_mips_dspr2) +/* + * a0 - compptr->dct_table + * a1 - coef_block + * a2 - workspace + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 8 + +1: + // odd part + lh t0, 48(a1) + lh t1, 48(a0) + lh t2, 16(a1) + lh t3, 16(a0) + lh t4, 80(a1) + lh t5, 80(a0) + lh t6, 112(a1) + lh t7, 112(a0) + mul t0, t0, t1 // z2 + mul t1, t2, t3 // z1 + mul t2, t4, t5 // z3 + mul t3, t6, t7 // z4 + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + li t6, 7053 // FIX(0.860918669) + mul t4, t0,t4 // tmp11 + mul t5, t0,t5 // -tmp14 + addu t7, t1,t2 // tmp10 + addu t8, t7,t3 // tmp10 + z4 + mul t6, t6, t8 // tmp15 + li t8, 2139 // FIX(0.261052384) + mul t8, t7, t8 // MULTIPLY(tmp10, FIX(0.261052384)) + li t7, 2295 // FIX(0.280143716) + mul t7, t1, t7 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242) + li s1, 12998 // FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1-=z4 + subu t0, t0, t2 // z2-=z3 + addu t2, t0, t1 // z1+z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_0_765366865 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t8, t6, t8 // tmp12 + addu t3, t8, t4 // tmp12 + tmp11 + addu t3, t3, t7 // tmp10 + subu t8, t8, t9 // tmp12 + tmp13 + addu s0, t5, s0 + subu t8, t8, s0 // tmp12 + subu t9, t6, t9 + subu s1, s1, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + // even part start + lh t4, 64(a1) + lh t5, 64(a0) + lh t7, 32(a1) + lh s0, 32(a0) + lh s1, 0(a1) + lh s2, 0(a0) + lh s3, 96(a1) + lh v0, 96(a0) + mul t4, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*4],quantptr[DCTSIZE*4]) + mul t5, t7, s0 // DEQUANTIZE(inptr[DCTSIZE*2],quantptr[DCTSIZE*2]) + mul t7, s1, s2 // DEQUANTIZE(inptr[DCTSIZE*0],quantptr[DCTSIZE*0]) + mul s0, s3, v0 // DEQUANTIZE(inptr[DCTSIZE*6],quantptr[DCTSIZE*6]) + // odd part end + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // update counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 2 + addiu a1, a1, 2 + // even part rest + li s1, 10033 + li s2, 11190 + mul t4, t4, s1 // z4 + mul s1, t5, s2 // z4 + sll t5, t5, 13 // z1 + sll t7, t7, 13 + addiu t7, t7, 1024 // z3 + sll s0, s0, 13 // z2 + addu s2, t7, t4 // tmp10 + subu t4, t7, t4 // tmp11 + subu s3, t5, s0 // tmp12 + addu t2, t7, s3 // tmp21 + subu s3, t7, s3 // tmp24 + addu t7, s1, s0 // tmp12 + addu v0, s2, t7 // tmp20 + subu s2, s2, t7 // tmp25 + subu s1, s1, t5 // z4 - z1 + subu s1, s1, s0 // tmp12 + addu s0, t4, s1 // tmp22 + subu t4, t4, s1 // tmp23 + // final output stage + addu t5, v0, t3 + subu v0, v0, t3 + addu t3, t2, t1 + subu t2, t2, t1 + addu t1, s0, t8 + subu s0, s0, t8 + addu t8, t4, t9 + subu t4, t4, t9 + addu t9, s3, t0 + subu s3, s3, t0 + addu t0, s2, t6 + subu s2, s2, t6 + sra t5, t5, 11 + sra t3, t3, 11 + sra t1, t1, 11 + sra t8, t8, 11 + sra t9, t9, 11 + sra t0, t0, 11 + sra s2, s2, 11 + sra s3, s3, 11 + sra t4, t4, 11 + sra s0, s0, 11 + sra t2, t2, 11 + sra v0, v0, 11 + sw t5, 0(a2) + sw t3, 32(a2) + sw t1, 64(a2) + sw t8, 96(a2) + sw t9, 128(a2) + sw t0, 160(a2) + sw s2, 192(a2) + sw s3, 224(a2) + sw t4, 256(a2) + sw s0, 288(a2) + sw t2, 320(a2) + sw v0, 352(a2) + bgtz a3, 1b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop + +END(jsimd_idct_12x12_pass1_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2) +/* + * a0 - workspace + * a1 - output + */ + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 12 + +1: + // Odd part + lw t0, 12(a0) + lw t1, 4(a0) + lw t2, 20(a0) + lw t3, 28(a0) + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + mul t4, t0, t4 // tmp11 + mul t5, t0, t5 // -tmp14 + addu t6, t1, t2 // tmp10 + li t7, 2139 // FIX(0.261052384) + mul t7, t6, t7 // MULTIPLY(tmp10, FIX(0.261052384)) + addu t6, t6, t3 // tmp10 + z4 + li t8, 7053 // FIX(0.860918669) + mul t6, t6, t8 // tmp15 + li t8, 2295 // FIX(0.280143716) + mul t8, t1, t8 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242)) + li s1, 12998 // FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1 -= z4 + subu t0, t0, t2 // z2 -= z3 + addu t2, t1, t0 // z1 + z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_1_847759065 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t3, t6, t7 // tmp12 + addu t7, t3, t4 + addu t7, t7, t8 // tmp10 + subu t3, t3, t9 + subu t3, t3, t5 + subu t3, t3, s0 // tmp12 + subu t9, t6, t9 + subu t9, t9, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // even part + lw t2, 16(a0) // z4 + lw t4, 8(a0) // z1 + lw t5, 0(a0) // z3 + lw t8, 24(a0) // z2 + li s0, 10033 // FIX(1.224744871) + li s1, 11190 // FIX(1.366025404) + mul t2, t2, s0 // z4 + mul s0, t4, s1 // z4 + addiu t5, t5, 0x10 + sll t5, t5, 13 // z3 + sll t4, t4, 13 // z1 + sll t8, t8, 13 // z2 + subu s1, t4, t8 // tmp12 + addu s2, t5, t2 // tmp10 + subu t2, t5, t2 // tmp11 + addu s3, t5, s1 // tmp21 + subu s1, t5, s1 // tmp24 + addu t5, s0, t8 // tmp12 + addu v0, s2, t5 // tmp20 + subu t5, s2, t5 // tmp25 + subu t4, s0, t4 + subu t4, t4, t8 // tmp12 + addu t8, t2, t4 // tmp22 + subu t2, t2, t4 // tmp23 + // increment counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 32 + // Final stage + addu t4, v0, t7 + subu v0, v0, t7 + addu t7, s3, t1 + subu s3, s3, t1 + addu t1, t8, t3 + subu t8, t8, t3 + addu t3, t2, t9 + subu t2, t2, t9 + addu t9, s1, t0 + subu s1, s1, t0 + addu t0, t5, t6 + subu t5, t5, t6 + sll t4, t4, 4 + sll t7, t7, 4 + sll t1, t1, 4 + sll t3, t3, 4 + sll t9, t9, 4 + sll t0, t0, 4 + sll t5, t5, 4 + sll s1, s1, 4 + sll t2, t2, 4 + sll t8, t8, 4 + sll s3, s3, 4 + sll v0, v0, 4 + shll_s.w t4, t4, 2 + shll_s.w t7, t7, 2 + shll_s.w t1, t1, 2 + shll_s.w t3, t3, 2 + shll_s.w t9, t9, 2 + shll_s.w t0, t0, 2 + shll_s.w t5, t5, 2 + shll_s.w s1, s1, 2 + shll_s.w t2, t2, 2 + shll_s.w t8, t8, 2 + shll_s.w s3, s3, 2 + shll_s.w v0, v0, 2 + srl t4, t4, 24 + srl t7, t7, 24 + srl t1, t1, 24 + srl t3, t3, 24 + srl t9, t9, 24 + srl t0, t0, 24 + srl t5, t5, 24 + srl s1, s1, 24 + srl t2, t2, 24 + srl t8, t8, 24 + srl s3, s3, 24 + srl v0, v0, 24 + lw t6, 0(a1) + addiu t4, t4, 0x80 + addiu t7, t7, 0x80 + addiu t1, t1, 0x80 + addiu t3, t3, 0x80 + addiu t9, t9, 0x80 + addiu t0, t0, 0x80 + addiu t5, t5, 0x80 + addiu s1, s1, 0x80 + addiu t2, t2, 0x80 + addiu t8, t8, 0x80 + addiu s3, s3, 0x80 + addiu v0, v0, 0x80 + sb t4, 0(t6) + sb t7, 1(t6) + sb t1, 2(t6) + sb t3, 3(t6) + sb t9, 4(t6) + sb t0, 5(t6) + sb t5, 6(t6) + sb s1, 7(t6) + sb t2, 8(t6) + sb t8, 9(t6) + sb s3, 10(t6) + sb v0, 11(t6) + bgtz a3, 1b + addiu a1, a1, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + jr ra + nop + +END(jsimd_idct_12x12_pass2_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2) +/* + * a0 - sample_data + * a1 - start_col + * a2 - workspace + */ + + lw t0, 0(a0) + li t7, 0xff80ff80 + addu t0, t0, a1 + ulw t1, 0(t0) + ulw t2, 4(t0) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + lw t0, 4(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 0(a2) + usw t4, 4(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 8(a2) + usw t6, 12(a2) + + lw t0, 8(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 16(a2) + usw t4, 20(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 24(a2) + usw t6, 28(a2) + + lw t0, 12(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 32(a2) + usw t4, 36(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 40(a2) + usw t6, 44(a2) + + lw t0, 16(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 48(a2) + usw t4, 52(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 56(a2) + usw t6, 60(a2) + + lw t0, 20(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 64(a2) + usw t4, 68(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 72(a2) + usw t6, 76(a2) + + lw t0, 24(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 80(a2) + usw t4, 84(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 88(a2) + usw t6, 92(a2) + + lw t0, 28(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 96(a2) + usw t4, 100(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 104(a2) + usw t6, 108(a2) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 112(a2) + usw t4, 116(a2) + usw t5, 120(a2) + usw t6, 124(a2) + + j ra + nop + +END(jsimd_convsamp_mips_dspr2) + +/*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2) +/* + * a0 - sample_data + * a1 - start_col + * a2 - workspace + */ + + .set at + + lw t0, 0(a0) + addu t0, t0, a1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 4(a0) + swc1 f2, 0(a2) + swc1 f4, 4(a2) + swc1 f6, 8(a2) + addu t0, t0, a1 + swc1 f8, 12(a2) + swc1 f10, 16(a2) + swc1 f12, 20(a2) + swc1 f14, 24(a2) + swc1 f16, 28(a2) + //elemr 1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 8(a0) + swc1 f2, 32(a2) + swc1 f4, 36(a2) + swc1 f6, 40(a2) + addu t0, t0, a1 + swc1 f8, 44(a2) + swc1 f10, 48(a2) + swc1 f12, 52(a2) + swc1 f14, 56(a2) + swc1 f16, 60(a2) + //elemr 2 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 12(a0) + swc1 f2, 64(a2) + swc1 f4, 68(a2) + swc1 f6, 72(a2) + addu t0, t0, a1 + swc1 f8, 76(a2) + swc1 f10, 80(a2) + swc1 f12, 84(a2) + swc1 f14, 88(a2) + swc1 f16, 92(a2) + //elemr 3 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 16(a0) + swc1 f2, 96(a2) + swc1 f4, 100(a2) + swc1 f6, 104(a2) + addu t0, t0, a1 + swc1 f8, 108(a2) + swc1 f10, 112(a2) + swc1 f12, 116(a2) + swc1 f14, 120(a2) + swc1 f16, 124(a2) + //elemr 4 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 20(a0) + swc1 f2, 128(a2) + swc1 f4, 132(a2) + swc1 f6, 136(a2) + addu t0, t0, a1 + swc1 f8, 140(a2) + swc1 f10, 144(a2) + swc1 f12, 148(a2) + swc1 f14, 152(a2) + swc1 f16, 156(a2) + //elemr 5 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 24(a0) + swc1 f2, 160(a2) + swc1 f4, 164(a2) + swc1 f6, 168(a2) + addu t0, t0, a1 + swc1 f8, 172(a2) + swc1 f10, 176(a2) + swc1 f12, 180(a2) + swc1 f14, 184(a2) + swc1 f16, 188(a2) + //elemr 6 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 28(a0) + swc1 f2, 192(a2) + swc1 f4, 196(a2) + swc1 f6, 200(a2) + addu t0, t0, a1 + swc1 f8, 204(a2) + swc1 f10, 208(a2) + swc1 f12, 212(a2) + swc1 f14, 216(a2) + swc1 f16, 220(a2) + //elemr 7 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + swc1 f2, 224(a2) + swc1 f4, 228(a2) + swc1 f6, 232(a2) + swc1 f8, 236(a2) + swc1 f10, 240(a2) + swc1 f12, 244(a2) + swc1 f14, 248(a2) + swc1 f16, 252(a2) + + j ra + nop + +END(jsimd_convsamp_float_mips_dspr2) + +/*****************************************************************************/ + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_powerpc.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_powerpc.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_powerpc.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_powerpc.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,741 @@ +/* + * jsimd_powerpc.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014-2015 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * PowerPC architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +static unsigned int simd_support = ~0; + +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_ALTIVEC; + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_extrgb_ycc_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_extrgbx_ycc_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_extbgr_ycc_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_extbgrx_ycc_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_extxbgr_ycc_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_extxrgb_ycc_convert_altivec; + break; + default: + altivecfct=jsimd_rgb_ycc_convert_altivec; + break; + } + + altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_extrgb_gray_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_extrgbx_gray_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_extbgr_gray_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_extbgrx_gray_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_extxbgr_gray_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_extxrgb_gray_convert_altivec; + break; + default: + altivecfct=jsimd_rgb_gray_convert_altivec; + break; + } + + altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_ycc_extrgb_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_ycc_extrgbx_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_ycc_extbgr_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_ycc_extbgrx_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_ycc_extxbgr_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_ycc_extxrgb_convert_altivec; + break; + default: + altivecfct=jsimd_ycc_rgb_convert_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_fancy_upsample_altivec(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_fancy_upsample_altivec(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_h2v2_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_h2v2_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_h2v2_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_h2v2_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_h2v2_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_h2v2_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct=jsimd_h2v2_merged_upsample_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct=jsimd_h2v1_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct=jsimd_h2v1_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct=jsimd_h2v1_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct=jsimd_h2v1_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct=jsimd_h2v1_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct=jsimd_h2v1_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct=jsimd_h2v1_merged_upsample_altivec; + break; + } + + altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_altivec(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_altivec(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_altivec(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_altivec(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_altivec(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_altivec(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_x86_64.c glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_x86_64.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/jsimd_x86_64.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/jsimd_x86_64.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,887 @@ +/* + * jsimd_x86_64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014, 2016 D. R. Commander + * Copyright 2015 Matthieu Darbois + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 64-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. This macro allows us to verify it at runtime. + */ +#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + +static unsigned int simd_support = ~0; +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_SSE2 | JSIMD_SSE; + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_ycc_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_ycc_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_ycc_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_ycc_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_ycc_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_ycc_convert_sse2; + break; + default: + sse2fct=jsimd_rgb_ycc_convert_sse2; + break; + } + + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch(cinfo->in_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_extrgb_gray_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_extrgbx_gray_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_extbgr_gray_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_extbgrx_gray_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_extxbgr_gray_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_extxrgb_gray_convert_sse2; + break; + default: + sse2fct=jsimd_rgb_gray_convert_sse2; + break; + } + + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_ycc_extrgb_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_ycc_extrgbx_convert_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_ycc_extbgr_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_ycc_extbgrx_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_ycc_extxbgr_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_ycc_extxrgb_convert_sse2; + break; + default: + sse2fct=jsimd_ycc_rgb_convert_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; + break; + default: + sse2fct=jsimd_h2v2_merged_upsample_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch(cinfo->out_color_space) { + case JCS_EXT_RGB: + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; + break; + default: + sse2fct=jsimd_h2v1_merged_upsample_sse2; + break; + } + + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_sse2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM *data) +{ + jsimd_fdct_islow_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_ifast (DCTELEM *data) +{ + jsimd_fdct_ifast_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT *data) +{ + jsimd_fdct_float_sse(data); +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + jsimd_quantize_sse2(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + jsimd_quantize_float_sse2(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + if (sizeof(FLOAT_MULT_TYPE) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block (void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET*) +jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/Makefile.am glmark2-2021.02/src/libjpeg-turbo/simd/Makefile.am --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/Makefile.am 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/Makefile.am 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,97 @@ +noinst_LTLIBRARIES = libsimd.la + +BUILT_SOURCES = jsimdcfg.inc + +EXTRA_DIST = nasm_lt.sh CMakeLists.txt \ + jccolext-mmx.asm jcgryext-mmx.asm jdcolext-mmx.asm jdmrgext-mmx.asm \ + jccolext-sse2.asm jcgryext-sse2.asm jdcolext-sse2.asm jdmrgext-sse2.asm \ + jccolext-sse2-64.asm jcgryext-sse2-64.asm jdcolext-sse2-64.asm \ + jdmrgext-sse2-64.asm jccolext-altivec.c jcgryext-altivec.c \ + jdcolext-altivec.c jdmrgext-altivec.c + +if SIMD_X86_64 + +libsimd_la_SOURCES = jsimd_x86_64.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ + jcolsamp.inc jdct.inc jpeg_nbits_table.inc jfdctflt-sse-64.asm \ + jccolor-sse2-64.asm jcgray-sse2-64.asm jchuff-sse2-64.asm \ + jcsample-sse2-64.asm jdcolor-sse2-64.asm jdmerge-sse2-64.asm \ + jdsample-sse2-64.asm jfdctfst-sse2-64.asm jfdctint-sse2-64.asm \ + jidctflt-sse2-64.asm jidctfst-sse2-64.asm jidctint-sse2-64.asm \ + jidctred-sse2-64.asm jquantf-sse2-64.asm jquanti-sse2-64.asm + +jccolor-sse2-64.lo: jccolext-sse2-64.asm +jcgray-sse2-64.lo: jcgryext-sse2-64.asm +jdcolor-sse2-64.lo: jdcolext-sse2-64.asm +jdmerge-sse2-64.lo: jdmrgext-sse2-64.asm + +endif + +if SIMD_I386 + +libsimd_la_SOURCES = jsimd_i386.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ + jcolsamp.inc jdct.inc jpeg_nbits_table.inc jsimdcpu.asm \ + jfdctflt-3dn.asm jidctflt-3dn.asm jquant-3dn.asm \ + jccolor-mmx.asm jcgray-mmx.asm jcsample-mmx.asm \ + jdcolor-mmx.asm jdmerge-mmx.asm jdsample-mmx.asm \ + jfdctfst-mmx.asm jfdctint-mmx.asm jidctfst-mmx.asm \ + jidctint-mmx.asm jidctred-mmx.asm jquant-mmx.asm \ + jfdctflt-sse.asm jidctflt-sse.asm jquant-sse.asm \ + jccolor-sse2.asm jcgray-sse2.asm jchuff-sse2.asm \ + jcsample-sse2.asm jdcolor-sse2.asm jdmerge-sse2.asm \ + jdsample-sse2.asm jfdctfst-sse2.asm jfdctint-sse2.asm \ + jidctflt-sse2.asm jidctfst-sse2.asm jidctint-sse2.asm \ + jidctred-sse2.asm jquantf-sse2.asm jquanti-sse2.asm + +jccolor-mmx.lo: jccolext-mmx.asm +jcgray.-mmx.lo: jcgryext-mmx.asm +jdcolor-mmx.lo: jdcolext-mmx.asm +jdmerge-mmx.lo: jdmrgext-mmx.asm +jccolor-sse2.lo: jccolext-sse2.asm +jcgray-sse2.lo: jcgryext-sse2.asm +jdcolor-sse2.lo: jdcolext-sse2.asm +jdmerge-sse2.lo: jdmrgext-sse2.asm + +endif + +if SIMD_ARM + +libsimd_la_SOURCES = jsimd_arm.c jsimd_arm_neon.S + +endif + +if SIMD_ARM_64 + +libsimd_la_SOURCES = jsimd_arm64.c jsimd_arm64_neon.S + +endif + +if SIMD_MIPS + +libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S + +endif + +if SIMD_POWERPC + +libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h \ + jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \ + jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \ + jfdctfst-altivec.c jfdctint-altivec.c \ + jidctfst-altivec.c jidctint-altivec.c \ + jquanti-altivec.c +libsimd_la_CFLAGS = -maltivec + +jccolor-altivec.lo: jccolext-altivec.c +jcgray-altivec.lo: jcgryext-altivec.c +jdcolor-altivec.lo: jdcolext-altivec.c +jdmerge-altivec.lo: jdmrgext-altivec.c + +endif + +AM_CPPFLAGS = -I$(top_srcdir) + +.asm.lo: + $(AM_V_GEN) $(LIBTOOL) $(AM_V_lt) --mode=compile --tag NASM $(srcdir)/nasm_lt.sh $(AM_V_lt) $(NASM) $(NAFLAGS) -I$(srcdir) -I. $< -o $@ + +jsimdcfg.inc: $(srcdir)/jsimdcfg.inc.h ../jpeglib.h ../jconfig.h ../jmorecfg.h + $(AM_V_GEN) $(CPP) -I$(top_builddir) -I$(top_builddir)/simd $(srcdir)/jsimdcfg.inc.h | $(EGREP) "^[\;%]|^\ %" | sed 's%_cpp_protection_%%' | sed 's@% define@%define@g' > $@ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/nasm_lt.sh glmark2-2021.02/src/libjpeg-turbo/simd/nasm_lt.sh --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/simd/nasm_lt.sh 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/simd/nasm_lt.sh 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,60 @@ +#! /bin/sh +command="" +infile="" +o_opt=no +pic=no +while [ $# -gt 0 ]; do + case "$1" in + --silent) + exec > /dev/null + ;; + -DPIC|-fPIC|-fpic|-Kpic|-KPIC) + if [ "$pic" != "yes" ] ; then + command="$command -DPIC" + pic=yes + fi + ;; + -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \ + -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64) + # it's a file format specifier for nasm. + command="$command $1" + ;; + -f*) + # maybe a code-generation flag for gcc. + ;; + -[Ii]*) + incdir=`echo "$1" | sed 's/^-[Ii]//'` + if [ "x$incdir" = x -a "x$2" != x ] ; then + case "$2" in + -*) ;; + *) incdir="$2"; shift;; + esac + fi + if [ "x$incdir" != x ] ; then + # In the case of NASM, the trailing slash is necessary. + incdir=`echo "$incdir" | sed 's%/*$%/%'` + command="$command -I$incdir" + fi + ;; + -o*) + o_opt=yes + command="$command $1" + ;; + *.asm) + infile=$1 + command="$command $1" + ;; + *) + command="$command $1" + ;; + esac + shift +done +if [ "$o_opt" != yes ] ; then + # By default, NASM creates an output file + # in the same directory as the input file. + outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o" + command="$command $outfile" +fi +echo $command +exec $command diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/structure.txt glmark2-2021.02/src/libjpeg-turbo/structure.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/structure.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/structure.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,906 @@ +IJG JPEG LIBRARY: SYSTEM ARCHITECTURE + +This file was part of the Independent JPEG Group's software: +Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. +It was modified by The libjpeg-turbo Project to include only information +relevant to libjpeg-turbo. +For conditions of distribution and use, see the accompanying README.ijg file. + + +This file provides an overview of the architecture of the IJG JPEG software; +that is, the functions of the various modules in the system and the interfaces +between modules. For more precise details about any data structure or calling +convention, see the include files and comments in the source code. + +We assume that the reader is already somewhat familiar with the JPEG standard. +The README.ijg file includes references for learning about JPEG. The file +libjpeg.txt describes the library from the viewpoint of an application +programmer using the library; it's best to read that file before this one. +Also, the file coderules.txt describes the coding style conventions we use. + +In this document, JPEG-specific terminology follows the JPEG standard: + A "component" means a color channel, e.g., Red or Luminance. + A "sample" is a single component value (i.e., one number in the image data). + A "coefficient" is a frequency coefficient (a DCT transform output number). + A "block" is an 8x8 group of samples or coefficients. + An "MCU" (minimum coded unit) is an interleaved set of blocks of size + determined by the sampling factors, or a single block in a + noninterleaved scan. +We do not use the terms "pixel" and "sample" interchangeably. When we say +pixel, we mean an element of the full-size image, while a sample is an element +of the downsampled image. Thus the number of samples may vary across +components while the number of pixels does not. (This terminology is not used +rigorously throughout the code, but it is used in places where confusion would +otherwise result.) + + +*** System features *** + +The IJG distribution contains two parts: + * A subroutine library for JPEG compression and decompression. + * cjpeg/djpeg, two sample applications that use the library to transform + JFIF JPEG files to and from several other image formats. +cjpeg/djpeg are of no great intellectual complexity: they merely add a simple +command-line user interface and I/O routines for several uncompressed image +formats. This document concentrates on the library itself. + +We desire the library to be capable of supporting all JPEG baseline, extended +sequential, and progressive DCT processes. Hierarchical processes are not +supported. + +The library does not support the lossless (spatial) JPEG process. Lossless +JPEG shares little or no code with lossy JPEG, and would normally be used +without the extensive pre- and post-processing provided by this library. +We feel that lossless JPEG is better handled by a separate library. + +Within these limits, any set of compression parameters allowed by the JPEG +spec should be readable for decompression. (We can be more restrictive about +what formats we can generate.) Although the system design allows for all +parameter values, some uncommon settings are not yet implemented and may +never be; nonintegral sampling ratios are the prime example. Furthermore, +we treat 8-bit vs. 12-bit data precision as a compile-time switch, not a +run-time option, because most machines can store 8-bit pixels much more +compactly than 12-bit. + +By itself, the library handles only interchange JPEG datastreams --- in +particular the widely used JFIF file format. The library can be used by +surrounding code to process interchange or abbreviated JPEG datastreams that +are embedded in more complex file formats. (For example, libtiff uses this +library to implement JPEG compression within the TIFF file format.) + +The library includes a substantial amount of code that is not covered by the +JPEG standard but is necessary for typical applications of JPEG. These +functions preprocess the image before JPEG compression or postprocess it after +decompression. They include colorspace conversion, downsampling/upsampling, +and color quantization. This code can be omitted if not needed. + +A wide range of quality vs. speed tradeoffs are possible in JPEG processing, +and even more so in decompression postprocessing. The decompression library +provides multiple implementations that cover most of the useful tradeoffs, +ranging from very-high-quality down to fast-preview operation. On the +compression side we have generally not provided low-quality choices, since +compression is normally less time-critical. It should be understood that the +low-quality modes may not meet the JPEG standard's accuracy requirements; +nonetheless, they are useful for viewers. + + +*** System overview *** + +The compressor and decompressor are each divided into two main sections: +the JPEG compressor or decompressor proper, and the preprocessing or +postprocessing functions. The interface between these two sections is the +image data that the official JPEG spec regards as its input or output: this +data is in the colorspace to be used for compression, and it is downsampled +to the sampling factors to be used. The preprocessing and postprocessing +steps are responsible for converting a normal image representation to or from +this form. (Those few applications that want to deal with YCbCr downsampled +data can skip the preprocessing or postprocessing step.) + +Looking more closely, the compressor library contains the following main +elements: + + Preprocessing: + * Color space conversion (e.g., RGB to YCbCr). + * Edge expansion and downsampling. Optionally, this step can do simple + smoothing --- this is often helpful for low-quality source data. + JPEG proper: + * MCU assembly, DCT, quantization. + * Entropy coding (sequential or progressive, Huffman or arithmetic). + +In addition to these modules we need overall control, marker generation, +and support code (memory management & error handling). There is also a +module responsible for physically writing the output data --- typically +this is just an interface to fwrite(), but some applications may need to +do something else with the data. + +The decompressor library contains the following main elements: + + JPEG proper: + * Entropy decoding (sequential or progressive, Huffman or arithmetic). + * Dequantization, inverse DCT, MCU disassembly. + Postprocessing: + * Upsampling. Optionally, this step may be able to do more general + rescaling of the image. + * Color space conversion (e.g., YCbCr to RGB). This step may also + provide gamma adjustment [ currently it does not ]. + * Optional color quantization (e.g., reduction to 256 colors). + * Optional color precision reduction (e.g., 24-bit to 15-bit color). + [This feature is not currently implemented.] + +We also need overall control, marker parsing, and a data source module. +The support code (memory management & error handling) can be shared with +the compression half of the library. + +There may be several implementations of each of these elements, particularly +in the decompressor, where a wide range of speed/quality tradeoffs is very +useful. It must be understood that some of the best speedups involve +merging adjacent steps in the pipeline. For example, upsampling, color space +conversion, and color quantization might all be done at once when using a +low-quality ordered-dither technique. The system architecture is designed to +allow such merging where appropriate. + + +Note: it is convenient to regard edge expansion (padding to block boundaries) +as a preprocessing/postprocessing function, even though the JPEG spec includes +it in compression/decompression. We do this because downsampling/upsampling +can be simplified a little if they work on padded data: it's not necessary to +have special cases at the right and bottom edges. Therefore the interface +buffer is always an integral number of blocks wide and high, and we expect +compression preprocessing to pad the source data properly. Padding will occur +only to the next block (8-sample) boundary. In an interleaved-scan situation, +additional dummy blocks may be used to fill out MCUs, but the MCU assembly and +disassembly logic will create or discard these blocks internally. (This is +advantageous for speed reasons, since we avoid DCTing the dummy blocks. +It also permits a small reduction in file size, because the compressor can +choose dummy block contents so as to minimize their size in compressed form. +Finally, it makes the interface buffer specification independent of whether +the file is actually interleaved or not.) Applications that wish to deal +directly with the downsampled data must provide similar buffering and padding +for odd-sized images. + + +*** Poor man's object-oriented programming *** + +It should be clear by now that we have a lot of quasi-independent processing +steps, many of which have several possible behaviors. To avoid cluttering the +code with lots of switch statements, we use a simple form of object-style +programming to separate out the different possibilities. + +For example, two different color quantization algorithms could be implemented +as two separate modules that present the same external interface; at runtime, +the calling code will access the proper module indirectly through an "object". + +We can get the limited features we need while staying within portable C. +The basic tool is a function pointer. An "object" is just a struct +containing one or more function pointer fields, each of which corresponds to +a method name in real object-oriented languages. During initialization we +fill in the function pointers with references to whichever module we have +determined we need to use in this run. Then invocation of the module is done +by indirecting through a function pointer; on most machines this is no more +expensive than a switch statement, which would be the only other way of +making the required run-time choice. The really significant benefit, of +course, is keeping the source code clean and well structured. + +We can also arrange to have private storage that varies between different +implementations of the same kind of object. We do this by making all the +module-specific object structs be separately allocated entities, which will +be accessed via pointers in the master compression or decompression struct. +The "public" fields or methods for a given kind of object are specified by +a commonly known struct. But a module's initialization code can allocate +a larger struct that contains the common struct as its first member, plus +additional private fields. With appropriate pointer casting, the module's +internal functions can access these private fields. (For a simple example, +see jdatadst.c, which implements the external interface specified by struct +jpeg_destination_mgr, but adds extra fields.) + +(Of course this would all be a lot easier if we were using C++, but we are +not yet prepared to assume that everyone has a C++ compiler.) + +An important benefit of this scheme is that it is easy to provide multiple +versions of any method, each tuned to a particular case. While a lot of +precalculation might be done to select an optimal implementation of a method, +the cost per invocation is constant. For example, the upsampling step might +have a "generic" method, plus one or more "hardwired" methods for the most +popular sampling factors; the hardwired methods would be faster because they'd +use straight-line code instead of for-loops. The cost to determine which +method to use is paid only once, at startup, and the selection criteria are +hidden from the callers of the method. + +This plan differs a little bit from usual object-oriented structures, in that +only one instance of each object class will exist during execution. The +reason for having the class structure is that on different runs we may create +different instances (choose to execute different modules). You can think of +the term "method" as denoting the common interface presented by a particular +set of interchangeable functions, and "object" as denoting a group of related +methods, or the total shared interface behavior of a group of modules. + + +*** Overall control structure *** + +We previously mentioned the need for overall control logic in the compression +and decompression libraries. In IJG implementations prior to v5, overall +control was mostly provided by "pipeline control" modules, which proved to be +large, unwieldy, and hard to understand. To improve the situation, the +control logic has been subdivided into multiple modules. The control modules +consist of: + +1. Master control for module selection and initialization. This has two +responsibilities: + + 1A. Startup initialization at the beginning of image processing. + The individual processing modules to be used in this run are selected + and given initialization calls. + + 1B. Per-pass control. This determines how many passes will be performed + and calls each active processing module to configure itself + appropriately at the beginning of each pass. End-of-pass processing, + where necessary, is also invoked from the master control module. + + Method selection is partially distributed, in that a particular processing + module may contain several possible implementations of a particular method, + which it will select among when given its initialization call. The master + control code need only be concerned with decisions that affect more than + one module. + +2. Data buffering control. A separate control module exists for each + inter-processing-step data buffer. This module is responsible for + invoking the processing steps that write or read that data buffer. + +Each buffer controller sees the world as follows: + +input data => processing step A => buffer => processing step B => output data + | | | + ------------------ controller ------------------ + +The controller knows the dataflow requirements of steps A and B: how much data +they want to accept in one chunk and how much they output in one chunk. Its +function is to manage its buffer and call A and B at the proper times. + +A data buffer control module may itself be viewed as a processing step by a +higher-level control module; thus the control modules form a binary tree with +elementary processing steps at the leaves of the tree. + +The control modules are objects. A considerable amount of flexibility can +be had by replacing implementations of a control module. For example: +* Merging of adjacent steps in the pipeline is done by replacing a control + module and its pair of processing-step modules with a single processing- + step module. (Hence the possible merges are determined by the tree of + control modules.) +* In some processing modes, a given interstep buffer need only be a "strip" + buffer large enough to accommodate the desired data chunk sizes. In other + modes, a full-image buffer is needed and several passes are required. + The control module determines which kind of buffer is used and manipulates + virtual array buffers as needed. One or both processing steps may be + unaware of the multi-pass behavior. + +In theory, we might be able to make all of the data buffer controllers +interchangeable and provide just one set of implementations for all. In +practice, each one contains considerable special-case processing for its +particular job. The buffer controller concept should be regarded as an +overall system structuring principle, not as a complete description of the +task performed by any one controller. + + +*** Compression object structure *** + +Here is a sketch of the logical structure of the JPEG compression library: + + |-- Colorspace conversion + |-- Preprocessing controller --| + | |-- Downsampling +Main controller --| + | |-- Forward DCT, quantize + |-- Coefficient controller --| + |-- Entropy encoding + +This sketch also describes the flow of control (subroutine calls) during +typical image data processing. Each of the components shown in the diagram is +an "object" which may have several different implementations available. One +or more source code files contain the actual implementation(s) of each object. + +The objects shown above are: + +* Main controller: buffer controller for the subsampled-data buffer, which + holds the preprocessed input data. This controller invokes preprocessing to + fill the subsampled-data buffer, and JPEG compression to empty it. There is + usually no need for a full-image buffer here; a strip buffer is adequate. + +* Preprocessing controller: buffer controller for the downsampling input data + buffer, which lies between colorspace conversion and downsampling. Note + that a unified conversion/downsampling module would probably replace this + controller entirely. + +* Colorspace conversion: converts application image data into the desired + JPEG color space; also changes the data from pixel-interleaved layout to + separate component planes. Processes one pixel row at a time. + +* Downsampling: performs reduction of chroma components as required. + Optionally may perform pixel-level smoothing as well. Processes a "row + group" at a time, where a row group is defined as Vmax pixel rows of each + component before downsampling, and Vk sample rows afterwards (remember Vk + differs across components). Some downsampling or smoothing algorithms may + require context rows above and below the current row group; the + preprocessing controller is responsible for supplying these rows via proper + buffering. The downsampler is responsible for edge expansion at the right + edge (i.e., extending each sample row to a multiple of 8 samples); but the + preprocessing controller is responsible for vertical edge expansion (i.e., + duplicating the bottom sample row as needed to make a multiple of 8 rows). + +* Coefficient controller: buffer controller for the DCT-coefficient data. + This controller handles MCU assembly, including insertion of dummy DCT + blocks when needed at the right or bottom edge. When performing + Huffman-code optimization or emitting a multiscan JPEG file, this + controller is responsible for buffering the full image. The equivalent of + one fully interleaved MCU row of subsampled data is processed per call, + even when the JPEG file is noninterleaved. + +* Forward DCT and quantization: Perform DCT, quantize, and emit coefficients. + Works on one or more DCT blocks at a time. (Note: the coefficients are now + emitted in normal array order, which the entropy encoder is expected to + convert to zigzag order as necessary. Prior versions of the IJG code did + the conversion to zigzag order within the quantization step.) + +* Entropy encoding: Perform Huffman or arithmetic entropy coding and emit the + coded data to the data destination module. Works on one MCU per call. + For progressive JPEG, the same DCT blocks are fed to the entropy coder + during each pass, and the coder must emit the appropriate subset of + coefficients. + +In addition to the above objects, the compression library includes these +objects: + +* Master control: determines the number of passes required, controls overall + and per-pass initialization of the other modules. + +* Marker writing: generates JPEG markers (except for RSTn, which is emitted + by the entropy encoder when needed). + +* Data destination manager: writes the output JPEG datastream to its final + destination (e.g., a file). The destination manager supplied with the + library knows how to write to a stdio stream or to a memory buffer; + for other behaviors, the surrounding application may provide its own + destination manager. + +* Memory manager: allocates and releases memory, controls virtual arrays + (with backing store management, where required). + +* Error handler: performs formatting and output of error and trace messages; + determines handling of nonfatal errors. The surrounding application may + override some or all of this object's methods to change error handling. + +* Progress monitor: supports output of "percent-done" progress reports. + This object represents an optional callback to the surrounding application: + if wanted, it must be supplied by the application. + +The error handler, destination manager, and progress monitor objects are +defined as separate objects in order to simplify application-specific +customization of the JPEG library. A surrounding application may override +individual methods or supply its own all-new implementation of one of these +objects. The object interfaces for these objects are therefore treated as +part of the application interface of the library, whereas the other objects +are internal to the library. + +The error handler and memory manager are shared by JPEG compression and +decompression; the progress monitor, if used, may be shared as well. + + +*** Decompression object structure *** + +Here is a sketch of the logical structure of the JPEG decompression library: + + |-- Entropy decoding + |-- Coefficient controller --| + | |-- Dequantize, Inverse DCT +Main controller --| + | |-- Upsampling + |-- Postprocessing controller --| |-- Colorspace conversion + |-- Color quantization + |-- Color precision reduction + +As before, this diagram also represents typical control flow. The objects +shown are: + +* Main controller: buffer controller for the subsampled-data buffer, which + holds the output of JPEG decompression proper. This controller's primary + task is to feed the postprocessing procedure. Some upsampling algorithms + may require context rows above and below the current row group; when this + is true, the main controller is responsible for managing its buffer so as + to make context rows available. In the current design, the main buffer is + always a strip buffer; a full-image buffer is never required. + +* Coefficient controller: buffer controller for the DCT-coefficient data. + This controller handles MCU disassembly, including deletion of any dummy + DCT blocks at the right or bottom edge. When reading a multiscan JPEG + file, this controller is responsible for buffering the full image. + (Buffering DCT coefficients, rather than samples, is necessary to support + progressive JPEG.) The equivalent of one fully interleaved MCU row of + subsampled data is processed per call, even when the source JPEG file is + noninterleaved. + +* Entropy decoding: Read coded data from the data source module and perform + Huffman or arithmetic entropy decoding. Works on one MCU per call. + For progressive JPEG decoding, the coefficient controller supplies the prior + coefficients of each MCU (initially all zeroes), which the entropy decoder + modifies in each scan. + +* Dequantization and inverse DCT: like it says. Note that the coefficients + buffered by the coefficient controller have NOT been dequantized; we + merge dequantization and inverse DCT into a single step for speed reasons. + When scaled-down output is asked for, simplified DCT algorithms may be used + that emit fewer samples per DCT block, not the full 8x8. Works on one DCT + block at a time. + +* Postprocessing controller: buffer controller for the color quantization + input buffer, when quantization is in use. (Without quantization, this + controller just calls the upsampler.) For two-pass quantization, this + controller is responsible for buffering the full-image data. + +* Upsampling: restores chroma components to full size. (May support more + general output rescaling, too. Note that if undersized DCT outputs have + been emitted by the DCT module, this module must adjust so that properly + sized outputs are created.) Works on one row group at a time. This module + also calls the color conversion module, so its top level is effectively a + buffer controller for the upsampling->color conversion buffer. However, in + all but the highest-quality operating modes, upsampling and color + conversion are likely to be merged into a single step. + +* Colorspace conversion: convert from JPEG color space to output color space, + and change data layout from separate component planes to pixel-interleaved. + Works on one pixel row at a time. + +* Color quantization: reduce the data to colormapped form, using either an + externally specified colormap or an internally generated one. This module + is not used for full-color output. Works on one pixel row at a time; may + require two passes to generate a color map. Note that the output will + always be a single component representing colormap indexes. In the current + design, the output values are JSAMPLEs, so an 8-bit compilation cannot + quantize to more than 256 colors. This is unlikely to be a problem in + practice. + +* Color reduction: this module handles color precision reduction, e.g., + generating 15-bit color (5 bits/primary) from JPEG's 24-bit output. + Not quite clear yet how this should be handled... should we merge it with + colorspace conversion??? + +Note that some high-speed operating modes might condense the entire +postprocessing sequence to a single module (upsample, color convert, and +quantize in one step). + +In addition to the above objects, the decompression library includes these +objects: + +* Master control: determines the number of passes required, controls overall + and per-pass initialization of the other modules. This is subdivided into + input and output control: jdinput.c controls only input-side processing, + while jdmaster.c handles overall initialization and output-side control. + +* Marker reading: decodes JPEG markers (except for RSTn). + +* Data source manager: supplies the input JPEG datastream. The source + manager supplied with the library knows how to read from a stdio stream + or from a memory buffer; for other behaviors, the surrounding application + may provide its own source manager. + +* Memory manager: same as for compression library. + +* Error handler: same as for compression library. + +* Progress monitor: same as for compression library. + +As with compression, the data source manager, error handler, and progress +monitor are candidates for replacement by a surrounding application. + + +*** Decompression input and output separation *** + +To support efficient incremental display of progressive JPEG files, the +decompressor is divided into two sections that can run independently: + +1. Data input includes marker parsing, entropy decoding, and input into the + coefficient controller's DCT coefficient buffer. Note that this + processing is relatively cheap and fast. + +2. Data output reads from the DCT coefficient buffer and performs the IDCT + and all postprocessing steps. + +For a progressive JPEG file, the data input processing is allowed to get +arbitrarily far ahead of the data output processing. (This occurs only +if the application calls jpeg_consume_input(); otherwise input and output +run in lockstep, since the input section is called only when the output +section needs more data.) In this way the application can avoid making +extra display passes when data is arriving faster than the display pass +can run. Furthermore, it is possible to abort an output pass without +losing anything, since the coefficient buffer is read-only as far as the +output section is concerned. See libjpeg.txt for more detail. + +A full-image coefficient array is only created if the JPEG file has multiple +scans (or if the application specifies buffered-image mode anyway). When +reading a single-scan file, the coefficient controller normally creates only +a one-MCU buffer, so input and output processing must run in lockstep in this +case. jpeg_consume_input() is effectively a no-op in this situation. + +The main impact of dividing the decompressor in this fashion is that we must +be very careful with shared variables in the cinfo data structure. Each +variable that can change during the course of decompression must be +classified as belonging to data input or data output, and each section must +look only at its own variables. For example, the data output section may not +depend on any of the variables that describe the current scan in the JPEG +file, because these may change as the data input section advances into a new +scan. + +The progress monitor is (somewhat arbitrarily) defined to treat input of the +file as one pass when buffered-image mode is not used, and to ignore data +input work completely when buffered-image mode is used. Note that the +library has no reliable way to predict the number of passes when dealing +with a progressive JPEG file, nor can it predict the number of output passes +in buffered-image mode. So the work estimate is inherently bogus anyway. + +No comparable division is currently made in the compression library, because +there isn't any real need for it. + + +*** Data formats *** + +Arrays of pixel sample values use the following data structure: + + typedef something JSAMPLE; a pixel component value, 0..MAXJSAMPLE + typedef JSAMPLE *JSAMPROW; ptr to a row of samples + typedef JSAMPROW *JSAMPARRAY; ptr to a list of rows + typedef JSAMPARRAY *JSAMPIMAGE; ptr to a list of color-component arrays + +The basic element type JSAMPLE will typically be one of unsigned char, +(signed) char, or short. Short will be used if samples wider than 8 bits are +to be supported (this is a compile-time option). Otherwise, unsigned char is +used if possible. If the compiler only supports signed chars, then it is +necessary to mask off the value when reading. Thus, all reads of JSAMPLE +values must be coded as "GETJSAMPLE(value)", where the macro will be defined +as "((value) & 0xFF)" on signed-char machines and "((int) (value))" elsewhere. + +With these conventions, JSAMPLE values can be assumed to be >= 0. This helps +simplify correct rounding during downsampling, etc. The JPEG standard's +specification that sample values run from -128..127 is accommodated by +subtracting 128 from the sample value in the DCT step. Similarly, during +decompression the output of the IDCT step will be immediately shifted back to +0..255. (NB: different values are required when 12-bit samples are in use. +The code is written in terms of MAXJSAMPLE and CENTERJSAMPLE, which will be +defined as 255 and 128 respectively in an 8-bit implementation, and as 4095 +and 2048 in a 12-bit implementation.) + +We use a pointer per row, rather than a two-dimensional JSAMPLE array. This +choice costs only a small amount of memory and has several benefits: +* Code using the data structure doesn't need to know the allocated width of + the rows. This simplifies edge expansion/compression, since we can work + in an array that's wider than the logical picture width. +* Indexing doesn't require multiplication; this is a performance win on many + machines. +* Arrays with more than 64K total elements can be supported even on machines + where malloc() cannot allocate chunks larger than 64K. +* The rows forming a component array may be allocated at different times + without extra copying. This trick allows some speedups in smoothing steps + that need access to the previous and next rows. + +Note that each color component is stored in a separate array; we don't use the +traditional layout in which the components of a pixel are stored together. +This simplifies coding of modules that work on each component independently, +because they don't need to know how many components there are. Furthermore, +we can read or write each component to a temporary file independently, which +is helpful when dealing with noninterleaved JPEG files. + +In general, a specific sample value is accessed by code such as + GETJSAMPLE(image[colorcomponent][row][col]) +where col is measured from the image left edge, but row is measured from the +first sample row currently in memory. Either of the first two indexings can +be precomputed by copying the relevant pointer. + + +Since most image-processing applications prefer to work on images in which +the components of a pixel are stored together, the data passed to or from the +surrounding application uses the traditional convention: a single pixel is +represented by N consecutive JSAMPLE values, and an image row is an array of +(# of color components)*(image width) JSAMPLEs. One or more rows of data can +be represented by a pointer of type JSAMPARRAY in this scheme. This scheme is +converted to component-wise storage inside the JPEG library. (Applications +that want to skip JPEG preprocessing or postprocessing will have to contend +with component-wise storage.) + + +Arrays of DCT-coefficient values use the following data structure: + + typedef short JCOEF; a 16-bit signed integer + typedef JCOEF JBLOCK[DCTSIZE2]; an 8x8 block of coefficients + typedef JBLOCK *JBLOCKROW; ptr to one horizontal row of 8x8 blocks + typedef JBLOCKROW *JBLOCKARRAY; ptr to a list of such rows + typedef JBLOCKARRAY *JBLOCKIMAGE; ptr to a list of color component arrays + +The underlying type is at least a 16-bit signed integer; while "short" is big +enough on all machines of interest, on some machines it is preferable to use +"int" for speed reasons, despite the storage cost. Coefficients are grouped +into 8x8 blocks (but we always use #defines DCTSIZE and DCTSIZE2 rather than +"8" and "64"). + +The contents of a coefficient block may be in either "natural" or zigzagged +order, and may be true values or divided by the quantization coefficients, +depending on where the block is in the processing pipeline. In the current +library, coefficient blocks are kept in natural order everywhere; the entropy +codecs zigzag or dezigzag the data as it is written or read. The blocks +contain quantized coefficients everywhere outside the DCT/IDCT subsystems. +(This latter decision may need to be revisited to support variable +quantization a la JPEG Part 3.) + +Notice that the allocation unit is now a row of 8x8 blocks, corresponding to +eight rows of samples. Otherwise the structure is much the same as for +samples, and for the same reasons. + + +*** Suspendable processing *** + +In some applications it is desirable to use the JPEG library as an +incremental, memory-to-memory filter. In this situation the data source or +destination may be a limited-size buffer, and we can't rely on being able to +empty or refill the buffer at arbitrary times. Instead the application would +like to have control return from the library at buffer overflow/underrun, and +then resume compression or decompression at a later time. + +This scenario is supported for simple cases. (For anything more complex, we +recommend that the application "bite the bullet" and develop real multitasking +capability.) The libjpeg.txt file goes into more detail about the usage and +limitations of this capability; here we address the implications for library +structure. + +The essence of the problem is that the entropy codec (coder or decoder) must +be prepared to stop at arbitrary times. In turn, the controllers that call +the entropy codec must be able to stop before having produced or consumed all +the data that they normally would handle in one call. That part is reasonably +straightforward: we make the controller call interfaces include "progress +counters" which indicate the number of data chunks successfully processed, and +we require callers to test the counter rather than just assume all of the data +was processed. + +Rather than trying to restart at an arbitrary point, the current Huffman +codecs are designed to restart at the beginning of the current MCU after a +suspension due to buffer overflow/underrun. At the start of each call, the +codec's internal state is loaded from permanent storage (in the JPEG object +structures) into local variables. On successful completion of the MCU, the +permanent state is updated. (This copying is not very expensive, and may even +lead to *improved* performance if the local variables can be registerized.) +If a suspension occurs, the codec simply returns without updating the state, +thus effectively reverting to the start of the MCU. Note that this implies +leaving some data unprocessed in the source/destination buffer (ie, the +compressed partial MCU). The data source/destination module interfaces are +specified so as to make this possible. This also implies that the data buffer +must be large enough to hold a worst-case compressed MCU; a couple thousand +bytes should be enough. + +In a successive-approximation AC refinement scan, the progressive Huffman +decoder has to be able to undo assignments of newly nonzero coefficients if it +suspends before the MCU is complete, since decoding requires distinguishing +previously-zero and previously-nonzero coefficients. This is a bit tedious +but probably won't have much effect on performance. Other variants of Huffman +decoding need not worry about this, since they will just store the same values +again if forced to repeat the MCU. + +This approach would probably not work for an arithmetic codec, since its +modifiable state is quite large and couldn't be copied cheaply. Instead it +would have to suspend and resume exactly at the point of the buffer end. + +The JPEG marker reader is designed to cope with suspension at an arbitrary +point. It does so by backing up to the start of the marker parameter segment, +so the data buffer must be big enough to hold the largest marker of interest. +Again, a couple KB should be adequate. (A special "skip" convention is used +to bypass COM and APPn markers, so these can be larger than the buffer size +without causing problems; otherwise a 64K buffer would be needed in the worst +case.) + +The JPEG marker writer currently does *not* cope with suspension. +We feel that this is not necessary; it is much easier simply to require +the application to ensure there is enough buffer space before starting. (An +empty 2K buffer is more than sufficient for the header markers; and ensuring +there are a dozen or two bytes available before calling jpeg_finish_compress() +will suffice for the trailer.) This would not work for writing multi-scan +JPEG files, but we simply do not intend to support that capability with +suspension. + + +*** Memory manager services *** + +The JPEG library's memory manager controls allocation and deallocation of +memory, and it manages large "virtual" data arrays on machines where the +operating system does not provide virtual memory. Note that the same +memory manager serves both compression and decompression operations. + +In all cases, allocated objects are tied to a particular compression or +decompression master record, and they will be released when that master +record is destroyed. + +The memory manager does not provide explicit deallocation of objects. +Instead, objects are created in "pools" of free storage, and a whole pool +can be freed at once. This approach helps prevent storage-leak bugs, and +it speeds up operations whenever malloc/free are slow (as they often are). +The pools can be regarded as lifetime identifiers for objects. Two +pools/lifetimes are defined: + * JPOOL_PERMANENT lasts until master record is destroyed + * JPOOL_IMAGE lasts until done with image (JPEG datastream) +Permanent lifetime is used for parameters and tables that should be carried +across from one datastream to another; this includes all application-visible +parameters. Image lifetime is used for everything else. (A third lifetime, +JPOOL_PASS = one processing pass, was originally planned. However it was +dropped as not being worthwhile. The actual usage patterns are such that the +peak memory usage would be about the same anyway; and having per-pass storage +substantially complicates the virtual memory allocation rules --- see below.) + +The memory manager deals with three kinds of object: +1. "Small" objects. Typically these require no more than 10K-20K total. +2. "Large" objects. These may require tens to hundreds of K depending on + image size. Semantically they behave the same as small objects, but we + distinguish them because pool allocation heuristics may differ for large and + small objects (historically, large objects were also referenced by far + pointers on MS-DOS machines.) Note that individual "large" objects cannot + exceed the size allowed by type size_t, which may be 64K or less on some + machines. +3. "Virtual" objects. These are large 2-D arrays of JSAMPLEs or JBLOCKs + (typically large enough for the entire image being processed). The + memory manager provides stripwise access to these arrays. On machines + without virtual memory, the rest of the array may be swapped out to a + temporary file. + +(Note: JSAMPARRAY and JBLOCKARRAY data structures are a combination of large +objects for the data proper and small objects for the row pointers. For +convenience and speed, the memory manager provides single routines to create +these structures. Similarly, virtual arrays include a small control block +and a JSAMPARRAY or JBLOCKARRAY working buffer, all created with one call.) + +In the present implementation, virtual arrays are only permitted to have image +lifespan. (Permanent lifespan would not be reasonable, and pass lifespan is +not very useful since a virtual array's raison d'etre is to store data for +multiple passes through the image.) We also expect that only "small" objects +will be given permanent lifespan, though this restriction is not required by +the memory manager. + +In a non-virtual-memory machine, some performance benefit can be gained by +making the in-memory buffers for virtual arrays be as large as possible. +(For small images, the buffers might fit entirely in memory, so blind +swapping would be very wasteful.) The memory manager will adjust the height +of the buffers to fit within a prespecified maximum memory usage. In order +to do this in a reasonably optimal fashion, the manager needs to allocate all +of the virtual arrays at once. Therefore, there isn't a one-step allocation +routine for virtual arrays; instead, there is a "request" routine that simply +allocates the control block, and a "realize" routine (called just once) that +determines space allocation and creates all of the actual buffers. The +realize routine must allow for space occupied by non-virtual large objects. +(We don't bother to factor in the space needed for small objects, on the +grounds that it isn't worth the trouble.) + +To support all this, we establish the following protocol for doing business +with the memory manager: + 1. Modules must request virtual arrays (which may have only image lifespan) + during the initial setup phase, i.e., in their jinit_xxx routines. + 2. All "large" objects (including JSAMPARRAYs and JBLOCKARRAYs) must also be + allocated during initial setup. + 3. realize_virt_arrays will be called at the completion of initial setup. + The above conventions ensure that sufficient information is available + for it to choose a good size for virtual array buffers. +Small objects of any lifespan may be allocated at any time. We expect that +the total space used for small objects will be small enough to be negligible +in the realize_virt_arrays computation. + +In a virtual-memory machine, we simply pretend that the available space is +infinite, thus causing realize_virt_arrays to decide that it can allocate all +the virtual arrays as full-size in-memory buffers. The overhead of the +virtual-array access protocol is very small when no swapping occurs. + +A virtual array can be specified to be "pre-zeroed"; when this flag is set, +never-yet-written sections of the array are set to zero before being made +available to the caller. If this flag is not set, never-written sections +of the array contain garbage. (This feature exists primarily because the +equivalent logic would otherwise be needed in jdcoefct.c for progressive +JPEG mode; we may as well make it available for possible other uses.) + +The first write pass on a virtual array is required to occur in top-to-bottom +order; read passes, as well as any write passes after the first one, may +access the array in any order. This restriction exists partly to simplify +the virtual array control logic, and partly because some file systems may not +support seeking beyond the current end-of-file in a temporary file. The main +implication of this restriction is that rearrangement of rows (such as +converting top-to-bottom data order to bottom-to-top) must be handled while +reading data out of the virtual array, not while putting it in. + + +*** Memory manager internal structure *** + +To isolate system dependencies as much as possible, we have broken the +memory manager into two parts. There is a reasonably system-independent +"front end" (jmemmgr.c) and a "back end" that contains only the code +likely to change across systems. All of the memory management methods +outlined above are implemented by the front end. The back end provides +the following routines for use by the front end (none of these routines +are known to the rest of the JPEG code): + +jpeg_mem_init, jpeg_mem_term system-dependent initialization/shutdown + +jpeg_get_small, jpeg_free_small interface to malloc and free library routines + (or their equivalents) + +jpeg_get_large, jpeg_free_large historically was used to interface with + FAR malloc/free on MS-DOS machines; now the + same as jpeg_get_small/jpeg_free_small + +jpeg_mem_available estimate available memory + +jpeg_open_backing_store create a backing-store object + +read_backing_store, manipulate a backing-store object +write_backing_store, +close_backing_store + +On some systems there will be more than one type of backing-store object +(specifically, in MS-DOS a backing store file might be an area of extended +memory as well as a disk file). jpeg_open_backing_store is responsible for +choosing how to implement a given object. The read/write/close routines +are method pointers in the structure that describes a given object; this +lets them be different for different object types. + +It may be necessary to ensure that backing store objects are explicitly +released upon abnormal program termination. For example, MS-DOS won't free +extended memory by itself. To support this, we will expect the main program +or surrounding application to arrange to call self_destruct (typically via +jpeg_destroy) upon abnormal termination. This may require a SIGINT signal +handler or equivalent. We don't want to have the back end module install its +own signal handler, because that would pre-empt the surrounding application's +ability to control signal handling. + +The IJG distribution includes several memory manager back end implementations. +Usually the same back end should be suitable for all applications on a given +system, but it is possible for an application to supply its own back end at +need. + + +*** Implications of DNL marker *** + +Some JPEG files may use a DNL marker to postpone definition of the image +height (this would be useful for a fax-like scanner's output, for instance). +In these files the SOF marker claims the image height is 0, and you only +find out the true image height at the end of the first scan. + +We could read these files as follows: +1. Upon seeing zero image height, replace it by 65535 (the maximum allowed). +2. When the DNL is found, update the image height in the global image + descriptor. +This implies that control modules must avoid making copies of the image +height, and must re-test for termination after each MCU row. This would +be easy enough to do. + +In cases where image-size data structures are allocated, this approach will +result in very inefficient use of virtual memory or much-larger-than-necessary +temporary files. This seems acceptable for something that probably won't be a +mainstream usage. People might have to forgo use of memory-hogging options +(such as two-pass color quantization or noninterleaved JPEG files) if they +want efficient conversion of such files. (One could improve efficiency by +demanding a user-supplied upper bound for the height, less than 65536; in most +cases it could be much less.) + +The standard also permits the SOF marker to overestimate the image height, +with a DNL to give the true, smaller height at the end of the first scan. +This would solve the space problems if the overestimate wasn't too great. +However, it implies that you don't even know whether DNL will be used. + +This leads to a couple of very serious objections: +1. Testing for a DNL marker must occur in the inner loop of the decompressor's + Huffman decoder; this implies a speed penalty whether the feature is used + or not. +2. There is no way to hide the last-minute change in image height from an + application using the decoder. Thus *every* application using the IJG + library would suffer a complexity penalty whether it cared about DNL or + not. +We currently do not support DNL because of these problems. + +A different approach is to insist that DNL-using files be preprocessed by a +separate program that reads ahead to the DNL, then goes back and fixes the SOF +marker. This is a much simpler solution and is probably far more efficient. +Even if one wants piped input, buffering the first scan of the JPEG file needs +a lot smaller temp file than is implied by the maximum-height method. For +this approach we'd simply treat DNL as a no-op in the decompressor (at most, +check that it matches the SOF image height). + +We will not worry about making the compressor capable of outputting DNL. +Something similar to the first scheme above could be applied if anyone ever +wants to make that work. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbench.c glmark2-2021.02/src/libjpeg-turbo/tjbench.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbench.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjbench.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,990 @@ +/* + * Copyright (C)2009-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "./bmp.h" +#include "./tjutil.h" +#include "./turbojpeg.h" + + +#define _throw(op, err) { \ + printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err); \ + retval=-1; goto bailout;} +#define _throwunix(m) _throw(m, strerror(errno)) +#define _throwtj(m) _throw(m, tjGetErrorStr()) +#define _throwbmp(m) _throw(m, bmpgeterr()) + +int flags=TJFLAG_NOREALLOC, componly=0, decomponly=0, doyuv=0, quiet=0, + dotile=0, pf=TJPF_BGR, yuvpad=1, warmup=1, dowrite=1; +char *ext="ppm"; +const char *pixFormatStr[TJ_NUMPF]= +{ + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK" +}; +const char *subNameLong[TJ_NUMSAMP]= +{ + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +}; +const char *csName[TJ_NUMCS]= +{ + "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" +}; +const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; +tjscalingfactor *scalingfactors=NULL, sf={1, 1}; int nsf=0; +int xformop=TJXOP_NONE, xformopt=0; +int (*customFilter)(short *, tjregion, tjregion, int, int, tjtransform *); +double benchtime=5.0; + + +char *formatName(int subsamp, int cs, char *buf) +{ + if(cs==TJCS_YCbCr) return (char *)subNameLong[subsamp]; + else if(cs==TJCS_YCCK) + { + snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]); + return buf; + } + else return (char *)csName[cs]; +} + + +char *sigfig(double val, int figs, char *buf, int len) +{ + char format[80]; + int digitsafterdecimal=figs-(int)ceil(log10(fabs(val))); + if(digitsafterdecimal<1) snprintf(format, 80, "%%.0f"); + else snprintf(format, 80, "%%.%df", digitsafterdecimal); + snprintf(buf, len, format, val); + return buf; +} + + +/* Custom DCT filter which produces a negative of the image */ +int dummyDCTFilter(short *coeffs, tjregion arrayRegion, tjregion planeRegion, + int componentIndex, int transformIndex, tjtransform *transform) +{ + int i; + for(i=0; i0) + { + snprintf(qualstr, 6, "_Q%d", jpegqual); + qualstr[5]=0; + } + + if((handle=tjInitDecompress())==NULL) + _throwtj("executing tjInitDecompress()"); + + if(dstbuf==NULL) + { + if((dstbuf=(unsigned char *)malloc(pitch*scaledh))==NULL) + _throwunix("allocating destination buffer"); + dstbufalloc=1; + } + /* Set the destination buffer to gray so we know whether the decompressor + attempted to write to it */ + memset(dstbuf, 127, pitch*scaledh); + + if(doyuv) + { + int width=dotile? tilew:scaledw; + int height=dotile? tileh:scaledh; + int yuvsize=tjBufSizeYUV2(width, yuvpad, height, subsamp); + if((yuvbuf=(unsigned char *)malloc(yuvsize))==NULL) + _throwunix("allocating YUV buffer"); + memset(yuvbuf, 127, yuvsize); + } + + /* Benchmark */ + iter=-warmup; + elapsed=elapsedDecode=0.; + while(1) + { + int tile=0; + double start=gettime(); + for(row=0, dstptr=dstbuf; row=0) elapsedDecode+=gettime()-startDecode; + } + else + if(tjDecompress2(handle, jpegbuf[tile], jpegsize[tile], dstptr2, + width, pitch, height, pf, flags)==-1) + _throwtj("executing tjDecompress2()"); + } + } + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + if(doyuv) elapsed-=elapsedDecode; + + if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); + handle=NULL; + + if(quiet) + { + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), + quiet==2? "\n":" "); + if(doyuv) + printf("%s\n", + sigfig((double)(w*h)/1000000.*(double)iter/elapsedDecode, 4, tempstr, + 1024)); + else if(quiet!=2) printf("\n"); + } + else + { + printf("%s --> Frame rate: %f fps\n", + doyuv? "Decomp to YUV":"Decompress ", (double)iter/elapsed); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsed); + if(doyuv) + { + printf("YUV Decode --> Frame rate:  %f fps\n", + (double)iter/elapsedDecode); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsedDecode); + } + } + + if (!dowrite) goto bailout; + + if(sf.num!=1 || sf.denom!=1) + snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom); + else if(tilew!=w || tileh!=h) + snprintf(sizestr, 20, "%dx%d", tilew, tileh); + else snprintf(sizestr, 20, "full"); + if(decomponly) + snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext); + else + snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp], + qualstr, sizestr, ext); + + if(savebmp(tempstr, dstbuf, scaledw, scaledh, pf, + (flags&TJFLAG_BOTTOMUP)!=0)==-1) + _throwbmp("saving bitmap"); + ptr=strrchr(tempstr, '.'); + snprintf(ptr, 1024-(ptr-tempstr), "-err.%s", ext); + if(srcbuf && sf.num==1 && sf.denom==1) + { + if(!quiet) printf("Compression error written to %s.\n", tempstr); + if(subsamp==TJ_GRAYSCALE) + { + int index, index2; + for(row=0, index=0; row255) y=255; if(y<0) y=0; + dstbuf[rindex]=abs(dstbuf[rindex]-y); + dstbuf[gindex]=abs(dstbuf[gindex]-y); + dstbuf[bindex]=abs(dstbuf[bindex]-y); + } + } + } + else + { + for(row=0; row>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr, + (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down", subNameLong[subsamp], + jpegqual); + + for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2) + { + if(tilew>w) tilew=w; if(tileh>h) tileh=h; + ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; + + if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); + if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG size array"); + memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); + + if((flags&TJFLAG_NOREALLOC)!=0) + for(i=0; i=0) elapsedEncode+=gettime()-startEncode; + if(tjCompressFromYUV(handle, yuvbuf, width, yuvpad, height, + subsamp, &jpegbuf[tile], &jpegsize[tile], jpegqual, flags)==-1) + _throwtj("executing tjCompressFromYUV()"); + } + else + { + if(tjCompress2(handle, srcptr2, width, pitch, height, pf, + &jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1) + _throwtj("executing tjCompress2()"); + } + totaljpegsize+=jpegsize[tile]; + } + } + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + if(doyuv) elapsed-=elapsedEncode; + + if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); + handle=NULL; + + if(quiet==1) printf("%-5d %-5d ", tilew, tileh); + if(quiet) + { + if(doyuv) + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsedEncode, 4, tempstr, + 1024), quiet==2? "\n":" "); + printf("%-6s%s", + sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), + quiet==2? "\n":" "); + printf("%-6s%s", + sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), + quiet==2? "\n":" "); + } + else + { + printf("\n%s size: %d x %d\n", dotile? "Tile":"Image", tilew, + tileh); + if(doyuv) + { + printf("Encode YUV --> Frame rate: %f fps\n", + (double)iter/elapsedEncode); + printf(" Output image size: %d bytes\n", yuvsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)yuvsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsedEncode); + printf(" Output bit stream: %f Megabits/sec\n", + (double)yuvsize*8./1000000.*(double)iter/elapsedEncode); + } + printf("%s --> Frame rate: %f fps\n", + doyuv? "Comp from YUV":"Compress ", (double)iter/elapsed); + printf(" Output image size: %d bytes\n", + totaljpegsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)totaljpegsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000.*(double)iter/elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totaljpegsize*8./1000000.*(double)iter/elapsed); + } + if(tilew==w && tileh==h && dowrite) + { + snprintf(tempstr, 1024, "%s_%s_Q%d.jpg", filename, subName[subsamp], + jpegqual); + if((file=fopen(tempstr, "wb"))==NULL) + _throwunix("opening reference image"); + if(fwrite(jpegbuf[0], jpegsize[0], 1, file)!=1) + _throwunix("writing reference image"); + fclose(file); file=NULL; + if(!quiet) printf("Reference image written to %s\n", tempstr); + } + + /* Decompression test */ + if(!componly) + { + if(decomp(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual, + filename, tilew, tileh)==-1) + goto bailout; + } + + for(i=0; i>>>> JPEG %s --> %s (%s) <<<<<\n", + formatName(subsamp, cs, tempstr), pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down"); + + for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2) + { + if(tilew>w) tilew=w; if(tileh>h) tileh=h; + ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; + + if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); + if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) + *ntilesw*ntilesh))==NULL) + _throwunix("allocating JPEG size array"); + memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); + + if((flags&TJFLAG_NOREALLOC)!=0 || !dotile) + for(i=0; i %d x %d", TJSCALED(_w, sf), TJSCALED(_h, sf)); + printf("\n"); + } + else if(quiet==1) + { + printf("%-4s (%s) %-5s %-5s ", pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "BU":"TD", csName[cs], subNameLong[subsamp]); + printf("%-5d %-5d ", tilew, tileh); + } + + _subsamp=subsamp; + if(dotile || xformop!=TJXOP_NONE || xformopt!=0 || customFilter) + { + if((t=(tjtransform *)malloc(sizeof(tjtransform)*ntilesw*ntilesh)) + ==NULL) + _throwunix("allocating image transform array"); + + if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE + || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) + { + _w=h; _h=w; _tilew=tileh; _tileh=tilew; + } + + if(xformopt&TJXOPT_GRAY) _subsamp=TJ_GRAYSCALE; + if(xformop==TJXOP_HFLIP || xformop==TJXOP_ROT180) + _w=_w-(_w%tjMCUWidth[_subsamp]); + if(xformop==TJXOP_VFLIP || xformop==TJXOP_ROT180) + _h=_h-(_h%tjMCUHeight[_subsamp]); + if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT90) + _w=_w-(_w%tjMCUHeight[_subsamp]); + if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT270) + _h=_h-(_h%tjMCUWidth[_subsamp]); + _ntilesw=(_w+_tilew-1)/_tilew; + _ntilesh=(_h+_tileh-1)/_tileh; + + if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE + || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) + { + if(_subsamp==TJSAMP_422) _subsamp=TJSAMP_440; + else if(_subsamp==TJSAMP_440) _subsamp=TJSAMP_422; + } + + for(row=0, tile=0; row<_ntilesh; row++) + { + for(col=0; col<_ntilesw; col++, tile++) + { + t[tile].r.w=min(_tilew, _w-col*_tilew); + t[tile].r.h=min(_tileh, _h-row*_tileh); + t[tile].r.x=col*_tilew; + t[tile].r.y=row*_tileh; + t[tile].op=xformop; + t[tile].options=xformopt|TJXOPT_TRIM; + t[tile].customFilter=customFilter; + if(t[tile].options&TJXOPT_NOOUTPUT && jpegbuf[tile]) + { + tjFree(jpegbuf[tile]); jpegbuf[tile]=NULL; + } + } + } + + iter=-warmup; + elapsed=0.; + while(1) + { + start=gettime(); + if(tjTransform(handle, srcbuf, srcsize, _ntilesw*_ntilesh, jpegbuf, + jpegsize, t, flags)==-1) + _throwtj("executing tjTransform()"); + iter++; + if(iter>=1) + { + elapsed+=gettime()-start; + if(elapsed>=benchtime) break; + } + } + + free(t); t=NULL; + + for(tile=0, totaljpegsize=0; tile<_ntilesw*_ntilesh; tile++) + totaljpegsize+=jpegsize[tile]; + + if(quiet) + { + printf("%-6s%s%-6s%s", + sigfig((double)(w*h)/1000000./elapsed, 4, tempstr, 80), + quiet==2? "\n":" ", + sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), + quiet==2? "\n":" "); + } + else if(!quiet) + { + printf("Transform --> Frame rate: %f fps\n", 1.0/elapsed); + printf(" Output image size: %lu bytes\n", totaljpegsize); + printf(" Compression ratio: %f:1\n", + (double)(w*h*ps)/(double)totaljpegsize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w*h)/1000000./elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totaljpegsize*8./1000000./elapsed); + } + } + else + { + if(quiet==1) printf("N/A N/A "); + jpegsize[0]=srcsize; + memcpy(jpegbuf[0], srcbuf, srcsize); + } + + if(w==tilew) _tilew=_w; + if(h==tileh) _tileh=_h; + if(!(xformopt&TJXOPT_NOOUTPUT)) + { + if(decomp(NULL, jpegbuf, jpegsize, NULL, _w, _h, _subsamp, 0, + filename, _tilew, _tileh)==-1) + goto bailout; + } + else if(quiet==1) printf("N/A\n"); + + for(i=0; i [options]\n\n"); + printf(" %s\n", progname); + printf(" [options]\n\n"); + printf("Options:\n\n"); + printf("-alloc = Dynamically allocate JPEG image buffers\n"); + printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n"); + printf("-bottomup = Test bottom-up compression/decompression\n"); + printf("-tile = Test performance of the codec when the image is encoded as separate\n"); + printf(" tiles of varying sizes.\n"); + printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); + printf(" Test the specified color conversion path in the codec (default = BGR)\n"); + printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n"); + printf(" and destination bitmaps are still RGB. The conversion is done\n"); + printf(" internally prior to compression or after decompression.)\n"); + printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); + printf(" the underlying codec\n"); + printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); + printf(" codec\n"); + printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); + printf(" underlying codec\n"); + printf("-subsamp = When testing JPEG compression, this option specifies the level\n"); + printf(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or\n"); + printf(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n"); + printf(" sequence.\n"); + printf("-quiet = Output results in tabular rather than verbose format\n"); + printf("-yuv = Test YUV encoding/decoding functions\n"); + printf("-yuvpad

= If testing YUV encoding/decoding, this specifies the number of\n"); + printf(" bytes to which each row of each plane in the intermediate YUV image is\n"); + printf(" padded (default = 1)\n"); + printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n"); + printf(" factor of M/N (M/N = "); + for(i=0; i2) + { + if(i!=nsf-1) printf(", "); + if(i==nsf-2) printf("or "); + } + if(i%8==0 && i!=0) printf("\n "); + } + printf(")\n"); + printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); + printf(" Perform the corresponding lossless transform prior to\n"); + printf(" decompression (these options are mutually exclusive)\n"); + printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n"); + printf(" test (can be combined with the other transforms above)\n"); + printf("-benchtime = Run each benchmark for at least seconds (default = 5.0)\n"); + printf("-warmup = Execute each benchmark times to prime the cache before\n"); + printf(" taking performance measurements (default = 1)\n"); + printf("-componly = Stop after running compression tests. Do not test decompression.\n"); + printf("-nowrite = Do not write reference or output images (improves consistency of\n"); + printf(" performance measurements.)\n\n"); + printf("NOTE: If the quality is specified as a range (e.g. 90-100), a separate\n"); + printf("test will be performed for all quality values in the range.\n\n"); + exit(1); +} + + +int main(int argc, char *argv[]) +{ + unsigned char *srcbuf=NULL; int w=0, h=0, i, j; + int minqual=-1, maxqual=-1; char *temp; + int minarg=2, retval=0, subsamp=-1; + + if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0) + _throwtj("executing tjGetScalingFactors()"); + + if(argc100) + { + puts("ERROR: Quality must be between 1 and 100."); + exit(1); + } + if((temp=strchr(argv[2], '-'))!=NULL && strlen(temp)>1 + && sscanf(&temp[1], "%d", &maxqual)==1 && maxqual>minqual && maxqual>=1 + && maxqual<=100) {} + else maxqual=minqual; + } + + if(argc>minarg) + { + for(i=minarg; i0.0) benchtime=temp; + else usage(argv[0]); + } + if(!strcasecmp(argv[i], "-warmup") && i=0) + { + warmup=temp; + printf("Warmup runs = %d\n\n", warmup); + } + else usage(argv[0]); + } + if(!strcmp(argv[i], "-?")) usage(argv[0]); + if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC); + if(!strcasecmp(argv[i], "-bmp")) ext="bmp"; + if(!strcasecmp(argv[i], "-yuv")) + { + printf("Testing YUV planar encoding/decoding\n\n"); + doyuv=1; + } + if(!strcasecmp(argv[i], "-yuvpad") && i=1) yuvpad=temp; + } + if(!strcasecmp(argv[i], "-subsamp") && i=0 && subsamp=minqual; i--) + fullTest(srcbuf, w, h, subsamp, i, argv[1]); + printf("\n"); + } + else + { + if(pf!=TJPF_CMYK) + { + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1]); + printf("\n"); + } + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_420, i, argv[1]); + printf("\n"); + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_422, i, argv[1]); + printf("\n"); + for(i=maxqual; i>=minqual; i--) + fullTest(srcbuf, w, h, TJSAMP_444, i, argv[1]); + printf("\n"); + } + + bailout: + if(srcbuf) free(srcbuf); + return retval; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbenchtest.in glmark2-2021.02/src/libjpeg-turbo/tjbenchtest.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbenchtest.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjbenchtest.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,249 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +EXT=bmp +IMAGES="vgl_5674_0098.${EXT} vgl_6434_0018a.${EXT} vgl_6548_0026a.${EXT} nightshot_iso_100.${EXT}" +IMGDIR=@srcdir@/testimages +OUTDIR=`mktemp -d /tmp/__tjbenchtest_output.XXXXXX` +EXEDIR=. +BMPARG= +NSARG= +YUVARG= +ALLOC=0 +ALLOCARG= +if [ "$EXT" = "bmp" ]; then BMPARG=-bmp; fi + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjbenchtest.log + +if [ $# -gt 0 ]; then + if [ "$1" = "-yuv" ]; then + NSARG=-nosmooth + YUVARG=-yuv + +# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not +# always produce bitwise-identical results to tjCompress*() if subsampling is +# enabled. In both cases, if the image width or height are not evenly +# divisible by the MCU width/height, then the bottom and/or right edge are +# expanded. However, the libjpeg code performs this expansion prior to +# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is +# after downsampling. Thus, the two will agree only if the width/height along +# each downsampled dimension is an odd number or is evenly divisible by the MCU +# width/height. This disagreement basically amounts to a round-off error, but +# there is no easy way around it, so for now, we just test the only image that +# works. (NOTE: nightshot_iso_100 does not suffer from the above issue, but +# it suffers from an unrelated problem whereby the combination of +# tjDecompressToYUV*() and tjDecodeYUV*() do not produce bitwise-identical +# results to tjDecompress*() if decompression scaling is enabled. This latter +# phenomenon is not yet fully understood but is also believed to be some sort +# of round-off error.) + IMAGES="vgl_6548_0026a.${EXT}" + fi + if [ "$1" = "-alloc" ]; then + ALLOCARG=-alloc + ALLOC=1 + fi +fi + +# Standard tests +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .${EXT}` + runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + + # Compression + for dct in accurate fast; do + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG $ALLOCARG + for samp in GRAY 420 422 444; do + runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + for dct in fast accurate default; do + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + dctarg= + fi + + # Tiled compression & decompression + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + for samp in GRAY 444; do + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $i + done + fi + done + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + for samp in 420 422; do + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $i + done + fi + done + + # Tiled decompression + for samp in GRAY 444; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} + rm $i + done + fi + done + for samp in 420 422; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} + rm $i + done + fi + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} + done + done + + # Transforms + for samp in GRAY 420 422 444; do + runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444; do + runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG $ALLOCARG + if [ $ALLOC = 1 ]; then + runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} + else + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \ + $OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do + runme cmp $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT} + rm $i + done + fi + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbenchtest.java.in glmark2-2021.02/src/libjpeg-turbo/tjbenchtest.java.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjbenchtest.java.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjbenchtest.java.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,207 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" +IMGDIR=@srcdir@/testimages +OUTDIR=`mktemp -d /tmp/__tjbenchtest_java_output.XXXXXX` +EXEDIR=. +JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" +BMPARG= +NSARG= +YUVARG= + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjbenchtest-java.log + +if [ $# -gt 0 ]; then + if [ "$1" = "-yuv" ]; then + NSARG=-nosmooth + YUVARG=-yuv + +# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not +# always produce bitwise-identical results to tjCompress*() if subsampling is +# enabled. In both cases, if the image width or height are not evenly +# divisible by the MCU width/height, then the bottom and/or right edge are +# expanded. However, the libjpeg code performs this expansion prior to +# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is +# after downsampling. Thus, the two will agree only if the width/height along +# each downsampled dimension is an odd number or is evenly divisible by the MCU +# width/height. This disagreement basically amounts to a round-off error, but +# there is no easy way around it, so for now, we just test the only image that +# works. (NOTE: nightshot_iso_100 does not suffer from the above issue, but +# it suffers from an unrelated problem whereby the combination of +# tjDecompressToYUV*() and tjDecodeYUV*() do not produce bitwise-identical +# results to tjDecompress*() if decompression scaling is enabled. This latter +# phenomenon is not yet fully understood but is also believed to be some sort +# of round-off error.) + IMAGES="vgl_6548_0026a.bmp" + fi +fi + +# Standard tests +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .bmp` + runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + + # Compression + for dct in accurate fast; do + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG + for samp in GRAY 420 422 444; do + runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + for dct in fast accurate default; do + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + dctarg= + fi + + # Tiled compression & decompression + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + for samp in GRAY 444; do + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $i + done + done + runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + for samp in 420 422; do + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $i + done + done + + # Tiled decompression + for samp in GRAY 444; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $i + done + done + for samp in 420 422; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp $i -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $i + done + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp + done + done + + # Transforms + for samp in GRAY 420 422 444; do + runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444; do + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $i + done + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $i + done + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG + for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ + $OUTDIR/${basename}_${samp}_Q95_full.bmp; do + runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp + rm $i + done + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjexampletest.in glmark2-2021.02/src/libjpeg-turbo/tjexampletest.in --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjexampletest.in 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjexampletest.in 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,150 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + $* +} + +IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" +IMGDIR=@srcdir@/testimages +OUTDIR=__tjexampletest_output +EXEDIR=. +JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjexampletest.log + +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .bmp` + $EXEDIR/cjpeg -quality 95 -dct fast -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_fast_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_accurate_cjpeg.jpg + $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_accurate_cjpeg.jpg + for samp in GRAY 420 422 444; do + $EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_djpeg.bmp + $EXEDIR/djpeg -dct fast -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_djpeg.bmp + $EXEDIR/djpeg -dct int -rgb -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_djpeg.bmp + done + for samp in 420 422; do + $EXEDIR/djpeg -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp + $EXEDIR/djpeg -dct fast -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp + $EXEDIR/djpeg -dct int -nosmooth -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp + done + + # Compression + for dct in fast accurate; do + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -samp ${samp} -${dct}dct + runme cmp $OUTDIR/${basename}_${samp}_${dct}.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + # Decompression + for dct in fast accurate default; do + srcdct=${dct} + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + srcdct=fast + dctarg= + fi + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}.bmp $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}.bmp + done + for samp in 420 422; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + for samp in GRAY 420 422 444; do + $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${scale}.bmp + done + done + + # Transforms + for samp in GRAY 420 422 444; do + $EXEDIR/jpegtran -crop 70x60+16+16 -flip horizontal -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -flip vertical -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -transpose -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -transverse -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 90 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 180 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg + $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 270 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg + done + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 420 422 444; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 16,16,70x60 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + $EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + for samp in 420 422; do + $EXEDIR/djpeg -nosmooth -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 -fastupsample + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 16,16,70x60 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_GRAY_${xform}_jpegtran.jpg + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Transforms with scaling + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed s@_@/@g` + $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 16,16,70x60 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp + done + done + done + +done + +echo SUCCESS! diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjunittest.c glmark2-2021.02/src/libjpeg-turbo/tjunittest.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjunittest.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjunittest.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,735 @@ +/* + * Copyright (C)2009-2014 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This program tests the various code paths in the TurboJPEG C Wrapper + */ + +#include +#include +#include +#include +#include "./tjutil.h" +#include "./turbojpeg.h" +#ifdef _WIN32 + #include + #define random() rand() +#endif + + +void usage(char *progName) +{ + printf("\nUSAGE: %s [options]\n", progName); + printf("Options:\n"); + printf("-yuv = test YUV encoding/decoding support\n"); + printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n"); + printf(" 4-byte boundary\n"); + printf("-alloc = test automatic buffer allocation\n"); + exit(1); +} + + +#define _throwtj() {printf("TurboJPEG ERROR:\n%s\n", tjGetErrorStr()); \ + bailout();} +#define _tj(f) {if((f)==-1) _throwtj();} +#define _throw(m) {printf("ERROR: %s\n", m); bailout();} + +const char *subNameLong[TJ_NUMSAMP]= +{ + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +}; +const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; + +const char *pixFormatStr[TJ_NUMPF]= +{ + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale", + "RGBA", "BGRA", "ABGR", "ARGB", "CMYK" +}; + +const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1}; + +const int _3byteFormats[]={TJPF_RGB, TJPF_BGR}; +const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, + TJPF_CMYK}; +const int _onlyGray[]={TJPF_GRAY}; +const int _onlyRGB[]={TJPF_RGB}; + +int doyuv=0, alloc=0, pad=4; + +int exitStatus=0; +#define bailout() {exitStatus=-1; goto bailout;} + + +void initBuf(unsigned char *buf, int w, int h, int pf, int flags) +{ + int roffset=tjRedOffset[pf]; + int goffset=tjGreenOffset[pf]; + int boffset=tjBlueOffset[pf]; + int ps=tjPixelSize[pf]; + int index, row, col, halfway=16; + + if(pf==TJPF_GRAY) + { + memset(buf, 0, w*h*ps); + for(row=0; row=halfway) buf[index*ps+3]=0; + } + else + { + buf[index*ps+2]=0; + if(row=halfway) buf[index*ps+goffset]=255; + } + } + } + } +} + + +#define checkval(v, cv) { \ + if(vcv+1) { \ + printf("\nComp. %s at %d,%d should be %d, not %d\n", \ + #v, row, col, cv, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + +#define checkval0(v) { \ + if(v>1) { \ + printf("\nComp. %s at %d,%d should be 0, not %d\n", #v, row, col, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + +#define checkval255(v) { \ + if(v<254) { \ + printf("\nComp. %s at %d,%d should be 255, not %d\n", #v, row, col, v); \ + retval=0; exitStatus=-1; goto bailout; \ + }} + + +int checkBuf(unsigned char *buf, int w, int h, int pf, int subsamp, + tjscalingfactor sf, int flags) +{ + int roffset=tjRedOffset[pf]; + int goffset=tjGreenOffset[pf]; + int boffset=tjBlueOffset[pf]; + int aoffset=alphaOffset[pf]; + int ps=tjPixelSize[pf]; + int index, row, col, retval=1; + int halfway=16*sf.num/sf.denom; + int blocksize=8*sf.num/sf.denom; + + if(pf==TJPF_CMYK) + { + for(row=0; row=0? buf[index*ps+aoffset]:0xFF; + if(((row/blocksize)+(col/blocksize))%2==0) + { + if(row0) memset(*dstBuf, 0, *dstSize); + + + if(!alloc) flags|=TJFLAG_NOREALLOC; + if(doyuv) + { + unsigned long yuvSize=tjBufSizeYUV2(w, pad, h, subsamp); + tjscalingfactor sf={1, 1}; + tjhandle handle2=tjInitCompress(); + if(!handle2) _throwtj(); + + if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]); + _tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp, + flags)); + tjDestroy(handle2); + if(checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong, + jpegQual); + _tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf, + dstSize, jpegQual, flags)); + } + else + { + printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp], + jpegQual); + _tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp, + jpegQual, flags)); + } + + snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr, + subName[subsamp], jpegQual); + writeJPEG(*dstBuf, *dstSize, tempStr); + printf("Done.\n Result in %s\n", tempStr); + + bailout: + if(yuvBuf) free(yuvBuf); + if(srcBuf) free(srcBuf); +} + + +void _decompTest(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, + int flags, tjscalingfactor sf) +{ + unsigned char *dstBuf=NULL, *yuvBuf=NULL; + int _hdrw=0, _hdrh=0, _hdrsubsamp=-1; + int scaledWidth=TJSCALED(w, sf); + int scaledHeight=TJSCALED(h, sf); + unsigned long dstSize=0; + + _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh, + &_hdrsubsamp)); + if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp) + _throw("Incorrect JPEG header"); + + dstSize=scaledWidth*scaledHeight*tjPixelSize[pf]; + if((dstBuf=(unsigned char *)malloc(dstSize))==NULL) + _throw("Memory allocation failure"); + memset(dstBuf, 0, dstSize); + + if(doyuv) + { + unsigned long yuvSize=tjBufSizeYUV2(scaledWidth, pad, scaledHeight, + subsamp); + tjhandle handle2=tjInitDecompress(); + if(!handle2) _throwtj(); + + if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("JPEG -> YUV %s ", subNameLong[subsamp]); + if(sf.num!=1 || sf.denom!=1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth, + pad, scaledHeight, flags)); + if(checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf)) + printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down "); + _tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + tjDestroy(handle2); + } + else + { + printf("JPEG -> %s %s ", pixFormatStr[pf], + (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down "); + if(sf.num!=1 || sf.denom!=1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + } + + if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags)) + printf("Passed."); + else printf("FAILED!"); + printf("\n"); + + bailout: + if(yuvBuf) free(yuvBuf); + if(dstBuf) free(dstBuf); +} + + +void decompTest(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, + int flags) +{ + int i, n=0; + tjscalingfactor *sf=tjGetScalingFactors(&n); + if(!sf || !n) _throwtj(); + + for(i=0; i=TJPF_RGBX && pf<=TJPF_XRGB) + { + printf("\n"); + decompTest(dhandle, dstBuf, size, w, h, pf+(TJPF_RGBA-TJPF_RGBX), + basename, subsamp, flags); + } + printf("\n"); + } + } + printf("--------------------\n\n"); + + bailout: + if(chandle) tjDestroy(chandle); + if(dhandle) tjDestroy(dhandle); + + if(dstBuf) tjFree(dstBuf); +} + + +void bufSizeTest(void) +{ + int w, h, i, subsamp; + unsigned char *srcBuf=NULL, *dstBuf=NULL; + tjhandle handle=NULL; + unsigned long dstSize=0; + + if((handle=tjInitCompress())==NULL) _throwtj(); + + printf("Buffer size regression test\n"); + for(subsamp=0; subsamp1) + { + for(i=1; i + +static double getfreq(void) +{ + LARGE_INTEGER freq; + if(!QueryPerformanceFrequency(&freq)) return 0.0; + return (double)freq.QuadPart; +} + +static double f=-1.0; + +double gettime(void) +{ + LARGE_INTEGER t; + if(f<0.0) f=getfreq(); + if(f==0.0) return (double)GetTickCount()/1000.; + else + { + QueryPerformanceCounter(&t); + return (double)t.QuadPart/f; + } +} + +#else + +#include +#include + +double gettime(void) +{ + struct timeval tv; + if(gettimeofday(&tv, NULL)<0) return 0.0; + else return (double)tv.tv_sec+((double)tv.tv_usec/1000000.); +} + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjutil.h glmark2-2021.02/src/libjpeg-turbo/tjutil.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/tjutil.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/tjutil.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,47 @@ +/* + * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _WIN32 + #ifndef __MINGW32__ + #include + #define snprintf(str, n, format, ...) \ + _snprintf_s(str, n, _TRUNCATE, format, __VA_ARGS__) + #endif + #define strcasecmp stricmp + #define strncasecmp strnicmp +#endif + +#ifndef min + #define min(a,b) ((a)<(b)?(a):(b)) +#endif + +#ifndef max + #define max(a,b) ((a)>(b)?(a):(b)) +#endif + +extern double gettime(void); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/transupp.c glmark2-2021.02/src/libjpeg-turbo/transupp.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/transupp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/transupp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1621 @@ +/* + * transupp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2010, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains image transformation routines and other utility code + * used by the jpegtran sample application. These are NOT part of the core + * JPEG library. But we keep these routines separate from jpegtran.c to + * ease the task of maintaining jpegtran-like programs that have other user + * interfaces. + */ + +/* Although this file really shouldn't have access to the library internals, + * it's helpful to let it call jround_up() and jcopy_block_row(). + */ +#define JPEG_INTERNALS + +#include "jinclude.h" +#include "jpeglib.h" +#include "transupp.h" /* My own external interface */ +#include "jpegcomp.h" +#include /* to declare isdigit() */ + + +#if JPEG_LIB_VERSION >= 70 +#define dstinfo_min_DCT_h_scaled_size dstinfo->min_DCT_h_scaled_size +#define dstinfo_min_DCT_v_scaled_size dstinfo->min_DCT_v_scaled_size +#else +#define dstinfo_min_DCT_h_scaled_size DCTSIZE +#define dstinfo_min_DCT_v_scaled_size DCTSIZE +#endif + + +#if TRANSFORMS_SUPPORTED + +/* + * Lossless image transformation routines. These routines work on DCT + * coefficient arrays and thus do not require any lossy decompression + * or recompression of the image. + * Thanks to Guido Vollbeding for the initial design and code of this feature, + * and to Ben Jackson for introducing the cropping feature. + * + * Horizontal flipping is done in-place, using a single top-to-bottom + * pass through the virtual source array. It will thus be much the + * fastest option for images larger than main memory. + * + * The other routines require a set of destination virtual arrays, so they + * need twice as much memory as jpegtran normally does. The destination + * arrays are always written in normal scan order (top to bottom) because + * the virtual array manager expects this. The source arrays will be scanned + * in the corresponding order, which means multiple passes through the source + * arrays for most of the transforms. That could result in much thrashing + * if the image is larger than main memory. + * + * If cropping or trimming is involved, the destination arrays may be smaller + * than the source arrays. Note it is not possible to do horizontal flip + * in-place when a nonzero Y crop offset is specified, since we'd have to move + * data from one block row to another but the virtual array manager doesn't + * guarantee we can touch more than one row at a time. So in that case, + * we have to use a separate destination array. + * + * Some notes about the operating environment of the individual transform + * routines: + * 1. Both the source and destination virtual arrays are allocated from the + * source JPEG object, and therefore should be manipulated by calling the + * source's memory manager. + * 2. The destination's component count should be used. It may be smaller + * than the source's when forcing to grayscale. + * 3. Likewise the destination's sampling factors should be used. When + * forcing to grayscale the destination's sampling factors will be all 1, + * and we may as well take that as the effective iMCU size. + * 4. When "trim" is in effect, the destination's dimensions will be the + * trimmed values but the source's will be untrimmed. + * 5. When "crop" is in effect, the destination's dimensions will be the + * cropped values but the source's will be uncropped. Each transform + * routine is responsible for picking up source data starting at the + * correct X and Y offset for the crop region. (The X and Y offsets + * passed to the transform routines are measured in iMCU blocks of the + * destination.) + * 6. All the routines assume that the source and destination buffers are + * padded out to a full iMCU boundary. This is true, although for the + * source buffer it is an undocumented property of jdcoefct.c. + */ + + +LOCAL(void) +do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Crop. This is only used when no rotate/flip is requested with the crop. */ +{ + JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks; + int ci, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + jpeg_component_info *compptr; + + /* We simply have to copy the right amount of data (the destination's + * image size) starting at the given X and Y offsets in the source. + */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + jcopy_block_row(src_buffer[offset_y] + x_crop_blocks, + dst_buffer[offset_y], + compptr->width_in_blocks); + } + } + } +} + + +LOCAL(void) +do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, + jvirt_barray_ptr *src_coef_arrays) +/* Horizontal flip; done in-place, so no separate dest array is required. + * NB: this only works when y_crop_offset is zero. + */ +{ + JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks; + int ci, k, offset_y; + JBLOCKARRAY buffer; + JCOEFPTR ptr1, ptr2; + JCOEF temp1, temp2; + jpeg_component_info *compptr; + + /* Horizontal mirroring of DCT blocks is accomplished by swapping + * pairs of blocks in-place. Within a DCT block, we perform horizontal + * mirroring by changing the signs of odd-numbered columns. + * Partial iMCUs at the right edge are left untouched. + */ + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + for (blk_y = 0; blk_y < compptr->height_in_blocks; + blk_y += compptr->v_samp_factor) { + buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + /* Do the mirroring */ + for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) { + ptr1 = buffer[offset_y][blk_x]; + ptr2 = buffer[offset_y][comp_width - blk_x - 1]; + /* this unrolled loop doesn't need to know which row it's on... */ + for (k = 0; k < DCTSIZE2; k += 2) { + temp1 = *ptr1; /* swap even column */ + temp2 = *ptr2; + *ptr1++ = temp2; + *ptr2++ = temp1; + temp1 = *ptr1; /* swap odd column with sign change */ + temp2 = *ptr2; + *ptr1++ = -temp2; + *ptr2++ = -temp1; + } + } + if (x_crop_blocks > 0) { + /* Now left-justify the portion of the data to be kept. + * We can't use a single jcopy_block_row() call because that routine + * depends on memcpy(), whose behavior is unspecified for overlapping + * source and destination areas. Sigh. + */ + for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) { + jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks, + buffer[offset_y] + blk_x, + (JDIMENSION) 1); + } + } + } + } + } +} + + +LOCAL(void) +do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Horizontal flip in general cropping case */ +{ + JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, k, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Here we must output into a separate array because we can't touch + * different rows of a single virtual array simultaneously. Otherwise, + * this is essentially the same as the routine above. + */ + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + dst_row_ptr = dst_buffer[offset_y]; + src_row_ptr = src_buffer[offset_y]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Do the mirrorable blocks */ + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + /* this unrolled loop doesn't need to know which row it's on... */ + for (k = 0; k < DCTSIZE2; k += 2) { + *dst_ptr++ = *src_ptr++; /* copy even column */ + *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */ + } + } else { + /* Copy last partial block(s) verbatim */ + jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, + dst_row_ptr + dst_blk_x, + (JDIMENSION) 1); + } + } + } + } + } +} + + +LOCAL(void) +do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Vertical flip */ +{ + JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* We output into a separate array because we can't touch different + * rows of the source virtual array simultaneously. Otherwise, this + * is a pretty straightforward analog of horizontal flip. + * Within a DCT block, vertical mirroring is done by changing the signs + * of odd-numbered rows. + * Partial iMCUs at the bottom edge are copied verbatim. + */ + MCU_rows = srcinfo->output_height / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_height - y_crop_blocks - dst_blk_y - + (JDIMENSION) compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } else { + /* Bottom-edge blocks will be copied verbatim. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + dst_row_ptr = dst_buffer[offset_y]; + src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1]; + src_row_ptr += x_crop_blocks; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x++) { + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[dst_blk_x]; + for (i = 0; i < DCTSIZE; i += 2) { + /* copy even row */ + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = *src_ptr++; + /* copy odd row with sign change */ + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = - *src_ptr++; + } + } + } else { + /* Just copy row verbatim. */ + jcopy_block_row(src_buffer[offset_y] + x_crop_blocks, + dst_buffer[offset_y], + compptr->width_in_blocks); + } + } + } + } +} + + +LOCAL(void) +do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Transpose source into destination */ +{ + JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Transposing pixels within a block just requires transposing the + * DCT coefficients. + * Partial iMCUs at the edges require no special treatment; we simply + * process all the available DCT blocks for every component. + */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } +} + + +LOCAL(void) +do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 90 degree rotation is equivalent to + * 1. Transposing the image; + * 2. Horizontal mirroring. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Because of the horizontal mirror step, we can't process partial iMCUs + * at the (output) right edge properly. They just get transposed and + * not mirrored. + */ + MCU_cols = srcinfo->output_height / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_width - x_crop_blocks - dst_blk_x - + (JDIMENSION) compptr->h_samp_factor, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } else { + /* Edge blocks are transposed but not mirrored. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + i++; + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } else { + /* Edge blocks are transposed but not mirrored. */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } +} + + +LOCAL(void) +do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 270 degree rotation is equivalent to + * 1. Horizontal mirroring; + * 2. Transposing the image. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + /* Because of the horizontal mirror step, we can't process partial iMCUs + * at the (output) bottom edge properly. They just get transposed and + * not mirrored. + */ + MCU_rows = srcinfo->output_width / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[offset_x] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } + } else { + /* Edge blocks are transposed but not mirrored. */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } +} + + +LOCAL(void) +do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* 180 degree rotation is equivalent to + * 1. Vertical mirroring; + * 2. Horizontal mirroring. + * These two steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JBLOCKROW src_row_ptr, dst_row_ptr; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + MCU_cols = srcinfo->output_width / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + MCU_rows = srcinfo->output_height / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the vertically mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_height - y_crop_blocks - dst_blk_y - + (JDIMENSION) compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } else { + /* Bottom-edge rows are only mirrored horizontally. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_y + y_crop_blocks, + (JDIMENSION) compptr->v_samp_factor, FALSE); + } + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + dst_row_ptr = dst_buffer[offset_y]; + if (y_crop_blocks + dst_blk_y < comp_height) { + /* Row is within the mirrorable area. */ + src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + dst_ptr = dst_row_ptr[dst_blk_x]; + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Process the blocks that can be mirrored both ways. */ + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + for (i = 0; i < DCTSIZE; i += 2) { + /* For even row, negate every odd column. */ + for (j = 0; j < DCTSIZE; j += 2) { + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = - *src_ptr++; + } + /* For odd row, negate every even column. */ + for (j = 0; j < DCTSIZE; j += 2) { + *dst_ptr++ = - *src_ptr++; + *dst_ptr++ = *src_ptr++; + } + } + } else { + /* Any remaining right-edge blocks are only mirrored vertically. */ + src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x]; + for (i = 0; i < DCTSIZE; i += 2) { + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = *src_ptr++; + for (j = 0; j < DCTSIZE; j++) + *dst_ptr++ = - *src_ptr++; + } + } + } + } else { + /* Remaining rows are just mirrored horizontally. */ + src_row_ptr = src_buffer[offset_y]; + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Process the blocks that can be mirrored. */ + dst_ptr = dst_row_ptr[dst_blk_x]; + src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + for (i = 0; i < DCTSIZE2; i += 2) { + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = - *src_ptr++; + } + } else { + /* Any remaining right-edge blocks are only copied. */ + jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, + dst_row_ptr + dst_blk_x, + (JDIMENSION) 1); + } + } + } + } + } + } +} + + +LOCAL(void) +do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) +/* Transverse transpose is equivalent to + * 1. 180 degree rotation; + * 2. Transposition; + * or + * 1. Horizontal mirroring; + * 2. Transposition; + * 3. Horizontal mirroring. + * These steps are merged into a single processing routine. + */ +{ + JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y; + JDIMENSION x_crop_blocks, y_crop_blocks; + int ci, i, j, offset_x, offset_y; + JBLOCKARRAY src_buffer, dst_buffer; + JCOEFPTR src_ptr, dst_ptr; + jpeg_component_info *compptr; + + MCU_cols = srcinfo->output_height / + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + MCU_rows = srcinfo->output_width / + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + comp_width = MCU_cols * compptr->h_samp_factor; + comp_height = MCU_rows * compptr->v_samp_factor; + x_crop_blocks = x_crop_offset * compptr->h_samp_factor; + y_crop_blocks = y_crop_offset * compptr->v_samp_factor; + for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; + dst_blk_y += compptr->v_samp_factor) { + dst_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION) compptr->v_samp_factor, TRUE); + for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x += compptr->h_samp_factor) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + comp_width - x_crop_blocks - dst_blk_x - + (JDIMENSION) compptr->h_samp_factor, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } else { + src_buffer = (*srcinfo->mem->access_virt_barray) + ((j_common_ptr) srcinfo, src_coef_arrays[ci], + dst_blk_x + x_crop_blocks, + (JDIMENSION) compptr->h_samp_factor, FALSE); + } + for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { + dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; + if (y_crop_blocks + dst_blk_y < comp_height) { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Block is within the mirrorable area. */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + i++; + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } else { + /* Right-edge blocks are mirrored in y only */ + src_ptr = src_buffer[offset_x] + [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) { + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + j++; + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } + } + } else { + if (x_crop_blocks + dst_blk_x < comp_width) { + /* Bottom-edge blocks are mirrored in x only */ + src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + i++; + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + } + } else { + /* At lower right corner, just transpose, no mirroring */ + src_ptr = src_buffer[offset_x] + [dst_blk_y + offset_y + y_crop_blocks]; + for (i = 0; i < DCTSIZE; i++) + for (j = 0; j < DCTSIZE; j++) + dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + } + } + } + } + } + } + } +} + + +/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec. + * Returns TRUE if valid integer found, FALSE if not. + * *strptr is advanced over the digit string, and *result is set to its value. + */ + +LOCAL(boolean) +jt_read_integer (const char **strptr, JDIMENSION *result) +{ + const char *ptr = *strptr; + JDIMENSION val = 0; + + for (; isdigit(*ptr); ptr++) { + val = val * 10 + (JDIMENSION) (*ptr - '0'); + } + *result = val; + if (ptr == *strptr) + return FALSE; /* oops, no digits */ + *strptr = ptr; + return TRUE; +} + + +/* Parse a crop specification (written in X11 geometry style). + * The routine returns TRUE if the spec string is valid, FALSE if not. + * + * The crop spec string should have the format + * [f]x[f]{+-}{+-} + * where width, height, xoffset, and yoffset are unsigned integers. + * Each of the elements can be omitted to indicate a default value. + * (A weakness of this style is that it is not possible to omit xoffset + * while specifying yoffset, since they look alike.) + * + * This code is loosely based on XParseGeometry from the X11 distribution. + */ + +GLOBAL(boolean) +jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) +{ + info->crop = FALSE; + info->crop_width_set = JCROP_UNSET; + info->crop_height_set = JCROP_UNSET; + info->crop_xoffset_set = JCROP_UNSET; + info->crop_yoffset_set = JCROP_UNSET; + + if (isdigit(*spec)) { + /* fetch width */ + if (! jt_read_integer(&spec, &info->crop_width)) + return FALSE; + if (*spec == 'f' || *spec == 'F') { + spec++; + info->crop_width_set = JCROP_FORCE; + } else + info->crop_width_set = JCROP_POS; + } + if (*spec == 'x' || *spec == 'X') { + /* fetch height */ + spec++; + if (! jt_read_integer(&spec, &info->crop_height)) + return FALSE; + if (*spec == 'f' || *spec == 'F') { + spec++; + info->crop_height_set = JCROP_FORCE; + } else + info->crop_height_set = JCROP_POS; + } + if (*spec == '+' || *spec == '-') { + /* fetch xoffset */ + info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; + spec++; + if (! jt_read_integer(&spec, &info->crop_xoffset)) + return FALSE; + } + if (*spec == '+' || *spec == '-') { + /* fetch yoffset */ + info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; + spec++; + if (! jt_read_integer(&spec, &info->crop_yoffset)) + return FALSE; + } + /* We had better have gotten to the end of the string. */ + if (*spec != '\0') + return FALSE; + info->crop = TRUE; + return TRUE; +} + + +/* Trim off any partial iMCUs on the indicated destination edge */ + +LOCAL(void) +trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width) +{ + JDIMENSION MCU_cols; + + MCU_cols = info->output_width / info->iMCU_sample_width; + if (MCU_cols > 0 && info->x_crop_offset + MCU_cols == + full_width / info->iMCU_sample_width) + info->output_width = MCU_cols * info->iMCU_sample_width; +} + +LOCAL(void) +trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height) +{ + JDIMENSION MCU_rows; + + MCU_rows = info->output_height / info->iMCU_sample_height; + if (MCU_rows > 0 && info->y_crop_offset + MCU_rows == + full_height / info->iMCU_sample_height) + info->output_height = MCU_rows * info->iMCU_sample_height; +} + + +/* Request any required workspace. + * + * This routine figures out the size that the output image will be + * (which implies that all the transform parameters must be set before + * it is called). + * + * We allocate the workspace virtual arrays from the source decompression + * object, so that all the arrays (both the original data and the workspace) + * will be taken into account while making memory management decisions. + * Hence, this routine must be called after jpeg_read_header (which reads + * the image dimensions) and before jpeg_read_coefficients (which realizes + * the source's virtual arrays). + * + * This function returns FALSE right away if -perfect is given + * and transformation is not perfect. Otherwise returns TRUE. + */ + +GLOBAL(boolean) +jtransform_request_workspace (j_decompress_ptr srcinfo, + jpeg_transform_info *info) +{ + jvirt_barray_ptr *coef_arrays; + boolean need_workspace, transpose_it; + jpeg_component_info *compptr; + JDIMENSION xoffset, yoffset; + JDIMENSION width_in_iMCUs, height_in_iMCUs; + JDIMENSION width_in_blocks, height_in_blocks; + int ci, h_samp_factor, v_samp_factor; + + /* Determine number of components in output image */ + if (info->force_grayscale && + srcinfo->jpeg_color_space == JCS_YCbCr && + srcinfo->num_components == 3) + /* We'll only process the first component */ + info->num_components = 1; + else + /* Process all the components */ + info->num_components = srcinfo->num_components; + + /* Compute output image dimensions and related values. */ +#if JPEG_LIB_VERSION >= 80 + jpeg_core_output_dimensions(srcinfo); +#else + srcinfo->output_width = srcinfo->image_width; + srcinfo->output_height = srcinfo->image_height; +#endif + + /* Return right away if -perfect is given and transformation is not perfect. + */ + if (info->perfect) { + if (info->num_components == 1) { + if (!jtransform_perfect_transform(srcinfo->output_width, + srcinfo->output_height, + srcinfo->_min_DCT_h_scaled_size, + srcinfo->_min_DCT_v_scaled_size, + info->transform)) + return FALSE; + } else { + if (!jtransform_perfect_transform(srcinfo->output_width, + srcinfo->output_height, + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size, + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size, + info->transform)) + return FALSE; + } + } + + /* If there is only one output component, force the iMCU size to be 1; + * else use the source iMCU size. (This allows us to do the right thing + * when reducing color to grayscale, and also provides a handy way of + * cleaning up "funny" grayscale images whose sampling factors are not 1x1.) + */ + switch (info->transform) { + case JXFORM_TRANSPOSE: + case JXFORM_TRANSVERSE: + case JXFORM_ROT_90: + case JXFORM_ROT_270: + info->output_width = srcinfo->output_height; + info->output_height = srcinfo->output_width; + if (info->num_components == 1) { + info->iMCU_sample_width = srcinfo->_min_DCT_v_scaled_size; + info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size; + } else { + info->iMCU_sample_width = + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size; + info->iMCU_sample_height = + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size; + } + break; + default: + info->output_width = srcinfo->output_width; + info->output_height = srcinfo->output_height; + if (info->num_components == 1) { + info->iMCU_sample_width = srcinfo->_min_DCT_h_scaled_size; + info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size; + } else { + info->iMCU_sample_width = + srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size; + info->iMCU_sample_height = + srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size; + } + break; + } + + /* If cropping has been requested, compute the crop area's position and + * dimensions, ensuring that its upper left corner falls at an iMCU boundary. + */ + if (info->crop) { + /* Insert default values for unset crop parameters */ + if (info->crop_xoffset_set == JCROP_UNSET) + info->crop_xoffset = 0; /* default to +0 */ + if (info->crop_yoffset_set == JCROP_UNSET) + info->crop_yoffset = 0; /* default to +0 */ + if (info->crop_xoffset >= info->output_width || + info->crop_yoffset >= info->output_height) + ERREXIT(srcinfo, JERR_BAD_CROP_SPEC); + if (info->crop_width_set == JCROP_UNSET) + info->crop_width = info->output_width - info->crop_xoffset; + if (info->crop_height_set == JCROP_UNSET) + info->crop_height = info->output_height - info->crop_yoffset; + /* Ensure parameters are valid */ + if (info->crop_width <= 0 || info->crop_width > info->output_width || + info->crop_height <= 0 || info->crop_height > info->output_height || + info->crop_xoffset > info->output_width - info->crop_width || + info->crop_yoffset > info->output_height - info->crop_height) + ERREXIT(srcinfo, JERR_BAD_CROP_SPEC); + /* Convert negative crop offsets into regular offsets */ + if (info->crop_xoffset_set == JCROP_NEG) + xoffset = info->output_width - info->crop_width - info->crop_xoffset; + else + xoffset = info->crop_xoffset; + if (info->crop_yoffset_set == JCROP_NEG) + yoffset = info->output_height - info->crop_height - info->crop_yoffset; + else + yoffset = info->crop_yoffset; + /* Now adjust so that upper left corner falls at an iMCU boundary */ + if (info->crop_width_set == JCROP_FORCE) + info->output_width = info->crop_width; + else + info->output_width = + info->crop_width + (xoffset % info->iMCU_sample_width); + if (info->crop_height_set == JCROP_FORCE) + info->output_height = info->crop_height; + else + info->output_height = + info->crop_height + (yoffset % info->iMCU_sample_height); + /* Save x/y offsets measured in iMCUs */ + info->x_crop_offset = xoffset / info->iMCU_sample_width; + info->y_crop_offset = yoffset / info->iMCU_sample_height; + } else { + info->x_crop_offset = 0; + info->y_crop_offset = 0; + } + + /* Figure out whether we need workspace arrays, + * and if so whether they are transposed relative to the source. + */ + need_workspace = FALSE; + transpose_it = FALSE; + switch (info->transform) { + case JXFORM_NONE: + if (info->x_crop_offset != 0 || info->y_crop_offset != 0) + need_workspace = TRUE; + /* No workspace needed if neither cropping nor transforming */ + break; + case JXFORM_FLIP_H: + if (info->trim) + trim_right_edge(info, srcinfo->output_width); + if (info->y_crop_offset != 0 || info->slow_hflip) + need_workspace = TRUE; + /* do_flip_h_no_crop doesn't need a workspace array */ + break; + case JXFORM_FLIP_V: + if (info->trim) + trim_bottom_edge(info, srcinfo->output_height); + /* Need workspace arrays having same dimensions as source image. */ + need_workspace = TRUE; + break; + case JXFORM_TRANSPOSE: + /* transpose does NOT have to trim anything */ + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_TRANSVERSE: + if (info->trim) { + trim_right_edge(info, srcinfo->output_height); + trim_bottom_edge(info, srcinfo->output_width); + } + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_ROT_90: + if (info->trim) + trim_right_edge(info, srcinfo->output_height); + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + case JXFORM_ROT_180: + if (info->trim) { + trim_right_edge(info, srcinfo->output_width); + trim_bottom_edge(info, srcinfo->output_height); + } + /* Need workspace arrays having same dimensions as source image. */ + need_workspace = TRUE; + break; + case JXFORM_ROT_270: + if (info->trim) + trim_bottom_edge(info, srcinfo->output_width); + /* Need workspace arrays having transposed dimensions. */ + need_workspace = TRUE; + transpose_it = TRUE; + break; + } + + /* Allocate workspace if needed. + * Note that we allocate arrays padded out to the next iMCU boundary, + * so that transform routines need not worry about missing edge blocks. + */ + if (need_workspace) { + coef_arrays = (jvirt_barray_ptr *) + (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE, + sizeof(jvirt_barray_ptr) * info->num_components); + width_in_iMCUs = (JDIMENSION) + jdiv_round_up((long) info->output_width, + (long) info->iMCU_sample_width); + height_in_iMCUs = (JDIMENSION) + jdiv_round_up((long) info->output_height, + (long) info->iMCU_sample_height); + for (ci = 0; ci < info->num_components; ci++) { + compptr = srcinfo->comp_info + ci; + if (info->num_components == 1) { + /* we're going to force samp factors to 1x1 in this case */ + h_samp_factor = v_samp_factor = 1; + } else if (transpose_it) { + h_samp_factor = compptr->v_samp_factor; + v_samp_factor = compptr->h_samp_factor; + } else { + h_samp_factor = compptr->h_samp_factor; + v_samp_factor = compptr->v_samp_factor; + } + width_in_blocks = width_in_iMCUs * h_samp_factor; + height_in_blocks = height_in_iMCUs * v_samp_factor; + coef_arrays[ci] = (*srcinfo->mem->request_virt_barray) + ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE, + width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor); + } + info->workspace_coef_arrays = coef_arrays; + } else + info->workspace_coef_arrays = NULL; + + return TRUE; +} + + +/* Transpose destination image parameters */ + +LOCAL(void) +transpose_critical_parameters (j_compress_ptr dstinfo) +{ + int tblno, i, j, ci, itemp; + jpeg_component_info *compptr; + JQUANT_TBL *qtblptr; + JDIMENSION jtemp; + UINT16 qtemp; + + /* Transpose image dimensions */ + jtemp = dstinfo->image_width; + dstinfo->image_width = dstinfo->image_height; + dstinfo->image_height = jtemp; +#if JPEG_LIB_VERSION >= 70 + itemp = dstinfo->min_DCT_h_scaled_size; + dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size; + dstinfo->min_DCT_v_scaled_size = itemp; +#endif + + /* Transpose sampling factors */ + for (ci = 0; ci < dstinfo->num_components; ci++) { + compptr = dstinfo->comp_info + ci; + itemp = compptr->h_samp_factor; + compptr->h_samp_factor = compptr->v_samp_factor; + compptr->v_samp_factor = itemp; + } + + /* Transpose quantization tables */ + for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { + qtblptr = dstinfo->quant_tbl_ptrs[tblno]; + if (qtblptr != NULL) { + for (i = 0; i < DCTSIZE; i++) { + for (j = 0; j < i; j++) { + qtemp = qtblptr->quantval[i*DCTSIZE+j]; + qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i]; + qtblptr->quantval[j*DCTSIZE+i] = qtemp; + } + } + } + } +} + + +/* Adjust Exif image parameters. + * + * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible. + */ + +#if JPEG_LIB_VERSION >= 70 +LOCAL(void) +adjust_exif_parameters (JOCTET *data, unsigned int length, + JDIMENSION new_width, JDIMENSION new_height) +{ + boolean is_motorola; /* Flag for byte order */ + unsigned int number_of_tags, tagnum; + unsigned int firstoffset, offset; + JDIMENSION new_value; + + if (length < 12) return; /* Length of an IFD entry */ + + /* Discover byte order */ + if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49) + is_motorola = FALSE; + else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D) + is_motorola = TRUE; + else + return; + + /* Check Tag Mark */ + if (is_motorola) { + if (GETJOCTET(data[2]) != 0) return; + if (GETJOCTET(data[3]) != 0x2A) return; + } else { + if (GETJOCTET(data[3]) != 0) return; + if (GETJOCTET(data[2]) != 0x2A) return; + } + + /* Get first IFD offset (offset to IFD0) */ + if (is_motorola) { + if (GETJOCTET(data[4]) != 0) return; + if (GETJOCTET(data[5]) != 0) return; + firstoffset = GETJOCTET(data[6]); + firstoffset <<= 8; + firstoffset += GETJOCTET(data[7]); + } else { + if (GETJOCTET(data[7]) != 0) return; + if (GETJOCTET(data[6]) != 0) return; + firstoffset = GETJOCTET(data[5]); + firstoffset <<= 8; + firstoffset += GETJOCTET(data[4]); + } + if (firstoffset > length - 2) return; /* check end of data segment */ + + /* Get the number of directory entries contained in this IFD */ + if (is_motorola) { + number_of_tags = GETJOCTET(data[firstoffset]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[firstoffset+1]); + } else { + number_of_tags = GETJOCTET(data[firstoffset+1]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[firstoffset]); + } + if (number_of_tags == 0) return; + firstoffset += 2; + + /* Search for ExifSubIFD offset Tag in IFD0 */ + for (;;) { + if (firstoffset > length - 12) return; /* check end of data segment */ + /* Get Tag number */ + if (is_motorola) { + tagnum = GETJOCTET(data[firstoffset]); + tagnum <<= 8; + tagnum += GETJOCTET(data[firstoffset+1]); + } else { + tagnum = GETJOCTET(data[firstoffset+1]); + tagnum <<= 8; + tagnum += GETJOCTET(data[firstoffset]); + } + if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */ + if (--number_of_tags == 0) return; + firstoffset += 12; + } + + /* Get the ExifSubIFD offset */ + if (is_motorola) { + if (GETJOCTET(data[firstoffset+8]) != 0) return; + if (GETJOCTET(data[firstoffset+9]) != 0) return; + offset = GETJOCTET(data[firstoffset+10]); + offset <<= 8; + offset += GETJOCTET(data[firstoffset+11]); + } else { + if (GETJOCTET(data[firstoffset+11]) != 0) return; + if (GETJOCTET(data[firstoffset+10]) != 0) return; + offset = GETJOCTET(data[firstoffset+9]); + offset <<= 8; + offset += GETJOCTET(data[firstoffset+8]); + } + if (offset > length - 2) return; /* check end of data segment */ + + /* Get the number of directory entries contained in this SubIFD */ + if (is_motorola) { + number_of_tags = GETJOCTET(data[offset]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[offset+1]); + } else { + number_of_tags = GETJOCTET(data[offset+1]); + number_of_tags <<= 8; + number_of_tags += GETJOCTET(data[offset]); + } + if (number_of_tags < 2) return; + offset += 2; + + /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */ + do { + if (offset > length - 12) return; /* check end of data segment */ + /* Get Tag number */ + if (is_motorola) { + tagnum = GETJOCTET(data[offset]); + tagnum <<= 8; + tagnum += GETJOCTET(data[offset+1]); + } else { + tagnum = GETJOCTET(data[offset+1]); + tagnum <<= 8; + tagnum += GETJOCTET(data[offset]); + } + if (tagnum == 0xA002 || tagnum == 0xA003) { + if (tagnum == 0xA002) + new_value = new_width; /* ExifImageWidth Tag */ + else + new_value = new_height; /* ExifImageHeight Tag */ + if (is_motorola) { + data[offset+2] = 0; /* Format = unsigned long (4 octets) */ + data[offset+3] = 4; + data[offset+4] = 0; /* Number Of Components = 1 */ + data[offset+5] = 0; + data[offset+6] = 0; + data[offset+7] = 1; + data[offset+8] = 0; + data[offset+9] = 0; + data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset+11] = (JOCTET)(new_value & 0xFF); + } else { + data[offset+2] = 4; /* Format = unsigned long (4 octets) */ + data[offset+3] = 0; + data[offset+4] = 1; /* Number Of Components = 1 */ + data[offset+5] = 0; + data[offset+6] = 0; + data[offset+7] = 0; + data[offset+8] = (JOCTET)(new_value & 0xFF); + data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset+10] = 0; + data[offset+11] = 0; + } + } + offset += 12; + } while (--number_of_tags); +} +#endif + + +/* Adjust output image parameters as needed. + * + * This must be called after jpeg_copy_critical_parameters() + * and before jpeg_write_coefficients(). + * + * The return value is the set of virtual coefficient arrays to be written + * (either the ones allocated by jtransform_request_workspace, or the + * original source data arrays). The caller will need to pass this value + * to jpeg_write_coefficients(). + */ + +GLOBAL(jvirt_barray_ptr *) +jtransform_adjust_parameters (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +{ + /* If force-to-grayscale is requested, adjust destination parameters */ + if (info->force_grayscale) { + /* First, ensure we have YCbCr or grayscale data, and that the source's + * Y channel is full resolution. (No reasonable person would make Y + * be less than full resolution, so actually coping with that case + * isn't worth extra code space. But we check it to avoid crashing.) + */ + if (((dstinfo->jpeg_color_space == JCS_YCbCr && + dstinfo->num_components == 3) || + (dstinfo->jpeg_color_space == JCS_GRAYSCALE && + dstinfo->num_components == 1)) && + srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor && + srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) { + /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed + * properly. Among other things, it sets the target h_samp_factor & + * v_samp_factor to 1, which typically won't match the source. + * We have to preserve the source's quantization table number, however. + */ + int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no; + jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE); + dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no; + } else { + /* Sorry, can't do it */ + ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL); + } + } else if (info->num_components == 1) { + /* For a single-component source, we force the destination sampling factors + * to 1x1, with or without force_grayscale. This is useful because some + * decoders choke on grayscale images with other sampling factors. + */ + dstinfo->comp_info[0].h_samp_factor = 1; + dstinfo->comp_info[0].v_samp_factor = 1; + } + + /* Correct the destination's image dimensions as necessary + * for rotate/flip, resize, and crop operations. + */ +#if JPEG_LIB_VERSION >= 70 + dstinfo->jpeg_width = info->output_width; + dstinfo->jpeg_height = info->output_height; +#endif + + /* Transpose destination image parameters */ + switch (info->transform) { + case JXFORM_TRANSPOSE: + case JXFORM_TRANSVERSE: + case JXFORM_ROT_90: + case JXFORM_ROT_270: +#if JPEG_LIB_VERSION < 70 + dstinfo->image_width = info->output_height; + dstinfo->image_height = info->output_width; +#endif + transpose_critical_parameters(dstinfo); + break; + default: +#if JPEG_LIB_VERSION < 70 + dstinfo->image_width = info->output_width; + dstinfo->image_height = info->output_height; +#endif + break; + } + + /* Adjust Exif properties */ + if (srcinfo->marker_list != NULL && + srcinfo->marker_list->marker == JPEG_APP0+1 && + srcinfo->marker_list->data_length >= 6 && + GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 && + GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 && + GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 && + GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 && + GETJOCTET(srcinfo->marker_list->data[4]) == 0 && + GETJOCTET(srcinfo->marker_list->data[5]) == 0) { + /* Suppress output of JFIF marker */ + dstinfo->write_JFIF_header = FALSE; +#if JPEG_LIB_VERSION >= 70 + /* Adjust Exif image parameters */ + if (dstinfo->jpeg_width != srcinfo->image_width || + dstinfo->jpeg_height != srcinfo->image_height) + /* Align data segment to start of TIFF structure for parsing */ + adjust_exif_parameters(srcinfo->marker_list->data + 6, + srcinfo->marker_list->data_length - 6, + dstinfo->jpeg_width, dstinfo->jpeg_height); +#endif + } + + /* Return the appropriate output data set */ + if (info->workspace_coef_arrays != NULL) + return info->workspace_coef_arrays; + return src_coef_arrays; +} + + +/* Execute the actual transformation, if any. + * + * This must be called *after* jpeg_write_coefficients, because it depends + * on jpeg_write_coefficients to have computed subsidiary values such as + * the per-component width and height fields in the destination object. + * + * Note that some transformations will modify the source data arrays! + */ + +GLOBAL(void) +jtransform_execute_transform (j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) +{ + jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays; + + /* Note: conditions tested here should match those in switch statement + * in jtransform_request_workspace() + */ + switch (info->transform) { + case JXFORM_NONE: + if (info->x_crop_offset != 0 || info->y_crop_offset != 0) + do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_FLIP_H: + if (info->y_crop_offset != 0 || info->slow_hflip) + do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + else + do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset, + src_coef_arrays); + break; + case JXFORM_FLIP_V: + do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_TRANSPOSE: + do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_TRANSVERSE: + do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_90: + do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_180: + do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + case JXFORM_ROT_270: + do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset, + src_coef_arrays, dst_coef_arrays); + break; + } +} + +/* jtransform_perfect_transform + * + * Determine whether lossless transformation is perfectly + * possible for a specified image and transformation. + * + * Inputs: + * image_width, image_height: source image dimensions. + * MCU_width, MCU_height: pixel dimensions of MCU. + * transform: transformation identifier. + * Parameter sources from initialized jpeg_struct + * (after reading source header): + * image_width = cinfo.image_width + * image_height = cinfo.image_height + * MCU_width = cinfo.max_h_samp_factor * cinfo.block_size + * MCU_height = cinfo.max_v_samp_factor * cinfo.block_size + * Result: + * TRUE = perfect transformation possible + * FALSE = perfect transformation not possible + * (may use custom action then) + */ + +GLOBAL(boolean) +jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, + int MCU_width, int MCU_height, + JXFORM_CODE transform) +{ + boolean result = TRUE; /* initialize TRUE */ + + switch (transform) { + case JXFORM_FLIP_H: + case JXFORM_ROT_270: + if (image_width % (JDIMENSION) MCU_width) + result = FALSE; + break; + case JXFORM_FLIP_V: + case JXFORM_ROT_90: + if (image_height % (JDIMENSION) MCU_height) + result = FALSE; + break; + case JXFORM_TRANSVERSE: + case JXFORM_ROT_180: + if (image_width % (JDIMENSION) MCU_width) + result = FALSE; + if (image_height % (JDIMENSION) MCU_height) + result = FALSE; + break; + default: + break; + } + + return result; +} + +#endif /* TRANSFORMS_SUPPORTED */ + + +/* Setup decompression object to save desired markers in memory. + * This must be called before jpeg_read_header() to have the desired effect. + */ + +GLOBAL(void) +jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option) +{ +#ifdef SAVE_MARKERS_SUPPORTED + int m; + + /* Save comments except under NONE option */ + if (option != JCOPYOPT_NONE) { + jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF); + } + /* Save all types of APPn markers iff ALL option */ + if (option == JCOPYOPT_ALL) { + for (m = 0; m < 16; m++) + jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF); + } +#endif /* SAVE_MARKERS_SUPPORTED */ +} + +/* Copy markers saved in the given source object to the destination object. + * This should be called just after jpeg_start_compress() or + * jpeg_write_coefficients(). + * Note that those routines will have written the SOI, and also the + * JFIF APP0 or Adobe APP14 markers if selected. + */ + +GLOBAL(void) +jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option) +{ + jpeg_saved_marker_ptr marker; + + /* In the current implementation, we don't actually need to examine the + * option flag here; we just copy everything that got saved. + * But to avoid confusion, we do not output JFIF and Adobe APP14 markers + * if the encoder library already wrote one. + */ + for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) { + if (dstinfo->write_JFIF_header && + marker->marker == JPEG_APP0 && + marker->data_length >= 5 && + GETJOCTET(marker->data[0]) == 0x4A && + GETJOCTET(marker->data[1]) == 0x46 && + GETJOCTET(marker->data[2]) == 0x49 && + GETJOCTET(marker->data[3]) == 0x46 && + GETJOCTET(marker->data[4]) == 0) + continue; /* reject duplicate JFIF */ + if (dstinfo->write_Adobe_marker && + marker->marker == JPEG_APP0+14 && + marker->data_length >= 5 && + GETJOCTET(marker->data[0]) == 0x41 && + GETJOCTET(marker->data[1]) == 0x64 && + GETJOCTET(marker->data[2]) == 0x6F && + GETJOCTET(marker->data[3]) == 0x62 && + GETJOCTET(marker->data[4]) == 0x65) + continue; /* reject duplicate Adobe */ + jpeg_write_marker(dstinfo, marker->marker, + marker->data, marker->data_length); + } +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/transupp.h glmark2-2021.02/src/libjpeg-turbo/transupp.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/transupp.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/transupp.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,207 @@ +/* + * transupp.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code relevant + * to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains declarations for image transformation routines and + * other utility code used by the jpegtran sample application. These are + * NOT part of the core JPEG library. But we keep these routines separate + * from jpegtran.c to ease the task of maintaining jpegtran-like programs + * that have other user interfaces. + * + * NOTE: all the routines declared here have very specific requirements + * about when they are to be executed during the reading and writing of the + * source and destination files. See the comments in transupp.c, or see + * jpegtran.c for an example of correct usage. + */ + +/* If you happen not to want the image transform support, disable it here */ +#ifndef TRANSFORMS_SUPPORTED +#define TRANSFORMS_SUPPORTED 1 /* 0 disables transform code */ +#endif + +/* + * Although rotating and flipping data expressed as DCT coefficients is not + * hard, there is an asymmetry in the JPEG format specification for images + * whose dimensions aren't multiples of the iMCU size. The right and bottom + * image edges are padded out to the next iMCU boundary with junk data; but + * no padding is possible at the top and left edges. If we were to flip + * the whole image including the pad data, then pad garbage would become + * visible at the top and/or left, and real pixels would disappear into the + * pad margins --- perhaps permanently, since encoders & decoders may not + * bother to preserve DCT blocks that appear to be completely outside the + * nominal image area. So, we have to exclude any partial iMCUs from the + * basic transformation. + * + * Transpose is the only transformation that can handle partial iMCUs at the + * right and bottom edges completely cleanly. flip_h can flip partial iMCUs + * at the bottom, but leaves any partial iMCUs at the right edge untouched. + * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched. + * The other transforms are defined as combinations of these basic transforms + * and process edge blocks in a way that preserves the equivalence. + * + * The "trim" option causes untransformable partial iMCUs to be dropped; + * this is not strictly lossless, but it usually gives the best-looking + * result for odd-size images. Note that when this option is active, + * the expected mathematical equivalences between the transforms may not hold. + * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim + * followed by -rot 180 -trim trims both edges.) + * + * We also offer a lossless-crop option, which discards data outside a given + * image region but losslessly preserves what is inside. Like the rotate and + * flip transforms, lossless crop is restricted by the JPEG format: the upper + * left corner of the selected region must fall on an iMCU boundary. If this + * does not hold for the given crop parameters, we silently move the upper left + * corner up and/or left to make it so, simultaneously increasing the region + * dimensions to keep the lower right crop corner unchanged. (Thus, the + * output image covers at least the requested region, but may cover more.) + * The adjustment of the region dimensions may be optionally disabled. + * + * We also provide a lossless-resize option, which is kind of a lossless-crop + * operation in the DCT coefficient block domain - it discards higher-order + * coefficients and losslessly preserves lower-order coefficients of a + * sub-block. + * + * Rotate/flip transform, resize, and crop can be requested together in a + * single invocation. The crop is applied last --- that is, the crop region + * is specified in terms of the destination image after transform/resize. + * + * We also offer a "force to grayscale" option, which simply discards the + * chrominance channels of a YCbCr image. This is lossless in the sense that + * the luminance channel is preserved exactly. It's not the same kind of + * thing as the rotate/flip transformations, but it's convenient to handle it + * as part of this package, mainly because the transformation routines have to + * be aware of the option to know how many components to work on. + */ + + +/* + * Codes for supported types of image transformations. + */ + +typedef enum { + JXFORM_NONE, /* no transformation */ + JXFORM_FLIP_H, /* horizontal flip */ + JXFORM_FLIP_V, /* vertical flip */ + JXFORM_TRANSPOSE, /* transpose across UL-to-LR axis */ + JXFORM_TRANSVERSE, /* transpose across UR-to-LL axis */ + JXFORM_ROT_90, /* 90-degree clockwise rotation */ + JXFORM_ROT_180, /* 180-degree rotation */ + JXFORM_ROT_270 /* 270-degree clockwise (or 90 ccw) */ +} JXFORM_CODE; + +/* + * Codes for crop parameters, which can individually be unspecified, + * positive or negative for xoffset or yoffset, + * positive or forced for width or height. + */ + +typedef enum { + JCROP_UNSET, + JCROP_POS, + JCROP_NEG, + JCROP_FORCE +} JCROP_CODE; + +/* + * Transform parameters struct. + * NB: application must not change any elements of this struct after + * calling jtransform_request_workspace. + */ + +typedef struct { + /* Options: set by caller */ + JXFORM_CODE transform; /* image transform operator */ + boolean perfect; /* if TRUE, fail if partial MCUs are requested */ + boolean trim; /* if TRUE, trim partial MCUs as needed */ + boolean force_grayscale; /* if TRUE, convert color image to grayscale */ + boolean crop; /* if TRUE, crop source image */ + boolean slow_hflip; /* For best performance, the JXFORM_FLIP_H transform + normally modifies the source coefficients in place. + Setting this to TRUE will instead use a slower, + double-buffered algorithm, which leaves the source + coefficients in tact (necessary if other transformed + images must be generated from the same set of + coefficients. */ + + /* Crop parameters: application need not set these unless crop is TRUE. + * These can be filled in by jtransform_parse_crop_spec(). + */ + JDIMENSION crop_width; /* Width of selected region */ + JCROP_CODE crop_width_set; /* (forced disables adjustment) */ + JDIMENSION crop_height; /* Height of selected region */ + JCROP_CODE crop_height_set; /* (forced disables adjustment) */ + JDIMENSION crop_xoffset; /* X offset of selected region */ + JCROP_CODE crop_xoffset_set; /* (negative measures from right edge) */ + JDIMENSION crop_yoffset; /* Y offset of selected region */ + JCROP_CODE crop_yoffset_set; /* (negative measures from bottom edge) */ + + /* Internal workspace: caller should not touch these */ + int num_components; /* # of components in workspace */ + jvirt_barray_ptr *workspace_coef_arrays; /* workspace for transformations */ + JDIMENSION output_width; /* cropped destination dimensions */ + JDIMENSION output_height; + JDIMENSION x_crop_offset; /* destination crop offsets measured in iMCUs */ + JDIMENSION y_crop_offset; + int iMCU_sample_width; /* destination iMCU size */ + int iMCU_sample_height; +} jpeg_transform_info; + + +#if TRANSFORMS_SUPPORTED + +/* Parse a crop specification (written in X11 geometry style) */ +EXTERN(boolean) jtransform_parse_crop_spec + (jpeg_transform_info *info, const char *spec); +/* Request any required workspace */ +EXTERN(boolean) jtransform_request_workspace + (j_decompress_ptr srcinfo, jpeg_transform_info *info); +/* Adjust output image parameters */ +EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); +/* Execute the actual transformation, if any */ +EXTERN(void) jtransform_execute_transform + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); +/* Determine whether lossless transformation is perfectly + * possible for a specified image and transformation. + */ +EXTERN(boolean) jtransform_perfect_transform + (JDIMENSION image_width, JDIMENSION image_height, int MCU_width, + int MCU_height, JXFORM_CODE transform); + +/* jtransform_execute_transform used to be called + * jtransform_execute_transformation, but some compilers complain about + * routine names that long. This macro is here to avoid breaking any + * old source code that uses the original name... + */ +#define jtransform_execute_transformation jtransform_execute_transform + +#endif /* TRANSFORMS_SUPPORTED */ + + +/* + * Support for copying optional markers from source to destination file. + */ + +typedef enum { + JCOPYOPT_NONE, /* copy no optional markers */ + JCOPYOPT_COMMENTS, /* copy only comment (COM) markers */ + JCOPYOPT_ALL /* copy all optional markers */ +} JCOPY_OPTION; + +#define JCOPYOPT_DEFAULT JCOPYOPT_COMMENTS /* recommended default */ + +/* Setup decompression object to save desired markers in memory */ +EXTERN(void) jcopy_markers_setup + (j_decompress_ptr srcinfo, JCOPY_OPTION option); +/* Copy markers saved in the given source object to the destination object */ +EXTERN(void) jcopy_markers_execute + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg.c glmark2-2021.02/src/libjpeg-turbo/turbojpeg.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/turbojpeg.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,2121 @@ +/* + * Copyright (C)2009-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* TurboJPEG/LJT: this implements the TurboJPEG API using libjpeg or + libjpeg-turbo */ + +#include +#include +#include +#include +#define JPEG_INTERNALS +#include +#include +#include +#include "./turbojpeg.h" +#include "./tjutil.h" +#include "transupp.h" +#include "./jpegcomp.h" + +extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, + unsigned long *, boolean); +extern void jpeg_mem_src_tj(j_decompress_ptr, const unsigned char *, + unsigned long); + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) +#define isPow2(x) (((x)&(x-1))==0) + + +/* Error handling (based on example in example.c) */ + +static char errStr[JMSG_LENGTH_MAX]="No error"; + +struct my_error_mgr +{ + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; + void (*emit_message)(j_common_ptr, int); + boolean warning; +}; +typedef struct my_error_mgr *my_error_ptr; + +static void my_error_exit(j_common_ptr cinfo) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + (*cinfo->err->output_message)(cinfo); + longjmp(myerr->setjmp_buffer, 1); +} + +/* Based on output_message() in jerror.c */ + +static void my_output_message(j_common_ptr cinfo) +{ + (*cinfo->err->format_message)(cinfo, errStr); +} + +static void my_emit_message(j_common_ptr cinfo, int msg_level) +{ + my_error_ptr myerr=(my_error_ptr)cinfo->err; + myerr->emit_message(cinfo, msg_level); + if(msg_level<0) myerr->warning=TRUE; +} + + +/* Global structures, macros, etc. */ + +enum {COMPRESS=1, DECOMPRESS=2}; + +typedef struct _tjinstance +{ + struct jpeg_compress_struct cinfo; + struct jpeg_decompress_struct dinfo; + struct my_error_mgr jerr; + int init, headerRead; +} tjinstance; + +static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3, 3}; + +static const JXFORM_CODE xformtypes[TJ_NUMXOP]= +{ + JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE, + JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270 +}; + +#define NUMSF 16 +static const tjscalingfactor sf[NUMSF]={ + {2, 1}, + {15, 8}, + {7, 4}, + {13, 8}, + {3, 2}, + {11, 8}, + {5, 4}, + {9, 8}, + {1, 1}, + {7, 8}, + {3, 4}, + {5, 8}, + {1, 2}, + {3, 8}, + {1, 4}, + {1, 8} +}; + +#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ + retval=-1; goto bailout;} +#define getinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_compress_ptr cinfo=NULL; j_decompress_ptr dinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + cinfo=&this->cinfo; dinfo=&this->dinfo; \ + this->jerr.warning=FALSE; +#define getcinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_compress_ptr cinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + cinfo=&this->cinfo; \ + this->jerr.warning=FALSE; +#define getdinstance(handle) tjinstance *this=(tjinstance *)handle; \ + j_decompress_ptr dinfo=NULL; \ + if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1;} \ + dinfo=&this->dinfo; \ + this->jerr.warning=FALSE; + +static int getPixelFormat(int pixelSize, int flags) +{ + if(pixelSize==1) return TJPF_GRAY; + if(pixelSize==3) + { + if(flags&TJ_BGR) return TJPF_BGR; + else return TJPF_RGB; + } + if(pixelSize==4) + { + if(flags&TJ_ALPHAFIRST) + { + if(flags&TJ_BGR) return TJPF_XBGR; + else return TJPF_XRGB; + } + else + { + if(flags&TJ_BGR) return TJPF_BGRX; + else return TJPF_RGBX; + } + } + return -1; +} + +static int setCompDefaults(struct jpeg_compress_struct *cinfo, + int pixelFormat, int subsamp, int jpegQual, int flags) +{ + int retval=0; + char *env=NULL; + + switch(pixelFormat) + { + case TJPF_GRAY: + cinfo->in_color_space=JCS_GRAYSCALE; break; + #if JCS_EXTENSIONS==1 + case TJPF_RGB: + cinfo->in_color_space=JCS_EXT_RGB; break; + case TJPF_BGR: + cinfo->in_color_space=JCS_EXT_BGR; break; + case TJPF_RGBX: + case TJPF_RGBA: + cinfo->in_color_space=JCS_EXT_RGBX; break; + case TJPF_BGRX: + case TJPF_BGRA: + cinfo->in_color_space=JCS_EXT_BGRX; break; + case TJPF_XRGB: + case TJPF_ARGB: + cinfo->in_color_space=JCS_EXT_XRGB; break; + case TJPF_XBGR: + case TJPF_ABGR: + cinfo->in_color_space=JCS_EXT_XBGR; break; + #else + case TJPF_RGB: + case TJPF_BGR: + case TJPF_RGBX: + case TJPF_BGRX: + case TJPF_XRGB: + case TJPF_XBGR: + case TJPF_RGBA: + case TJPF_BGRA: + case TJPF_ARGB: + case TJPF_ABGR: + cinfo->in_color_space=JCS_RGB; pixelFormat=TJPF_RGB; + break; + #endif + case TJPF_CMYK: + cinfo->in_color_space=JCS_CMYK; break; + } + + cinfo->input_components=tjPixelSize[pixelFormat]; + jpeg_set_defaults(cinfo); + +#ifndef NO_GETENV + if((env=getenv("TJ_OPTIMIZE"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) + cinfo->optimize_coding=TRUE; + if((env=getenv("TJ_ARITHMETIC"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) + cinfo->arith_code=TRUE; + if((env=getenv("TJ_RESTART"))!=NULL && strlen(env)>0) + { + int temp=-1; char tempc=0; + if(sscanf(env, "%d%c", &temp, &tempc)>=1 && temp>=0 && temp<=65535) + { + if(toupper(tempc)=='B') + { + cinfo->restart_interval=temp; + cinfo->restart_in_rows=0; + } + else + cinfo->restart_in_rows=temp; + } + } +#endif + + if(jpegQual>=0) + { + jpeg_set_quality(cinfo, jpegQual, TRUE); + if(jpegQual>=96 || flags&TJFLAG_ACCURATEDCT) cinfo->dct_method=JDCT_ISLOW; + else cinfo->dct_method=JDCT_FASTEST; + } + if(subsamp==TJSAMP_GRAY) + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + else if(pixelFormat==TJPF_CMYK) + jpeg_set_colorspace(cinfo, JCS_YCCK); + else jpeg_set_colorspace(cinfo, JCS_YCbCr); + +#ifndef NO_GETENV + if((env=getenv("TJ_PROGRESSIVE"))!=NULL && strlen(env)>0 + && !strcmp(env, "1")) + jpeg_simple_progression(cinfo); +#endif + + cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8; + cinfo->comp_info[1].h_samp_factor=1; + cinfo->comp_info[2].h_samp_factor=1; + if(cinfo->num_components>3) + cinfo->comp_info[3].h_samp_factor=tjMCUWidth[subsamp]/8; + cinfo->comp_info[0].v_samp_factor=tjMCUHeight[subsamp]/8; + cinfo->comp_info[1].v_samp_factor=1; + cinfo->comp_info[2].v_samp_factor=1; + if(cinfo->num_components>3) + cinfo->comp_info[3].v_samp_factor=tjMCUHeight[subsamp]/8; + + return retval; +} + +static int setDecompDefaults(struct jpeg_decompress_struct *dinfo, + int pixelFormat, int flags) +{ + int retval=0; + + switch(pixelFormat) + { + case TJPF_GRAY: + dinfo->out_color_space=JCS_GRAYSCALE; break; + #if JCS_EXTENSIONS==1 + case TJPF_RGB: + dinfo->out_color_space=JCS_EXT_RGB; break; + case TJPF_BGR: + dinfo->out_color_space=JCS_EXT_BGR; break; + case TJPF_RGBX: + dinfo->out_color_space=JCS_EXT_RGBX; break; + case TJPF_BGRX: + dinfo->out_color_space=JCS_EXT_BGRX; break; + case TJPF_XRGB: + dinfo->out_color_space=JCS_EXT_XRGB; break; + case TJPF_XBGR: + dinfo->out_color_space=JCS_EXT_XBGR; break; + #if JCS_ALPHA_EXTENSIONS==1 + case TJPF_RGBA: + dinfo->out_color_space=JCS_EXT_RGBA; break; + case TJPF_BGRA: + dinfo->out_color_space=JCS_EXT_BGRA; break; + case TJPF_ARGB: + dinfo->out_color_space=JCS_EXT_ARGB; break; + case TJPF_ABGR: + dinfo->out_color_space=JCS_EXT_ABGR; break; + #endif + #else + case TJPF_RGB: + case TJPF_BGR: + case TJPF_RGBX: + case TJPF_BGRX: + case TJPF_XRGB: + case TJPF_XBGR: + case TJPF_RGBA: + case TJPF_BGRA: + case TJPF_ARGB: + case TJPF_ABGR: + dinfo->out_color_space=JCS_RGB; break; + #endif + case TJPF_CMYK: + dinfo->out_color_space=JCS_CMYK; break; + default: + _throw("Unsupported pixel format"); + } + + if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; + + bailout: + return retval; +} + + +static int getSubsamp(j_decompress_ptr dinfo) +{ + int retval=-1, i, k; + + /* The sampling factors actually have no meaning with grayscale JPEG files, + and in fact it's possible to generate grayscale JPEGs with sampling + factors > 1 (even though those sampling factors are ignored by the + decompressor.) Thus, we need to treat grayscale as a special case. */ + if(dinfo->num_components==1 && dinfo->jpeg_color_space==JCS_GRAYSCALE) + return TJSAMP_GRAY; + + for(i=0; inum_components==pixelsize[i] + || ((dinfo->jpeg_color_space==JCS_YCCK + || dinfo->jpeg_color_space==JCS_CMYK) + && pixelsize[i]==3 && dinfo->num_components==4)) + { + if(dinfo->comp_info[0].h_samp_factor==tjMCUWidth[i]/8 + && dinfo->comp_info[0].v_samp_factor==tjMCUHeight[i]/8) + { + int match=0; + for(k=1; knum_components; k++) + { + int href=1, vref=1; + if(dinfo->jpeg_color_space==JCS_YCCK && k==3) + { + href=tjMCUWidth[i]/8; vref=tjMCUHeight[i]/8; + } + if(dinfo->comp_info[k].h_samp_factor==href + && dinfo->comp_info[k].v_samp_factor==vref) + match++; + } + if(match==dinfo->num_components-1) + { + retval=i; break; + } + } + } + } + return retval; +} + + +#ifndef JCS_EXTENSIONS + +/* Conversion functions to emulate the colorspace extensions. This allows the + TurboJPEG wrapper to be used with libjpeg */ + +#define TORGB(PS, ROFFSET, GOFFSET, BOFFSET) { \ + int rowPad=pitch-width*PS; \ + while(height--) \ + { \ + unsigned char *endOfRow=src+width*PS; \ + while(srcjerr.setjmp_buffer)) return -1; + if(this->init&COMPRESS) jpeg_destroy_compress(cinfo); + if(this->init&DECOMPRESS) jpeg_destroy_decompress(dinfo); + free(this); + return 0; +} + + +/* These are exposed mainly because Windows can't malloc() and free() across + DLL boundaries except when the CRT DLL is used, and we don't use the CRT DLL + with turbojpeg.dll for compatibility reasons. However, these functions + can potentially be used for other purposes by different implementations. */ + +DLLEXPORT void DLLCALL tjFree(unsigned char *buf) +{ + if(buf) free(buf); +} + + +DLLEXPORT unsigned char *DLLCALL tjAlloc(int bytes) +{ + return (unsigned char *)malloc(bytes); +} + + +/* Compressor */ + +static tjhandle _tjInitCompress(tjinstance *this) +{ + static unsigned char buffer[1]; + unsigned char *buf=buffer; unsigned long size=1; + + /* This is also straight out of example.c */ + this->cinfo.err=jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit=my_error_exit; + this->jerr.pub.output_message=my_output_message; + this->jerr.emit_message=this->jerr.pub.emit_message; + this->jerr.pub.emit_message=my_emit_message; + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + if(this) free(this); return NULL; + } + + jpeg_create_compress(&this->cinfo); + /* Make an initial call so it will create the destination manager */ + jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0); + + this->init|=COMPRESS; + return (tjhandle)this; +} + +DLLEXPORT tjhandle DLLCALL tjInitCompress(void) +{ + tjinstance *this=NULL; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitCompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + return _tjInitCompress(this); +} + + +DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, + int jpegSubsamp) +{ + unsigned long retval=0; int mcuw, mcuh, chromasf; + if(width<1 || height<1 || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT) + _throw("tjBufSize(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) */ + mcuw=tjMCUWidth[jpegSubsamp]; + mcuh=tjMCUHeight[jpegSubsamp]; + chromasf=jpegSubsamp==TJSAMP_GRAY? 0: 4*64/(mcuw*mcuh); + retval=PAD(width, mcuw) * PAD(height, mcuh) * (2 + chromasf) + 2048; + + bailout: + return retval; +} + +DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height) +{ + unsigned long retval=0; + if(width<1 || height<1) + _throw("TJBUFSIZE(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) */ + retval=PAD(width, 16) * PAD(height, 16) * 6 + 2048; + + bailout: + return retval; +} + + +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, + int subsamp) +{ + int retval=0, nc, i; + + if(subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjBufSizeYUV2(): Invalid argument"); + + nc=(subsamp==TJSAMP_GRAY? 1:3); + for(i=0; i=TJ_NUMSAMP) + _throw("tjPlaneWidth(): Invalid argument"); + nc=(subsamp==TJSAMP_GRAY? 1:3); + if(componentID<0 || componentID>=nc) + _throw("tjPlaneWidth(): Invalid argument"); + + pw=PAD(width, tjMCUWidth[subsamp]/8); + if(componentID==0) + retval=pw; + else + retval=pw*8/tjMCUWidth[subsamp]; + + bailout: + return retval; +} + + +DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp) +{ + int ph, nc, retval=0; + + if(height<1 || subsamp<0 || subsamp>=TJ_NUMSAMP) + _throw("tjPlaneHeight(): Invalid argument"); + nc=(subsamp==TJSAMP_GRAY? 1:3); + if(componentID<0 || componentID>=nc) + _throw("tjPlaneHeight(): Invalid argument"); + + ph=PAD(height, tjMCUHeight[subsamp]/8); + if(componentID==0) + retval=ph; + else + retval=ph*8/tjMCUHeight[subsamp]; + + bailout: + return retval; +} + + +DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, + int stride, int height, int subsamp) +{ + unsigned long retval=0; + int pw, ph; + + if(width<1 || height<1 || subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjPlaneSizeYUV(): Invalid argument"); + + pw=tjPlaneWidth(componentID, width, subsamp); + ph=tjPlaneHeight(componentID, height, subsamp); + if(pw<0 || ph<0) return -1; + + if(stride==0) stride=pw; + else stride=abs(stride); + + retval=stride*(ph-1)+pw; + + bailout: + return retval; +} + + +DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +{ + int i, retval=0, alloc=1; JSAMPROW *row_pointer=NULL; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + #endif + + getcinstance(handle) + if((this->init&COMPRESS)==0) + _throw("tjCompress2(): Instance has not been initialized for compression"); + + if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF || jpegBuf==NULL || jpegSize==NULL + || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT || jpegQual<0 || jpegQual>100) + _throw("tjCompress2(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) + { + rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); + if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure"); + srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); + pitch=width*RGB_PIXELSIZE; + } + #endif + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; *jpegSize=tjBufSize(width, height, jpegSubsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if(setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags)==-1) + return -1; + + jpeg_start_compress(cinfo, TRUE); + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL) + _throw("tjCompress2(): Memory allocation failure"); + for(i=0; inext_scanlineimage_height) + { + jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline], + cinfo->image_height-cinfo->next_scanline); + } + jpeg_finish_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +{ + int retval=0; unsigned long size; + if(flags&TJ_YUV) + { + size=tjBufSizeYUV(width, height, jpegSubsamp); + retval=tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), jpegBuf, jpegSubsamp, flags); + } + else + { + retval=tjCompress2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), &jpegBuf, &size, jpegSubsamp, jpegQual, + flags|TJFLAG_NOREALLOC); + } + *jpegSize=size; + return retval; +} + + +DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, + int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *outbuf[MAX_COMPONENTS]; + int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + #endif + + getcinstance(handle); + + for(i=0; iinit&COMPRESS)==0) + _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression"); + + if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF || !dstPlanes || !dstPlanes[0] || subsamp<0 + || subsamp>=NUMSUBOPT) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pixelFormat==TJPF_CMYK) + _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels"); + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) + { + rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); + if(!rgbBuf) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); + pitch=width*RGB_PIXELSIZE; + } + #endif + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1; + + /* Execute only the parts of jpeg_start_compress() that we need. If we + were to call the whole jpeg_start_compress() function, then it would try + to write the file headers, which could overflow the output buffer if the + YUV image were very small. */ + if(cinfo->global_state!=CSTATE_START) + _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state"); + (*cinfo->err->reset_error_mgr)((j_common_ptr)cinfo); + jinit_c_master_control(cinfo, FALSE); + jinit_color_converter(cinfo); + jinit_downsampler(cinfo); + (*cinfo->cconvert->start_pass)(cinfo); + + pw0=PAD(width, cinfo->max_h_samp_factor); + ph0=PAD(height, cinfo->max_v_samp_factor); + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(i=0; inum_components; i++) + { + compptr=&cinfo->comp_info[i]; + _tmpbuf[i]=(JSAMPLE *)malloc( + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16); + if(!_tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor); + if(!tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowmax_v_samp_factor; row++) + { + unsigned char *_tmpbuf_aligned= + (unsigned char *)PAD((size_t)_tmpbuf[i], 16); + tmpbuf[i][row]=&_tmpbuf_aligned[ + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * row]; + } + _tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) + * compptr->v_samp_factor + 16); + if(!_tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); + if(!tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowv_samp_factor; row++) + { + unsigned char *_tmpbuf2_aligned= + (unsigned char *)PAD((size_t)_tmpbuf2[i], 16); + tmpbuf2[i][row]=&_tmpbuf2_aligned[ + PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; + } + pw[i]=pw0*compptr->h_samp_factor/cinfo->max_h_samp_factor; + ph[i]=ph0*compptr->v_samp_factor/cinfo->max_v_samp_factor; + outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); + if(!outbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + ptr=dstPlanes[i]; + for(row=0; rowmax_v_samp_factor) + { + (*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, 0, + cinfo->max_v_samp_factor); + (cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0); + for(i=0, compptr=cinfo->comp_info; inum_components; i++, compptr++) + jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i], + row*compptr->v_samp_factor/cinfo->max_v_samp_factor, + compptr->v_samp_factor, pw[i]); + } + cinfo->next_scanline+=height; + jpeg_abort_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags) +{ + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(width<=0 || height<=0 || dstBuf==NULL || pad<0 || !isPow2(pad) + || subsamp<0 || subsamp>=NUMSUBOPT) + _throw("tjEncodeYUV3(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + dstPlanes[0]=dstBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + dstPlanes[1]=dstPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; + dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; + } + + return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat, + dstPlanes, strides, subsamp, flags); + + bailout: + return retval; +} + +DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, + int subsamp, int flags) +{ + return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat, + dstBuf, 4, subsamp, flags); +} + +DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, + int subsamp, int flags) +{ + return tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), dstBuf, subsamp, flags); +} + + +DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, int width, const int *strides, int height, + int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, + int flags) +{ + int i, row, retval=0, alloc=1; JSAMPROW *inbuf[MAX_COMPONENTS]; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; + + getcinstance(handle) + + for(i=0; iinit&COMPRESS)==0) + _throw("tjCompressFromYUVPlanes(): Instance has not been initialized for compression"); + + if(!srcPlanes || !srcPlanes[0] || width<=0 || height<=0 || subsamp<0 + || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL || jpegQual<0 + || jpegQual>100) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + cinfo->image_width=width; + cinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; *jpegSize=tjBufSize(width, height, subsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1) + return -1; + cinfo->raw_data_in=TRUE; + + jpeg_start_compress(cinfo, TRUE); + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&cinfo->comp_info[i]; + int ih; + iw[i]=compptr->width_in_blocks*DCTSIZE; + ih=compptr->height_in_blocks*DCTSIZE; + pw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor) + *compptr->h_samp_factor/cinfo->max_h_samp_factor; + ph[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor) + *compptr->v_samp_factor/cinfo->max_v_samp_factor; + if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; + th[i]=compptr->v_samp_factor*DCTSIZE; + tmpbufsize+=iw[i]*th[i]; + if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + ptr=(JSAMPLE *)srcPlanes[i]; + for(row=0; rownum_components; i++) + { + if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + for(row=0; rowimage_height; + row+=cinfo->max_v_samp_factor*DCTSIZE) + { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&cinfo->comp_info[i]; + crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor; + if(usetmpbuf) + { + int j, k; + for(j=0; jmax_v_samp_factor*DCTSIZE); + } + jpeg_finish_compress(cinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, + const unsigned char *srcBuf, int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) +{ + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(srcBuf==NULL || width<=0 || pad<1 || height<=0 || subsamp<0 + || subsamp>=NUMSUBOPT) + _throw("tjCompressFromYUV(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + srcPlanes[0]=srcBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + srcPlanes[1]=srcPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; + srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; + } + + return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height, + subsamp, jpegBuf, jpegSize, jpegQual, flags); + + bailout: + return retval; +} + + +/* Decompressor */ + +static tjhandle _tjInitDecompress(tjinstance *this) +{ + static unsigned char buffer[1]; + + /* This is also straight out of example.c */ + this->dinfo.err=jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit=my_error_exit; + this->jerr.pub.output_message=my_output_message; + this->jerr.emit_message=this->jerr.pub.emit_message; + this->jerr.pub.emit_message=my_emit_message; + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + if(this) free(this); return NULL; + } + + jpeg_create_decompress(&this->dinfo); + /* Make an initial call so it will create the source manager */ + jpeg_mem_src_tj(&this->dinfo, buffer, 1); + + this->init|=DECOMPRESS; + return (tjhandle)this; +} + +DLLEXPORT tjhandle DLLCALL tjInitDecompress(void) +{ + tjinstance *this; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitDecompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + return _tjInitDecompress(this); +} + + +DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, int *jpegColorspace) +{ + int retval=0; + + getdinstance(handle); + if((this->init&DECOMPRESS)==0) + _throw("tjDecompressHeader3(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || width==NULL || height==NULL + || jpegSubsamp==NULL || jpegColorspace==NULL) + _throw("tjDecompressHeader3(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + return -1; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + + *width=dinfo->image_width; + *height=dinfo->image_height; + *jpegSubsamp=getSubsamp(dinfo); + switch(dinfo->jpeg_color_space) + { + case JCS_GRAYSCALE: *jpegColorspace=TJCS_GRAY; break; + case JCS_RGB: *jpegColorspace=TJCS_RGB; break; + case JCS_YCbCr: *jpegColorspace=TJCS_YCbCr; break; + case JCS_CMYK: *jpegColorspace=TJCS_CMYK; break; + case JCS_YCCK: *jpegColorspace=TJCS_YCCK; break; + default: *jpegColorspace=-1; break; + } + + jpeg_abort_decompress(dinfo); + + if(*jpegSubsamp<0) + _throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image"); + if(*jpegColorspace<0) + _throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image"); + if(*width<1 || *height<1) + _throw("tjDecompressHeader3(): Invalid data returned in header"); + + bailout: + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, + int *jpegSubsamp) +{ + int jpegColorspace; + return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height, + jpegSubsamp, &jpegColorspace); +} + +DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height) +{ + int jpegSubsamp; + return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height, + &jpegSubsamp); +} + + +DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors) +{ + if(numscalingfactors==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjGetScalingFactors(): Invalid argument"); + return NULL; + } + + *numscalingfactors=NUMSF; + return (tjscalingfactor *)sf; +} + + +DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + int jpegwidth, jpegheight, scaledw, scaledh; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + unsigned char *_dstBuf=NULL; int _pitch=0; + #endif + + getdinstance(handle); + if((this->init&DECOMPRESS)==0) + _throw("tjDecompress2(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pitch<0 + || height<0 || pixelFormat<0 || pixelFormat>=TJ_NUMPF) + _throw("tjDecompress2(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) + { + retval=-1; goto bailout; + } + + if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE; + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + for(i=0; i=NUMSF) + _throw("tjDecompress2(): Could not scale down to desired image dimensions"); + width=scaledw; height=scaledh; + dinfo->scale_num=sf[i].num; + dinfo->scale_denom=sf[i].denom; + + jpeg_start_decompress(dinfo); + if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && + (RGB_RED!=tjRedOffset[pixelFormat] || + RGB_GREEN!=tjGreenOffset[pixelFormat] || + RGB_BLUE!=tjBlueOffset[pixelFormat] || + RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) + { + rgbBuf=(unsigned char *)malloc(width*height*3); + if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure"); + _pitch=pitch; pitch=width*3; + _dstBuf=dstBuf; dstBuf=rgbBuf; + } + #endif + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW) + *dinfo->output_height))==NULL) + _throw("tjDecompress2(): Memory allocation failure"); + for(i=0; i<(int)dinfo->output_height; i++) + { + if(flags&TJFLAG_BOTTOMUP) + row_pointer[i]=&dstBuf[(dinfo->output_height-i-1)*pitch]; + else row_pointer[i]=&dstBuf[i*pitch]; + } + while(dinfo->output_scanlineoutput_height) + { + jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline], + dinfo->output_height-dinfo->output_scanline); + } + jpeg_finish_decompress(dinfo); + + #ifndef JCS_EXTENSIONS + fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); + #endif + + bailout: + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + if(this->jerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, + int height, int pixelSize, int flags) +{ + if(flags&TJ_YUV) + return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags); + else + return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch, + height, getPixelFormat(pixelSize, flags), flags); +} + + +static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo, + int pixelFormat, int subsamp, int flags) +{ + int i; + + dinfo->scale_num=dinfo->scale_denom=1; + + if(subsamp==TJSAMP_GRAY) + { + dinfo->num_components=dinfo->comps_in_scan=1; + dinfo->jpeg_color_space=JCS_GRAYSCALE; + } + else + { + dinfo->num_components=dinfo->comps_in_scan=3; + dinfo->jpeg_color_space=JCS_YCbCr; + } + + dinfo->comp_info=(jpeg_component_info *) + (*dinfo->mem->alloc_small)((j_common_ptr)dinfo, JPOOL_IMAGE, + dinfo->num_components*sizeof(jpeg_component_info)); + + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + compptr->h_samp_factor=(i==0)? tjMCUWidth[subsamp]/8:1; + compptr->v_samp_factor=(i==0)? tjMCUHeight[subsamp]/8:1; + compptr->component_index=i; + compptr->component_id=i+1; + compptr->quant_tbl_no=compptr->dc_tbl_no=compptr->ac_tbl_no= + (i==0)? 0:1; + dinfo->cur_comp_info[i]=compptr; + } + dinfo->data_precision=8; + for(i=0; i<2; i++) + { + if(dinfo->quant_tbl_ptrs[i]==NULL) + dinfo->quant_tbl_ptrs[i]=jpeg_alloc_quant_table((j_common_ptr)dinfo); + } + + return 0; +} + + +int my_read_markers(j_decompress_ptr dinfo) +{ + return JPEG_REACHED_SOS; +} + +void my_reset_marker_reader(j_decompress_ptr dinfo) +{ +} + +DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, + int flags) +{ + int i, retval=0; JSAMPROW *row_pointer=NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS]; + int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + #ifndef JCS_EXTENSIONS + unsigned char *rgbBuf=NULL; + unsigned char *_dstBuf=NULL; int _pitch=0; + #endif + int (*old_read_markers)(j_decompress_ptr); + void (*old_reset_marker_reader)(j_decompress_ptr); + + getdinstance(handle); + + for(i=0; iinit&DECOMPRESS)==0) + _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression"); + + if(!srcPlanes || !srcPlanes[0] || subsamp<0 || subsamp>=NUMSUBOPT + || dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 + || pixelFormat>=TJ_NUMPF) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(pixelFormat==TJPF_CMYK) + _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels."); + + if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; + dinfo->image_width=width; + dinfo->image_height=height; + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1) + { + retval=-1; goto bailout; + } + old_read_markers=dinfo->marker->read_markers; + dinfo->marker->read_markers=my_read_markers; + old_reset_marker_reader=dinfo->marker->reset_marker_reader; + dinfo->marker->reset_marker_reader=my_reset_marker_reader; + jpeg_read_header(dinfo, TRUE); + dinfo->marker->read_markers=old_read_markers; + dinfo->marker->reset_marker_reader=old_reset_marker_reader; + + if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) + { + retval=-1; goto bailout; + } + dinfo->do_fancy_upsampling=FALSE; + dinfo->Se=DCTSIZE2-1; + jinit_master_decompress(dinfo); + (*dinfo->upsample->start_pass)(dinfo); + + pw0=PAD(width, dinfo->max_h_samp_factor); + ph0=PAD(height, dinfo->max_v_samp_factor); + + if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; + + #ifndef JCS_EXTENSIONS + if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && + (RGB_RED!=tjRedOffset[pixelFormat] || + RGB_GREEN!=tjGreenOffset[pixelFormat] || + RGB_BLUE!=tjBlueOffset[pixelFormat] || + RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) + { + rgbBuf=(unsigned char *)malloc(width*height*3); + if(!rgbBuf) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + _pitch=pitch; pitch=width*3; + _dstBuf=dstBuf; dstBuf=rgbBuf; + } + #endif + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for(i=0; inum_components; i++) + { + compptr=&dinfo->comp_info[i]; + _tmpbuf[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) + * compptr->v_samp_factor + 16); + if(!_tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); + if(!tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for(row=0; rowv_samp_factor; row++) + { + unsigned char *_tmpbuf_aligned= + (unsigned char *)PAD((size_t)_tmpbuf[i], 16); + tmpbuf[i][row]=&_tmpbuf_aligned[ + PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; + } + pw[i]=pw0*compptr->h_samp_factor/dinfo->max_h_samp_factor; + ph[i]=ph0*compptr->v_samp_factor/dinfo->max_v_samp_factor; + inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); + if(!inbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + ptr=(JSAMPLE *)srcPlanes[i]; + for(row=0; rowmax_v_samp_factor) + { + JDIMENSION inrow=0, outrow=0; + for(i=0, compptr=dinfo->comp_info; inum_components; i++, compptr++) + jcopy_sample_rows(inbuf[i], + row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0, + compptr->v_samp_factor, pw[i]); + (dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow, + dinfo->max_v_samp_factor, &row_pointer[row], &outrow, + dinfo->max_v_samp_factor); + } + jpeg_abort_decompress(dinfo); + + #ifndef JCS_EXTENSIONS + fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); + #endif + + bailout: + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + #ifndef JCS_EXTENSIONS + if(rgbBuf) free(rgbBuf); + #endif + if(row_pointer) free(row_pointer); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags) +{ + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval=-1; + + if(srcBuf==NULL || pad<0 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT + || width<=0 || height<=0) + _throw("tjDecodeYUV(): Invalid argument"); + + pw0=tjPlaneWidth(0, width, subsamp); + ph0=tjPlaneHeight(0, height, subsamp); + srcPlanes[0]=srcBuf; + strides[0]=PAD(pw0, pad); + if(subsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + srcPlanes[1]=srcPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, subsamp); + int ph1=tjPlaneHeight(1, height, subsamp); + strides[1]=strides[2]=PAD(pw1, pad); + srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; + srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; + } + + return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width, + pitch, height, pixelFormat, flags); + + bailout: + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, + unsigned char **dstPlanes, int width, int *strides, int height, int flags) +{ + int i, sfi, row, retval=0; JSAMPROW *outbuf[MAX_COMPONENTS]; + int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; + int dctsize; + + getdinstance(handle); + + for(i=0; iinit&DECOMPRESS)==0) + _throw("tjDecompressToYUVPlanes(): Instance has not been initialized for decompression"); + + if(jpegBuf==NULL || jpegSize<=0 || !dstPlanes || !dstPlanes[0] || width<0 + || height<0) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + if(!this->headerRead) + { + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + } + this->headerRead=0; + jpegSubsamp=getSubsamp(dinfo); + if(jpegSubsamp<0) + _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image"); + + if(jpegSubsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + for(i=0; i=NUMSF) + _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions"); + if(dinfo->num_components>3) + _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components"); + + width=scaledw; height=scaledh; + dinfo->scale_num=sf[i].num; + dinfo->scale_denom=sf[i].denom; + sfi=i; + jpeg_calc_output_dimensions(dinfo); + + dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom; + + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + int ih; + iw[i]=compptr->width_in_blocks*dctsize; + ih=compptr->height_in_blocks*dctsize; + pw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor) + *compptr->h_samp_factor/dinfo->max_h_samp_factor; + ph[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor) + *compptr->v_samp_factor/dinfo->max_v_samp_factor; + if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; + th[i]=compptr->v_samp_factor*dctsize; + tmpbufsize+=iw[i]*th[i]; + if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + ptr=dstPlanes[i]; + for(row=0; rownum_components; i++) + { + if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + for(row=0; rowdo_fancy_upsampling=FALSE; + if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; + dinfo->raw_data_out=TRUE; + + jpeg_start_decompress(dinfo); + for(row=0; row<(int)dinfo->output_height; + row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size) + { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + if(jpegSubsamp==TJ_420) + { + /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try + to be clever and use the IDCT to perform upsampling on the U and V + planes. For instance, if the output image is to be scaled by 1/2 + relative to the JPEG image, then the scaling factor and upsampling + effectively cancel each other, so a normal 8x8 IDCT can be used. + However, this is not desirable when using the decompress-to-YUV + functionality in TurboJPEG, since we want to output the U and V + planes in their subsampled form. Thus, we have to override some + internal libjpeg parameters to force it to use the "scaled" IDCT + functions on the U and V planes. */ + compptr->_DCT_scaled_size=dctsize; + compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]* + sf[sfi].num/sf[sfi].denom* + compptr->v_samp_factor/dinfo->max_v_samp_factor; + dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0]; + } + crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor; + if(usetmpbuf) yuvptr[i]=tmpbuf[i]; + else yuvptr[i]=&outbuf[i][crow[i]]; + } + jpeg_read_raw_data(dinfo, yuvptr, + dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size); + if(usetmpbuf) + { + int j; + for(i=0; inum_components; i++) + { + for(j=0; jglobal_state>DSTATE_START) jpeg_abort_decompress(dinfo); + for(i=0; ijerr.warning) retval=-1; + return retval; +} + +DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags) +{ + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval=-1, jpegSubsamp=-1; + int i, jpegwidth, jpegheight, scaledw, scaledh; + + getdinstance(handle); + + if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1 + || !isPow2(pad) || height<0) + _throw("tjDecompressToYUV2(): Invalid argument"); + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + jpegSubsamp=getSubsamp(dinfo); + if(jpegSubsamp<0) + _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image"); + + jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; + if(width==0) width=jpegwidth; + if(height==0) height=jpegheight; + + for(i=0; i=NUMSF) + _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions"); + + pw0=tjPlaneWidth(0, width, jpegSubsamp); + ph0=tjPlaneHeight(0, height, jpegSubsamp); + dstPlanes[0]=dstBuf; + strides[0]=PAD(pw0, pad); + if(jpegSubsamp==TJSAMP_GRAY) + { + strides[1]=strides[2]=0; + dstPlanes[1]=dstPlanes[2]=NULL; + } + else + { + int pw1=tjPlaneWidth(1, width, jpegSubsamp); + int ph1=tjPlaneHeight(1, height, jpegSubsamp); + strides[1]=strides[2]=PAD(pw1, pad); + dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; + dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; + } + + this->headerRead=1; + return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width, + strides, height, flags); + + bailout: + return retval; + +} + +DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int flags) +{ + return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags); +} + + +/* Transformer */ + +DLLEXPORT tjhandle DLLCALL tjInitTransform(void) +{ + tjinstance *this=NULL; tjhandle handle=NULL; + if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) + { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitTransform(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + handle=_tjInitCompress(this); + if(!handle) return NULL; + handle=_tjInitDecompress(this); + return handle; +} + + +DLLEXPORT int DLLCALL tjTransform(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *t, int flags) +{ + jpeg_transform_info *xinfo=NULL; + jvirt_barray_ptr *srccoefs, *dstcoefs; + int retval=0, i, jpegSubsamp; + + getinstance(handle); + if((this->init&COMPRESS)==0 || (this->init&DECOMPRESS)==0) + _throw("tjTransform(): Instance has not been initialized for transformation"); + + if(jpegBuf==NULL || jpegSize<=0 || n<1 || dstBufs==NULL || dstSizes==NULL + || t==NULL || flags<0) + _throw("tjTransform(): Invalid argument"); + + if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); + + if(setjmp(this->jerr.setjmp_buffer)) + { + /* If we get here, the JPEG code has signaled an error. */ + retval=-1; + goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + + if((xinfo=(jpeg_transform_info *)malloc(sizeof(jpeg_transform_info)*n)) + ==NULL) + _throw("tjTransform(): Memory allocation failure"); + MEMZERO(xinfo, sizeof(jpeg_transform_info)*n); + + for(i=0; iimage_width; h=dinfo->image_height; + } + else + { + w=xinfo[i].crop_width; h=xinfo[i].crop_height; + } + if(flags&TJFLAG_NOREALLOC) + { + alloc=0; dstSizes[i]=tjBufSize(w, h, jpegSubsamp); + } + if(!(t[i].options&TJXOPT_NOOUTPUT)) + jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc); + jpeg_copy_critical_parameters(dinfo, cinfo); + dstcoefs=jtransform_adjust_parameters(dinfo, cinfo, srccoefs, + &xinfo[i]); + if(!(t[i].options&TJXOPT_NOOUTPUT)) + { + jpeg_write_coefficients(cinfo, dstcoefs); + jcopy_markers_execute(dinfo, cinfo, JCOPYOPT_ALL); + } + else jinit_c_master_control(cinfo, TRUE); + jtransform_execute_transformation(dinfo, cinfo, srccoefs, + &xinfo[i]); + if(t[i].customFilter) + { + int ci, y; JDIMENSION by; + for(ci=0; cinum_components; ci++) + { + jpeg_component_info *compptr=&cinfo->comp_info[ci]; + tjregion arrayRegion={0, 0, compptr->width_in_blocks*DCTSIZE, + DCTSIZE}; + tjregion planeRegion={0, 0, compptr->width_in_blocks*DCTSIZE, + compptr->height_in_blocks*DCTSIZE}; + for(by=0; byheight_in_blocks; by+=compptr->v_samp_factor) + { + JBLOCKARRAY barray=(dinfo->mem->access_virt_barray) + ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor, + TRUE); + for(y=0; yv_samp_factor; y++) + { + if(t[i].customFilter(barray[y][0], arrayRegion, planeRegion, + ci, i, &t[i])==-1) + _throw("tjTransform(): Error in custom filter"); + arrayRegion.y+=DCTSIZE; + } + } + } + } + if(!(t[i].options&TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo); + } + + jpeg_finish_decompress(dinfo); + + bailout: + if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); + if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); + if(xinfo) free(xinfo); + if(this->jerr.warning) retval=-1; + return retval; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg.h glmark2-2021.02/src/libjpeg-turbo/turbojpeg.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/turbojpeg.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1542 @@ +/* + * Copyright (C)2009-2015 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TURBOJPEG_H__ +#define __TURBOJPEG_H__ + +#if defined(_WIN32) && defined(DLLDEFINE) +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif +#define DLLCALL + + +/** + * @addtogroup TurboJPEG + * TurboJPEG API. This API provides an interface for generating, decoding, and + * transforming planar YUV and JPEG images in memory. + * + * @anchor YUVnotes + * YUV Image Format Notes + * ---------------------- + * Technically, the JPEG format uses the YCbCr colorspace (which is technically + * not a colorspace but a color transform), but per the convention of the + * digital video community, the TurboJPEG API uses "YUV" to refer to an image + * format consisting of Y, Cb, and Cr image planes. + * + * Each plane is simply a 2D array of bytes, each byte representing the value + * of one of the components (Y, Cb, or Cr) at a particular location in the + * image. The width and height of each plane are determined by the image + * width, height, and level of chrominance subsampling. The luminance plane + * width is the image width padded to the nearest multiple of the horizontal + * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of + * 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane + * height is the image height padded to the nearest multiple of the vertical + * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 + * or grayscale.) This is irrespective of any additional padding that may be + * specified as an argument to the various YUV functions. The chrominance + * plane width is equal to the luminance plane width divided by the horizontal + * subsampling factor, and the chrominance plane height is equal to the + * luminance plane height divided by the vertical subsampling factor. + * + * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is + * used, then the luminance plane would be 36 x 35 bytes, and each of the + * chrominance planes would be 18 x 35 bytes. If you specify a line padding of + * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and + * each of the chrominance planes would be 20 x 35 bytes. + * + * @{ + */ + + +/** + * The number of chrominance subsampling options + */ +#define TJ_NUMSAMP 6 + +/** + * Chrominance subsampling options. + * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK + * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of + * the Cb and Cr (chrominance) components can be discarded or averaged together + * to produce a smaller image with little perceptible loss of image clarity + * (the human eye is more sensitive to small changes in brightness than to + * small changes in color.) This is called "chrominance subsampling". + */ +enum TJSAMP +{ + /** + * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or + * YUV image will contain one chrominance component for every pixel in the + * source image. + */ + TJSAMP_444=0, + /** + * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 2x1 block of pixels in the source image. + */ + TJSAMP_422, + /** + * 4:2:0 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 2x2 block of pixels in the source image. + */ + TJSAMP_420, + /** + * Grayscale. The JPEG or YUV image will contain no chrominance components. + */ + TJSAMP_GRAY, + /** + * 4:4:0 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 1x2 block of pixels in the source image. + * + * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. + */ + TJSAMP_440, + /** + * 4:1:1 chrominance subsampling. The JPEG or YUV image will contain one + * chrominance component for every 4x1 block of pixels in the source image. + * JPEG images compressed with 4:1:1 subsampling will be almost exactly the + * same size as those compressed with 4:2:0 subsampling, and in the + * aggregate, both subsampling methods produce approximately the same + * perceptual quality. However, 4:1:1 is better able to reproduce sharp + * horizontal features. + * + * @note 4:1:1 subsampling is not fully accelerated in libjpeg-turbo. + */ + TJSAMP_411 +}; + +/** + * MCU block width (in pixels) for a given level of chrominance subsampling. + * MCU block sizes: + * - 8x8 for no subsampling or grayscale + * - 16x8 for 4:2:2 + * - 8x16 for 4:4:0 + * - 16x16 for 4:2:0 + * - 32x8 for 4:1:1 + */ +static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8, 32}; + +/** + * MCU block height (in pixels) for a given level of chrominance subsampling. + * MCU block sizes: + * - 8x8 for no subsampling or grayscale + * - 16x8 for 4:2:2 + * - 8x16 for 4:4:0 + * - 16x16 for 4:2:0 + * - 32x8 for 4:1:1 + */ +static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16, 8}; + + +/** + * The number of pixel formats + */ +#define TJ_NUMPF 12 + +/** + * Pixel formats + */ +enum TJPF +{ + /** + * RGB pixel format. The red, green, and blue components in the image are + * stored in 3-byte pixels in the order R, G, B from lowest to highest byte + * address within each pixel. + */ + TJPF_RGB=0, + /** + * BGR pixel format. The red, green, and blue components in the image are + * stored in 3-byte pixels in the order B, G, R from lowest to highest byte + * address within each pixel. + */ + TJPF_BGR, + /** + * RGBX pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order R, G, B from lowest to highest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_RGBX, + /** + * BGRX pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order B, G, R from lowest to highest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_BGRX, + /** + * XBGR pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order R, G, B from highest to lowest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_XBGR, + /** + * XRGB pixel format. The red, green, and blue components in the image are + * stored in 4-byte pixels in the order B, G, R from highest to lowest byte + * address within each pixel. The X component is ignored when compressing + * and undefined when decompressing. + */ + TJPF_XRGB, + /** + * Grayscale pixel format. Each 1-byte pixel represents a luminance + * (brightness) level from 0 to 255. + */ + TJPF_GRAY, + /** + * RGBA pixel format. This is the same as @ref TJPF_RGBX, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_RGBA, + /** + * BGRA pixel format. This is the same as @ref TJPF_BGRX, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_BGRA, + /** + * ABGR pixel format. This is the same as @ref TJPF_XBGR, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_ABGR, + /** + * ARGB pixel format. This is the same as @ref TJPF_XRGB, except that when + * decompressing, the X component is guaranteed to be 0xFF, which can be + * interpreted as an opaque alpha channel. + */ + TJPF_ARGB, + /** + * CMYK pixel format. Unlike RGB, which is an additive color model used + * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive + * color model used primarily for printing. In the CMYK color model, the + * value of each color component typically corresponds to an amount of cyan, + * magenta, yellow, or black ink that is applied to a white background. In + * order to convert between CMYK and RGB, it is necessary to use a color + * management system (CMS.) A CMS will attempt to map colors within the + * printer's gamut to perceptually similar colors in the display's gamut and + * vice versa, but the mapping is typically not 1:1 or reversible, nor can it + * be defined with a simple formula. Thus, such a conversion is out of scope + * for a codec library. However, the TurboJPEG API allows for compressing + * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK + * JPEG images into CMYK pixels. + */ + TJPF_CMYK +}; + + +/** + * Red offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the red component is offset from the start of the pixel. For + * instance, if a pixel of format TJ_BGRX is stored in char pixel[], + * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. + */ +static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1}; +/** + * Green offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the green component is offset from the start of the pixel. + * For instance, if a pixel of format TJ_BGRX is stored in + * char pixel[], then the green component will be + * pixel[tjGreenOffset[TJ_BGRX]]. + */ +static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1}; +/** + * Blue offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the Blue component is offset from the start of the pixel. For + * instance, if a pixel of format TJ_BGRX is stored in char pixel[], + * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. + */ +static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1}; + +/** + * Pixel size (in bytes) for a given pixel format. + */ +static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4}; + + +/** + * The number of JPEG colorspaces + */ +#define TJ_NUMCS 5 + +/** + * JPEG colorspaces + */ +enum TJCS +{ + /** + * RGB colorspace. When compressing the JPEG image, the R, G, and B + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. RGB JPEG images can be + * decompressed to any of the extended RGB pixel formats or grayscale, but + * they cannot be decompressed to YUV images. + */ + TJCS_RGB=0, + /** + * YCbCr colorspace. YCbCr is not an absolute colorspace but rather a + * mathematical transformation of RGB designed solely for storage and + * transmission. YCbCr images must be converted to RGB before they can + * actually be displayed. In the YCbCr colorspace, the Y (luminance) + * component represents the black & white portion of the original image, and + * the Cb and Cr (chrominance) components represent the color portion of the + * original image. Originally, the analog equivalent of this transformation + * allowed the same signal to drive both black & white and color televisions, + * but JPEG images use YCbCr primarily because it allows the color data to be + * optionally subsampled for the purposes of reducing bandwidth or disk + * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images + * can be compressed from and decompressed to any of the extended RGB pixel + * formats or grayscale, or they can be decompressed to YUV planar images. + */ + TJCS_YCbCr, + /** + * Grayscale colorspace. The JPEG image retains only the luminance data (Y + * component), and any color data from the source image is discarded. + * Grayscale JPEG images can be compressed from and decompressed to any of + * the extended RGB pixel formats or grayscale, or they can be decompressed + * to YUV planar images. + */ + TJCS_GRAY, + /** + * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. CMYK JPEG images can + * only be decompressed to CMYK pixels. + */ + TJCS_CMYK, + /** + * YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but + * rather a mathematical transformation of CMYK designed solely for storage + * and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be + * reversibly transformed into YCCK, and as with YCbCr, the chrominance + * components in the YCCK pixels can be subsampled without incurring major + * perceptual loss. YCCK JPEG images can only be compressed from and + * decompressed to CMYK pixels. + */ + TJCS_YCCK +}; + + +/** + * The uncompressed source/destination image is stored in bottom-up (Windows, + * OpenGL) order, not top-down (X11) order. + */ +#define TJFLAG_BOTTOMUP 2 +/** + * When decompressing an image that was compressed using chrominance + * subsampling, use the fastest chrominance upsampling algorithm available in + * the underlying codec. The default is to use smooth upsampling, which + * creates a smooth transition between neighboring chrominance components in + * order to reduce upsampling artifacts in the decompressed image. + */ +#define TJFLAG_FASTUPSAMPLE 256 +/** + * Disable buffer (re)allocation. If passed to #tjCompress2() or + * #tjTransform(), this flag will cause those functions to generate an error if + * the JPEG image buffer is invalid or too small rather than attempting to + * allocate or reallocate that buffer. This reproduces the behavior of earlier + * versions of TurboJPEG. + */ +#define TJFLAG_NOREALLOC 1024 +/** + * Use the fastest DCT/IDCT algorithm available in the underlying codec. The + * default if this flag is not specified is implementation-specific. For + * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast + * algorithm by default when compressing, because this has been shown to have + * only a very slight effect on accuracy, but it uses the accurate algorithm + * when decompressing, because this has been shown to have a larger effect. + */ +#define TJFLAG_FASTDCT 2048 +/** + * Use the most accurate DCT/IDCT algorithm available in the underlying codec. + * The default if this flag is not specified is implementation-specific. For + * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast + * algorithm by default when compressing, because this has been shown to have + * only a very slight effect on accuracy, but it uses the accurate algorithm + * when decompressing, because this has been shown to have a larger effect. + */ +#define TJFLAG_ACCURATEDCT 4096 + + +/** + * The number of transform operations + */ +#define TJ_NUMXOP 8 + +/** + * Transform operations for #tjTransform() + */ +enum TJXOP +{ + /** + * Do not transform the position of the image pixels + */ + TJXOP_NONE=0, + /** + * Flip (mirror) image horizontally. This transform is imperfect if there + * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.) + */ + TJXOP_HFLIP, + /** + * Flip (mirror) image vertically. This transform is imperfect if there are + * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.) + */ + TJXOP_VFLIP, + /** + * Transpose image (flip/mirror along upper left to lower right axis.) This + * transform is always perfect. + */ + TJXOP_TRANSPOSE, + /** + * Transverse transpose image (flip/mirror along upper right to lower left + * axis.) This transform is imperfect if there are any partial MCU blocks in + * the image (see #TJXOPT_PERFECT.) + */ + TJXOP_TRANSVERSE, + /** + * Rotate image clockwise by 90 degrees. This transform is imperfect if + * there are any partial MCU blocks on the bottom edge (see + * #TJXOPT_PERFECT.) + */ + TJXOP_ROT90, + /** + * Rotate image 180 degrees. This transform is imperfect if there are any + * partial MCU blocks in the image (see #TJXOPT_PERFECT.) + */ + TJXOP_ROT180, + /** + * Rotate image counter-clockwise by 90 degrees. This transform is imperfect + * if there are any partial MCU blocks on the right edge (see + * #TJXOPT_PERFECT.) + */ + TJXOP_ROT270 +}; + + +/** + * This option will cause #tjTransform() to return an error if the transform is + * not perfect. Lossless transforms operate on MCU blocks, whose size depends + * on the level of chrominance subsampling used (see #tjMCUWidth + * and #tjMCUHeight.) If the image's width or height is not evenly divisible + * by the MCU block size, then there will be partial MCU blocks on the right + * and/or bottom edges. It is not possible to move these partial MCU blocks to + * the top or left of the image, so any transform that would require that is + * "imperfect." If this option is not specified, then any partial MCU blocks + * that cannot be transformed will be left in place, which will create + * odd-looking strips on the right or bottom edge of the image. + */ +#define TJXOPT_PERFECT 1 +/** + * This option will cause #tjTransform() to discard any partial MCU blocks that + * cannot be transformed. + */ +#define TJXOPT_TRIM 2 +/** + * This option will enable lossless cropping. See #tjTransform() for more + * information. + */ +#define TJXOPT_CROP 4 +/** + * This option will discard the color data in the input image and produce + * a grayscale output image. + */ +#define TJXOPT_GRAY 8 +/** + * This option will prevent #tjTransform() from outputting a JPEG image for + * this particular transform (this can be used in conjunction with a custom + * filter to capture the transformed DCT coefficients without transcoding + * them.) + */ +#define TJXOPT_NOOUTPUT 16 + + +/** + * Scaling factor + */ +typedef struct +{ + /** + * Numerator + */ + int num; + /** + * Denominator + */ + int denom; +} tjscalingfactor; + +/** + * Cropping region + */ +typedef struct +{ + /** + * The left boundary of the cropping region. This must be evenly divisible + * by the MCU block width (see #tjMCUWidth.) + */ + int x; + /** + * The upper boundary of the cropping region. This must be evenly divisible + * by the MCU block height (see #tjMCUHeight.) + */ + int y; + /** + * The width of the cropping region. Setting this to 0 is the equivalent of + * setting it to the width of the source JPEG image - x. + */ + int w; + /** + * The height of the cropping region. Setting this to 0 is the equivalent of + * setting it to the height of the source JPEG image - y. + */ + int h; +} tjregion; + +/** + * Lossless transform + */ +typedef struct tjtransform +{ + /** + * Cropping region + */ + tjregion r; + /** + * One of the @ref TJXOP "transform operations" + */ + int op; + /** + * The bitwise OR of one of more of the @ref TJXOPT_CROP "transform options" + */ + int options; + /** + * Arbitrary data that can be accessed within the body of the callback + * function + */ + void *data; + /** + * A callback function that can be used to modify the DCT coefficients + * after they are losslessly transformed but before they are transcoded to a + * new JPEG image. This allows for custom filters or other transformations + * to be applied in the frequency domain. + * + * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE: + * this pointer is not guaranteed to be valid once the callback returns, so + * applications wishing to hand off the DCT coefficients to another function + * or library should make a copy of them within the body of the callback.) + * + * @param arrayRegion #tjregion structure containing the width and height of + * the array pointed to by coeffs as well as its offset relative to + * the component plane. TurboJPEG implementations may choose to split each + * component plane into multiple DCT coefficient arrays and call the callback + * function once for each array. + * + * @param planeRegion #tjregion structure containing the width and height of + * the component plane to which coeffs belongs + * + * @param componentID ID number of the component plane to which + * coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, + * and 2 in typical JPEG images.) + * + * @param transformID ID number of the transformed image to which + * coeffs belongs. This is the same as the index of the transform + * in the transforms array that was passed to #tjTransform(). + * + * @param transform a pointer to a #tjtransform structure that specifies the + * parameters and/or cropping region for this transform + * + * @return 0 if the callback was successful, or -1 if an error occurred. + */ + int (*customFilter)(short *coeffs, tjregion arrayRegion, + tjregion planeRegion, int componentIndex, int transformIndex, + struct tjtransform *transform); +} tjtransform; + +/** + * TurboJPEG instance handle + */ +typedef void* tjhandle; + + +/** + * Pad the given width to the nearest 32-bit boundary + */ +#define TJPAD(width) (((width)+3)&(~3)) + +/** + * Compute the scaled value of dimension using the given scaling + * factor. This macro performs the integer equivalent of ceil(dimension * + * scalingFactor). + */ +#define TJSCALED(dimension, scalingFactor) ((dimension * scalingFactor.num \ + + scalingFactor.denom - 1) / scalingFactor.denom) + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Create a TurboJPEG compressor instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) + */ +DLLEXPORT tjhandle DLLCALL tjInitCompress(void); + + +/** + * Compress an RGB, grayscale, or CMYK image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB, grayscale, or + * CMYK pixels to be compressed + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer + * to accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegSubsamp the level of chrominance subsampling to be used when + * generating the JPEG image (see @ref TJSAMP + * "Chrominance subsampling options".) + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags); + + +/** + * Compress a YUV planar image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing a YUV planar image to be + * compressed. The size of this buffer should match the value returned by + * #tjBufSizeYUV2() for the given image width, height, padding, and level of + * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be + * stored sequentially in the source buffer (refer to @ref YUVnotes + * "YUV Image Format Notes".) + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate + * buffer copy will be performed within TurboJPEG. + * + * @param pad the line padding used in the source image. For instance, if each + * line in each plane of the YUV image is padded to the nearest multiple of 4 + * bytes, then pad should be set to 4. + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an + * intermediate buffer copy will be performed within TurboJPEG. + * + * @param subsamp the level of chrominance subsampling used in the source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, + const unsigned char *srcBuf, int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags); + + +/** + * Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if compressing a grayscale image) that contain a YUV + * image to be compressed. These planes can be contiguous or non-contiguous in + * memory. The size of each plane should match the value returned by + * #tjPlaneSizeYUV() for the given image width, height, strides, and level of + * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" + * for more details. + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate + * buffer copy will be performed within TurboJPEG. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to specify an arbitrary amount of line + * padding in each plane or to create a JPEG image from a subregion of a larger + * YUV planar image. + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an + * intermediate buffer copy will be performed within TurboJPEG. + * + * @param subsamp the level of chrominance subsampling used in the source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegBuf address of a pointer to an image buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be + * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, *jpegSize should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, + * you should always check *jpegBuf upon return from this function, as + * it may have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of + * the JPEG image buffer. If *jpegBuf points to a pre-allocated + * buffer, then *jpegSize should be set to the size of the buffer. + * Upon return, *jpegSize will contain the size of the JPEG image (in + * bytes.) If *jpegBuf points to a JPEG image buffer that is being + * reused from a previous call to one of the JPEG compression functions, then + * *jpegSize is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, int width, const int *strides, int height, + int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, + int flags); + + +/** + * The maximum size of the buffer (in bytes) required to hold a JPEG image with + * the given parameters. The number of bytes returned by this function is + * larger than the size of the uncompressed source image. The reason for this + * is that the JPEG format uses 16-bit coefficients, and it is thus possible + * for a very high-quality JPEG image with very high-frequency content to + * expand rather than compress when converted to the JPEG format. Such images + * represent a very rare corner case, but since there is no way to predict the + * size of a JPEG image prior to compression, the corner case has to be + * handled. + * + * @param width width (in pixels) of the image + * + * @param height height (in pixels) of the image + * + * @param jpegSubsamp the level of chrominance subsampling to be used when + * generating the JPEG image (see @ref TJSAMP + * "Chrominance subsampling options".) + * + * @return the maximum size of the buffer (in bytes) required to hold the + * image, or -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, + int jpegSubsamp); + + +/** + * The size of the buffer (in bytes) required to hold a YUV planar image with + * the given parameters. + * + * @param width width (in pixels) of the image + * + * @param pad the width of each line in each plane of the image is padded to + * the nearest multiple of this number of bytes (must be a power of 2.) + * + * @param height height (in pixels) of the image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the size of the buffer (in bytes) required to hold the image, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, + int subsamp); + + +/** + * The size of the buffer (in bytes) required to hold a YUV image plane with + * the given parameters. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param width width (in pixels) of the YUV image. NOTE: this is the width of + * the whole image, not the plane width. + * + * @param stride bytes per line in the image plane. Setting this to 0 is the + * equivalent of setting it to the plane width. + * + * @param height height (in pixels) of the YUV image. NOTE: this is the height + * of the whole image, not the plane height. + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the size of the buffer (in bytes) required to hold the YUV image + * plane, or -1 if the arguments are out of bounds. + */ +DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, + int stride, int height, int subsamp); + + +/** + * The plane width of a YUV image plane with the given parameters. Refer to + * @ref YUVnotes "YUV Image Format Notes" for a description of plane width. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param width width (in pixels) of the YUV image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the plane width of a YUV image plane with the given parameters, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp); + + +/** + * The plane height of a YUV image plane with the given parameters. Refer to + * @ref YUVnotes "YUV Image Format Notes" for a description of plane height. + * + * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr) + * + * @param height height (in pixels) of the YUV image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * + * @return the plane height of a YUV image plane with the given parameters, or + * -1 if the arguments are out of bounds. + */ +DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); + + +/** + * Encode an RGB or grayscale image into a YUV planar image. This function + * uses the accelerated color conversion routines in the underlying + * codec but does not execute any of the other steps in the JPEG compression + * process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels + * to be encoded + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param dstBuf pointer to an image buffer that will receive the YUV image. + * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based + * on the image width, height, padding, and level of chrominance subsampling. + * The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the + * buffer (refer to @ref YUVnotes "YUV Image Format Notes".) + * + * @param pad the width of each line in each plane of the YUV image will be + * padded to the nearest multiple of this number of bytes (must be a power of + * 2.) To generate images suitable for X Video, pad should be set to + * 4. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X + * Video, subsamp should be set to @ref TJSAMP_420. This produces an + * image compatible with the I420 (AKA "YUV420P") format. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags); + + +/** + * Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image + * planes. This function uses the accelerated color conversion routines in the + * underlying codec but does not execute any of the other steps in the JPEG + * compression process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels + * to be encoded + * + * @param width width (in pixels) of the source image + * + * @param pitch bytes per line in the source image. Normally, this should be + * width * #tjPixelSize[pixelFormat] if the image is unpadded, or + * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image + * is padded to the nearest 32-bit boundary, as is the case for Windows + * bitmaps. You can also be clever and use this parameter to skip lines, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source image + * + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * + * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if generating a grayscale image) that will receive the + * encoded image. These planes can be contiguous or non-contiguous in memory. + * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based + * on the image width, height, strides, and level of chrominance subsampling. + * Refer to @ref YUVnotes "YUV Image Format Notes" for more details. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the output image. Setting the stride for + * any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to add an arbitrary amount of line + * padding to each plane or to encode an RGB or grayscale image into a + * subregion of a larger YUV planar image. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X + * Video, subsamp should be set to @ref TJSAMP_420. This produces an + * image compatible with the I420 (AKA "YUV420P") format. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, + const unsigned char *srcBuf, int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, + int flags); + + +/** + * Create a TurboJPEG decompressor instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); + + +/** + * Retrieve information about a JPEG image without decompressing it. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing a JPEG image + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param width pointer to an integer variable that will receive the width (in + * pixels) of the JPEG image + * + * @param height pointer to an integer variable that will receive the height + * (in pixels) of the JPEG image + * + * @param jpegSubsamp pointer to an integer variable that will receive the + * level of chrominance subsampling used when the JPEG image was compressed + * (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param jpegColorspace pointer to an integer variable that will receive one + * of the JPEG colorspace constants, indicating the colorspace of the JPEG + * image (see @ref TJCS "JPEG colorspaces".) + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) +*/ +DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, int *jpegColorspace); + + +/** + * Returns a list of fractional scaling factors that the JPEG decompressor in + * this implementation of TurboJPEG supports. + * + * @param numscalingfactors pointer to an integer variable that will receive + * the number of elements in the list + * + * @return a pointer to a list of fractional scaling factors, or NULL if an + * error is encountered (see #tjGetErrorStr().) +*/ +DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors); + + +/** + * Decompress a JPEG image to an RGB, grayscale, or CMYK image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstBuf pointer to an image buffer that will receive the decompressed + * image. This buffer should normally be pitch * scaledHeight bytes + * in size, where scaledHeight can be determined by calling + * #TJSCALED() with the JPEG image height and one of the scaling factors + * returned by #tjGetScalingFactors(). The dstBuf pointer may also be + * used to decompress into a specific region of a larger buffer. + * + * @param width desired width (in pixels) of the destination image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. + * + * @param pitch bytes per line in the destination image. Normally, this is + * scaledWidth * #tjPixelSize[pixelFormat] if the decompressed image + * is unpadded, else #TJPAD(scaledWidth * #tjPixelSize[pixelFormat]) + * if each line of the decompressed image is padded to the nearest 32-bit + * boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth + * can be determined by calling #TJSCALED() with the JPEG image width and one + * of the scaling factors returned by #tjGetScalingFactors().) You can also be + * clever and use the pitch parameter to skip lines, etc. Setting this + * parameter to 0 is the equivalent of setting it to + * scaledWidth * #tjPixelSize[pixelFormat]. + * + * @param height desired height (in pixels) of the destination image. If this + * is different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. + * + * @param pixelFormat pixel format of the destination image (see @ref + * TJPF "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, int flags); + + +/** + * Decompress a JPEG image to a YUV planar image. This function performs JPEG + * decompression but leaves out the color conversion step, so a planar YUV + * image is generated instead of an RGB image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstBuf pointer to an image buffer that will receive the YUV image. + * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based + * on the image width, height, padding, and level of subsampling. The Y, + * U (Cb), and V (Cr) image planes will be stored sequentially in the buffer + * (refer to @ref YUVnotes "YUV Image Format Notes".) + * + * @param width desired width (in pixels) of the YUV image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. If the scaled width is not an even multiple of the MCU + * block width (see #tjMCUWidth), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param pad the width of each line in each plane of the YUV image will be + * padded to the nearest multiple of this number of bytes (must be a power of + * 2.) To generate images suitable for X Video, pad should be set to + * 4. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. If the scaled height is not an even multiple of the MCU + * block height (see #tjMCUHeight), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags); + + +/** + * Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image + * planes. This function performs JPEG decompression but leaves out the color + * conversion step, so a planar YUV image is generated instead of an RGB image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG image to decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if decompressing a grayscale image) that will receive + * the YUV image. These planes can be contiguous or non-contiguous in memory. + * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based + * on the scaled image width, scaled image height, strides, and level of + * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" + * for more details. + * + * @param width desired width (in pixels) of the YUV image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired width. If width is + * set to 0, then only the height will be considered when determining the + * scaled image size. If the scaled width is not an even multiple of the MCU + * block width (see #tjMCUWidth), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the output image. Setting the stride for + * any plane to 0 is the same as setting it to the scaled plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective scaled plane + * widths. You can adjust the strides in order to add an arbitrary amount of + * line padding to each plane or to decompress the JPEG image into a subregion + * of a larger YUV planar image. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest + * possible image that will fit within the desired height. If height + * is set to 0, then only the width will be considered when determining the + * scaled image size. If the scaled height is not an even multiple of the MCU + * block height (see #tjMCUHeight), then an intermediate buffer copy will be + * performed within TurboJPEG. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, + unsigned char **dstPlanes, int width, int *strides, int height, int flags); + + +/** + * Decode a YUV planar image into an RGB or grayscale image. This function + * uses the accelerated color conversion routines in the underlying + * codec but does not execute any of the other steps in the JPEG decompression + * process. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param srcBuf pointer to an image buffer containing a YUV planar image to be + * decoded. The size of this buffer should match the value returned by + * #tjBufSizeYUV2() for the given image width, height, padding, and level of + * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be + * stored sequentially in the source buffer (refer to @ref YUVnotes + * "YUV Image Format Notes".) + * + * @param pad Use this parameter to specify that the width of each line in each + * plane of the YUV source image is padded to the nearest multiple of this + * number of bytes (must be a power of 2.) + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param dstBuf pointer to an image buffer that will receive the decoded + * image. This buffer should normally be pitch * height bytes in + * size, but the dstBuf pointer can also be used to decode into a + * specific region of a larger buffer. + * + * @param width width (in pixels) of the source and destination images + * + * @param pitch bytes per line in the destination image. Normally, this should + * be width * #tjPixelSize[pixelFormat] if the destination image is + * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line + * of the destination image should be padded to the nearest 32-bit boundary, as + * is the case for Windows bitmaps. You can also be clever and use the pitch + * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images + * + * @param pixelFormat pixel format of the destination image (see @ref TJPF + * "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags); + + +/** + * Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale + * image. This function uses the accelerated color conversion routines in the + * underlying codec but does not execute any of the other steps in the JPEG + * decompression process. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * + * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if decoding a grayscale image) that contain a YUV image + * to be decoded. These planes can be contiguous or non-contiguous in memory. + * The size of each plane should match the value returned by #tjPlaneSizeYUV() + * for the given image width, height, strides, and level of chrominance + * subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" for more + * details. + * + * @param strides an array of integers, each specifying the number of bytes per + * line in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then + * the strides for all planes will be set to their respective plane widths. + * You can adjust the strides in order to specify an arbitrary amount of line + * padding in each plane or to decode a subregion of a larger YUV planar image. + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * + * @param dstBuf pointer to an image buffer that will receive the decoded + * image. This buffer should normally be pitch * height bytes in + * size, but the dstBuf pointer can also be used to decode into a + * specific region of a larger buffer. + * + * @param width width (in pixels) of the source and destination images + * + * @param pitch bytes per line in the destination image. Normally, this should + * be width * #tjPixelSize[pixelFormat] if the destination image is + * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line + * of the destination image should be padded to the nearest 32-bit boundary, as + * is the case for Windows bitmaps. You can also be clever and use the pitch + * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images + * + * @param pixelFormat pixel format of the destination image (see @ref TJPF + * "Pixel formats".) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, + int flags); + + +/** + * Create a new TurboJPEG transformer instance. + * + * @return a handle to the newly-created instance, or NULL if an error + * occurred (see #tjGetErrorStr().) + */ +DLLEXPORT tjhandle DLLCALL tjInitTransform(void); + + +/** + * Losslessly transform a JPEG image into another JPEG image. Lossless + * transforms work by moving the raw DCT coefficients from one JPEG image + * structure to another without altering the values of the coefficients. While + * this is typically faster than decompressing the image, transforming it, and + * re-compressing it, lossless transforms are not free. Each lossless + * transform requires reading and performing Huffman decoding on all of the + * coefficients in the source image, regardless of the size of the destination + * image. Thus, this function provides a means of generating multiple + * transformed images from the same source or applying multiple + * transformations simultaneously, in order to eliminate the need to read the + * source coefficients multiple times. + * + * @param handle a handle to a TurboJPEG transformer instance + * + * @param jpegBuf pointer to a buffer containing the JPEG source image to + * transform + * + * @param jpegSize size of the JPEG source image (in bytes) + * + * @param n the number of transformed JPEG images to generate + * + * @param dstBufs pointer to an array of n image buffers. dstBufs[i] + * will receive a JPEG image that has been transformed using the parameters in + * transforms[i]. TurboJPEG has the ability to reallocate the JPEG + * buffer to accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, + * -# set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer + * for you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize() with the transformed or cropped width and height. This should + * ensure that the buffer never has to be re-allocated (setting + * #TJFLAG_NOREALLOC guarantees this.) + * . + * If you choose option 1, dstSizes[i] should be set to the size of + * your pre-allocated buffer. In any case, unless you have set + * #TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return + * from this function, as it may have changed. + * + * @param dstSizes pointer to an array of n unsigned long variables that will + * receive the actual sizes (in bytes) of each transformed JPEG image. If + * dstBufs[i] points to a pre-allocated buffer, then + * dstSizes[i] should be set to the size of the buffer. Upon return, + * dstSizes[i] will contain the size of the JPEG image (in bytes.) + * + * @param transforms pointer to an array of n #tjtransform structures, each of + * which specifies the transform parameters and/or cropping region for the + * corresponding transformed output image. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjTransform(tjhandle handle, + const unsigned char *jpegBuf, unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, + int flags); + + +/** + * Destroy a TurboJPEG compressor, decompressor, or transformer instance. + * + * @param handle a handle to a TurboJPEG compressor, decompressor or + * transformer instance + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + */ +DLLEXPORT int DLLCALL tjDestroy(tjhandle handle); + + +/** + * Allocate an image buffer for use with TurboJPEG. You should always use + * this function to allocate the JPEG destination buffer(s) for #tjCompress2() + * and #tjTransform() unless you are disabling automatic buffer + * (re)allocation (by setting #TJFLAG_NOREALLOC.) + * + * @param bytes the number of bytes to allocate + * + * @return a pointer to a newly-allocated buffer with the specified number of + * bytes. + * + * @sa tjFree() + */ +DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes); + + +/** + * Free an image buffer previously allocated by TurboJPEG. You should always + * use this function to free JPEG destination buffer(s) that were automatically + * (re)allocated by #tjCompress2() or #tjTransform() or that were manually + * allocated using #tjAlloc(). + * + * @param buffer address of the buffer to free + * + * @sa tjAlloc() + */ +DLLEXPORT void DLLCALL tjFree(unsigned char *buffer); + + +/** + * Returns a descriptive error message explaining why the last command failed. + * + * @return a descriptive error message explaining why the last command failed. + */ +DLLEXPORT char* DLLCALL tjGetErrorStr(void); + + +/* Deprecated functions and macros */ +#define TJFLAG_FORCEMMX 8 +#define TJFLAG_FORCESSE 16 +#define TJFLAG_FORCESSE2 32 +#define TJFLAG_FORCESSE3 128 + + +/* Backward compatibility functions and macros (nothing to see here) */ +#define NUMSUBOPT TJ_NUMSAMP +#define TJ_444 TJSAMP_444 +#define TJ_422 TJSAMP_422 +#define TJ_420 TJSAMP_420 +#define TJ_411 TJSAMP_420 +#define TJ_GRAYSCALE TJSAMP_GRAY + +#define TJ_BGR 1 +#define TJ_BOTTOMUP TJFLAG_BOTTOMUP +#define TJ_FORCEMMX TJFLAG_FORCEMMX +#define TJ_FORCESSE TJFLAG_FORCESSE +#define TJ_FORCESSE2 TJFLAG_FORCESSE2 +#define TJ_ALPHAFIRST 64 +#define TJ_FORCESSE3 TJFLAG_FORCESSE3 +#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE +#define TJ_YUV 512 + +DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height); + +DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height, + int jpegSubsamp); + +DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height, + int subsamp); + +DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, + int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, + unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags); + +DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, + unsigned char *srcBuf, int width, int pitch, int height, int pixelSize, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, + unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height); + +DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, + int *jpegSubsamp); + +DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelSize, int flags); + +DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, + unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, + int flags); + + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-jni.c glmark2-2021.02/src/libjpeg-turbo/turbojpeg-jni.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-jni.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/turbojpeg-jni.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1166 @@ +/* + * Copyright (C)2011-2016 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "turbojpeg.h" +#ifdef WIN32 +#include "tjutil.h" +#endif +#include +#include "java/org_libjpegturbo_turbojpeg_TJCompressor.h" +#include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h" +#include "java/org_libjpegturbo_turbojpeg_TJ.h" + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) + +#define _throw(msg, exceptionClass) { \ + jclass _exccls=(*env)->FindClass(env, exceptionClass); \ + if(!_exccls || (*env)->ExceptionCheck(env)) goto bailout; \ + (*env)->ThrowNew(env, _exccls, msg); \ + goto bailout; \ +} + +#define _throwtj() _throw(tjGetErrorStr(), "org/libjpegturbo/turbojpeg/TJException") + +#define _throwarg(msg) _throw(msg, "java/lang/IllegalArgumentException") + +#define _throwmem() _throw("Memory allocation failure", "java/lang/OutOfMemoryError"); + +#define bailif0(f) {if(!(f) || (*env)->ExceptionCheck(env)) { \ + goto bailout; \ +}} + +#define gethandle() \ + jclass _cls=(*env)->GetObjectClass(env, obj); \ + jfieldID _fid; \ + if(!_cls || (*env)->ExceptionCheck(env)) goto bailout; \ + bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J")); \ + handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid); \ + +#ifdef _WIN32 +#define setenv(envvar, value, dummy) _putenv_s(envvar, value) +#endif + +#define prop2env(property, envvar) \ +{ \ + if((jName=(*env)->NewStringUTF(env, property))!=NULL \ + && (jValue=(*env)->CallStaticObjectMethod(env, cls, mid, jName))!=NULL) \ + { \ + if((value=(*env)->GetStringUTFChars(env, jValue, 0))!=NULL) \ + { \ + setenv(envvar, value, 1); \ + (*env)->ReleaseStringUTFChars(env, jValue, value); \ + } \ + } \ +} + +int ProcessSystemProperties(JNIEnv *env) +{ + jclass cls; jmethodID mid; + jstring jName, jValue; + const char *value; + + bailif0(cls=(*env)->FindClass(env, "java/lang/System")); + bailif0(mid=(*env)->GetStaticMethodID(env, cls, "getProperty", + "(Ljava/lang/String;)Ljava/lang/String;")); + + prop2env("turbojpeg.optimize", "TJ_OPTIMIZE"); + prop2env("turbojpeg.arithmetic", "TJ_ARITHMETIC"); + prop2env("turbojpeg.restart", "TJ_RESTART"); + prop2env("turbojpeg.progressive", "TJ_PROGRESSIVE"); + return 0; + + bailout: + return -1; +} + +/* TurboJPEG 1.2.x: TJ::bufSize() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize + (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp) +{ + jint retval=(jint)tjBufSize(width, height, jpegSubsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::bufSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII + (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp) +{ + jint retval=(jint)tjBufSizeYUV2(width, pad, height, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.2.x: TJ::bufSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III + (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp) +{ + return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width, + 4, height, subsamp); +} + +/* TurboJPEG 1.4.x: TJ::planeSizeYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII + (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride, + jint height, jint subsamp) +{ + jint retval=(jint)tjPlaneSizeYUV(componentID, width, stride, height, + subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::planeWidth() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III + (JNIEnv *env, jclass cls, jint componentID, jint width, jint subsamp) +{ + jint retval=(jint)tjPlaneWidth(componentID, width, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.4.x: TJ::planeHeight() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III + (JNIEnv *env, jclass cls, jint componentID, jint height, jint subsamp) +{ + jint retval=(jint)tjPlaneHeight(componentID, height, subsamp); + if(retval==-1) _throwarg(tjGetErrorStr()); + + bailout: + return retval; +} + +/* TurboJPEG 1.2.x: TJCompressor::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitCompress())==NULL) + _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +static jint TJCompressor_compress + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jbyteArray dst, + jint jpegSubsamp, jint jpegQual, jint flags) +{ + tjhandle handle=0; + unsigned long jpegSize=0; + jsize arraySize=0, actualPitch; + unsigned char *srcBuf=NULL, *jpegBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0) + _throwarg("Invalid argument in compress()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch; + arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf]; + if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst)<(jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(ProcessSystemProperties(env)<0) goto bailout; + + if(tjCompress2(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width, + pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, jpegQual, + flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + return (jint)jpegSize; +} + +/* TurboJPEG 1.3.x: TJCompressor::compress() byte source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) +{ + return TJCompressor_compress(env, obj, src, 1, x, y, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); +} + +/* TurboJPEG 1.2.x: TJCompressor::compress() byte source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) +{ + return TJCompressor_compress(env, obj, src, 1, 0, 0, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); +} + +/* TurboJPEG 1.3.x: TJCompressor::compress() int source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + + return TJCompressor_compress(env, obj, src, sizeof(jint), x, y, width, + stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + + bailout: + return 0; +} + +/* TurboJPEG 1.2.x: TJCompressor::compress() int source */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + + return TJCompressor_compress(env, obj, src, sizeof(jint), 0, 0, width, + stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + + bailout: + return 0; +} + +/* TurboJPEG 1.4.x: TJCompressor::compressFromYUV() */ +JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jint width, jintArray jSrcStrides, jint height, jint subsamp, + jbyteArray dst, jint jpegQual, jint flags) +{ + tjhandle handle=0; + unsigned long jpegSize=0; + jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; + const unsigned char *srcPlanes[3]; + unsigned char *jpegBuf=NULL; + int *srcOffsets=NULL, *srcStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in compressFromYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)<(jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcOffsets=(*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for(i=0; iGetObjectArrayElement(env, srcobjs, i)); + if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], + 0)); + srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; + } + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(ProcessSystemProperties(env)<0) goto bailout; + + if(tjCompressFromYUVPlanes(handle, srcPlanes, width, srcStrides, height, + subsamp, &jpegBuf, &jpegSize, jpegQual, flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if(srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if(srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); + return (jint)jpegSize; +} + +static void TJCompressor_encodeYUV + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; + unsigned char *srcBuf=NULL, *dstPlanes[3]; + int *dstOffsets=NULL, *dstStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0 || subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in encodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF + || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, dstobjs)GetArrayLength(env, jDstOffsets)GetArrayLength(env, jDstStrides)GetArrayLength(env, src)*srcElementSizeGetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], + 0)); + dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; + } + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if(tjEncodeYUVPlanes(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], + width, pitch, height, pf, dstPlanes, dstStrides, subsamp, flags)==-1) + _throwtj(); + + bailout: + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if(dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if(dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); + return; +} + +/* TurboJPEG 1.4.x: TJCompressor::encodeYUV() byte source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + TJCompressor_encodeYUV(env, obj, src, 1, x, y, width, pitch, height, pf, + dstobjs, jDstOffsets, jDstStrides, subsamp, flags); +} + +/* TurboJPEG 1.4.x: TJCompressor::encodeYUV() int source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + + TJCompressor_encodeYUV(env, obj, src, sizeof(jint), x, y, width, + stride*sizeof(jint), height, pf, dstobjs, jDstOffsets, jDstStrides, + subsamp, flags); + + bailout: + return; +} + +JNIEXPORT void JNICALL TJCompressor_encodeYUV_12 + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0; + unsigned char *srcBuf=NULL, *dstBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 + || pitch<0) + _throwarg("Invalid argument in encodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height; + if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst) + <(jsize)tjBufSizeYUV(width, height, subsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + return; +} + +/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() byte source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + TJCompressor_encodeYUV_12(env, obj, src, 1, width, pitch, height, pf, dst, + subsamp, flags); +} + +/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() int source */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + + TJCompressor_encodeYUV_12(env, obj, src, sizeof(jint), width, + stride*sizeof(jint), height, pf, dst, subsamp, flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJCompressor::destroy() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy + (JNIEnv *env, jobject obj) +{ + tjhandle handle=0; + + gethandle(); + + if(tjDestroy(handle)==-1) _throwtj(); + (*env)->SetLongField(env, obj, _fid, 0); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitDecompress())==NULL) _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::getScalingFactors() */ +JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors + (JNIEnv *env, jclass cls) +{ + jclass sfcls=NULL; jfieldID fid=0; + tjscalingfactor *sf=NULL; int n=0, i; + jobject sfobj=NULL; + jobjectArray sfjava=NULL; + + if((sf=tjGetScalingFactors(&n))==NULL || n==0) + _throwarg(tjGetErrorStr()); + + bailif0(sfcls=(*env)->FindClass(env, "org/libjpegturbo/turbojpeg/TJScalingFactor")); + bailif0(sfjava=(jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0)); + + for(i=0; iAllocObject(env, sfcls)); + bailif0(fid=(*env)->GetFieldID(env, sfcls, "num", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].num); + bailif0(fid=(*env)->GetFieldID(env, sfcls, "denom", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].denom); + (*env)->SetObjectArrayElement(env, sfjava, i, sfobj); + } + + bailout: + return sfjava; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompressHeader() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize) +{ + tjhandle handle=0; + unsigned char *jpegBuf=NULL; + int width=0, height=0, jpegSubsamp=-1, jpegColorspace=-1; + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetPrimitiveArrayCritical(env, src, 0)); + + if(tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize, + &width, &height, &jpegSubsamp, &jpegColorspace)==-1) + _throwtj(); + + (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); jpegBuf=NULL; + + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + (*env)->SetIntField(env, obj, _fid, jpegSubsamp); + if((_fid=(*env)->GetFieldID(env, _cls, "jpegColorspace", "I"))==0) + (*env)->ExceptionClear(env); + else + (*env)->SetIntField(env, obj, _fid, jpegColorspace); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + (*env)->SetIntField(env, obj, _fid, width); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + (*env)->SetIntField(env, obj, _fid, height); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +static void TJDecompressor_decompress + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst, + jint dstElementSize, jint x, jint y, jint width, jint pitch, jint height, + jint pf, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + unsigned char *jpegBuf=NULL, *dstBuf=NULL; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, src)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, + &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +/* TurboJPEG 1.3.x: TJDecompressor::decompress() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, x, y, width, + pitch, height, pf, flags); +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompress() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 0, 0, width, + pitch, height, pf, flags); +} + +/* TurboJPEG 1.3.x: TJDecompressor::decompress() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), x, y, + width, stride*sizeof(jint), height, pf, flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompress() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 0, 0, + width, stride*sizeof(jint), height, pf, flags); + + bailout: + return; + +} + +/* TurboJPEG 1.4.x: TJDecompressor::decompressToYUV() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, + jobjectArray dstobjs, jintArray jDstOffsets, jint desiredWidth, + jintArray jDstStrides, jint desiredHeight, jint flags) +{ + tjhandle handle=0; + jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; + unsigned char *jpegBuf=NULL, *dstPlanes[3]; + int *dstOffsets=NULL, *dstStrides=NULL; + int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; + int nc=0, i, width, height, scaledWidth, scaledHeight, nsf=0; + tjscalingfactor *sf; + + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + + nc=(jpegSubsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3); + + width=desiredWidth; height=desiredHeight; + if(width==0) width=jpegWidth; + if(height==0) height=jpegHeight; + sf=tjGetScalingFactors(&nsf); + if(!sf || nsf<1) + _throwarg(tjGetErrorStr()); + for(i=0; i=nsf) + _throwarg("Could not scale down to desired image dimensions"); + + bailif0(dstOffsets=(*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], + 0)); + dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; + } + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if(tjDecompressToYUVPlanes(handle, jpegBuf, (unsigned long)jpegSize, + dstPlanes, desiredWidth, dstStrides, desiredHeight, flags)==-1) + _throwtj(); + + bailout: + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if(dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if(dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); + return; +} + +/* TurboJPEG 1.2.x: TJDecompressor::decompressToYUV() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint flags) +{ + tjhandle handle=0; + unsigned char *jpegBuf=NULL, *dstBuf=NULL; + int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; + + gethandle(); + + if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + if((*env)->GetArrayLength(env, dst) + <(jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + return; +} + +static void TJDecompressor_decodeYUV + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jarray dst, jint dstElementSize, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) +{ + tjhandle handle=0; + jsize arraySize=0, actualPitch; + jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; + const unsigned char *srcPlanes[3]; + unsigned char *dstBuf=NULL; + int *srcOffsets=NULL, *srcStrides=NULL; + int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 1:3), i; + + gethandle(); + + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || subsamp<0 + || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in decodeYUV()"); + if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF + || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for(i=0; iGetObjectArrayElement(env, srcobjs, i)); + if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], + 0)); + srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; + } + bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if(tjDecodeYUVPlanes(handle, srcPlanes, srcStrides, subsamp, + &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, + flags)==-1) + _throwtj(); + + bailout: + if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if(srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if(srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); + return; +} + +/* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() byte destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jbyteArray dst, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jint flags) +{ + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, 1, x, y, width, pitch, height, pf, flags); +} + +/* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() int destination */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jintArray dst, jint x, jint y, + jint width, jint stride, jint height, jint pf, jint flags) +{ + if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decodeYUV()"); + if(tjPixelSize[pf]!=sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decoding to an integer buffer."); + + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, sizeof(jint), x, y, width, stride*sizeof(jint), height, pf, + flags); + + bailout: + return; +} + +/* TurboJPEG 1.2.x: TJTransformer::init() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init + (JNIEnv *env, jobject obj) +{ + jclass cls; + jfieldID fid; + tjhandle handle; + + if((handle=tjInitTransform())==NULL) _throwtj(); + + bailif0(cls=(*env)->GetObjectClass(env, obj)); + bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); + + bailout: + return; +} + +typedef struct _JNICustomFilterParams +{ + JNIEnv *env; + jobject tobj; + jobject cfobj; +} JNICustomFilterParams; + +static int JNICustomFilter(short *coeffs, tjregion arrayRegion, + tjregion planeRegion, int componentIndex, int transformIndex, + tjtransform *transform) +{ + JNICustomFilterParams *params=(JNICustomFilterParams *)transform->data; + JNIEnv *env=params->env; + jobject tobj=params->tobj, cfobj=params->cfobj; + jobject arrayRegionObj, planeRegionObj, bufobj, borobj; + jclass cls; jmethodID mid; jfieldID fid; + + bailif0(bufobj=(*env)->NewDirectByteBuffer(env, coeffs, + sizeof(short)*arrayRegion.w*arrayRegion.h)); + bailif0(cls=(*env)->FindClass(env, "java/nio/ByteOrder")); + bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder", + "()Ljava/nio/ByteOrder;")); + bailif0(borobj=(*env)->CallStaticObjectMethod(env, cls, mid)); + bailif0(cls=(*env)->GetObjectClass(env, bufobj)); + bailif0(mid=(*env)->GetMethodID(env, cls, "order", + "(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;")); + (*env)->CallObjectMethod(env, bufobj, mid, borobj); + bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer", + "()Ljava/nio/ShortBuffer;")); + bailif0(bufobj=(*env)->CallObjectMethod(env, bufobj, mid)); + + bailif0(cls=(*env)->FindClass(env, "java/awt/Rectangle")); + bailif0(arrayRegionObj=(*env)->AllocObject(env, cls)); + bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x); + bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y); + bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w); + bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h); + + bailif0(planeRegionObj=(*env)->AllocObject(env, cls)); + bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x); + bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y); + bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w); + bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h); + + bailif0(cls=(*env)->GetObjectClass(env, cfobj)); + bailif0(mid=(*env)->GetMethodID(env, cls, "customFilter", + "(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V")); + (*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj, + planeRegionObj, componentIndex, transformIndex, tobj); + + return 0; + + bailout: + return -1; +} + +/* TurboJPEG 1.2.x: TJTransformer::transform() */ +JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform + (JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize, + jobjectArray dstobjs, jobjectArray tobjs, jint flags) +{ + tjhandle handle=0; int i; + unsigned char *jpegBuf=NULL, **dstBufs=NULL; jsize n=0; + unsigned long *dstSizes=NULL; tjtransform *t=NULL; + jbyteArray *jdstBufs=NULL; + int jpegWidth=0, jpegHeight=0, jpegSubsamp; + jintArray jdstSizes=0; jint *dstSizesi=NULL; + JNICustomFilterParams *params=NULL; + + gethandle(); + + if((*env)->GetArrayLength(env, jsrcBuf)GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); + + n=(*env)->GetArrayLength(env, dstobjs); + if(n!=(*env)->GetArrayLength(env, tobjs)) + _throwarg("Mismatch between size of transforms array and destination buffers array"); + + if((dstBufs=(unsigned char **)malloc(sizeof(unsigned char *)*n))==NULL) + _throwmem(); + if((jdstBufs=(jbyteArray *)malloc(sizeof(jbyteArray)*n))==NULL) + _throwmem(); + if((dstSizes=(unsigned long *)malloc(sizeof(unsigned long)*n))==NULL) + _throwmem(); + if((t=(tjtransform *)malloc(sizeof(tjtransform)*n))==NULL) + _throwmem(); + if((params=(JNICustomFilterParams *)malloc(sizeof(JNICustomFilterParams)*n)) + ==NULL) + _throwmem(); + for(i=0; iGetObjectArrayElement(env, tobjs, i)); + bailif0(_cls=(*env)->GetObjectClass(env, tobj)); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "op", "I")); + t[i].op=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "options", "I")); + t[i].options=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "x", "I")); + t[i].r.x=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "y", "I")); + t[i].r.y=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "width", "I")); + t[i].r.w=(*env)->GetIntField(env, tobj, _fid); + bailif0(_fid=(*env)->GetFieldID(env, _cls, "height", "I")); + t[i].r.h=(*env)->GetIntField(env, tobj, _fid); + + bailif0(_fid=(*env)->GetFieldID(env, _cls, "cf", + "Lorg/libjpegturbo/turbojpeg/TJCustomFilter;")); + cfobj=(*env)->GetObjectField(env, tobj, _fid); + if(cfobj) + { + params[i].env=env; + params[i].tobj=tobj; + params[i].cfobj=cfobj; + t[i].customFilter=JNICustomFilter; + t[i].data=(void *)¶ms[i]; + } + } + + for(i=0; iGetObjectArrayElement(env, dstobjs, i)); + if((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i]) + GetPrimitiveArrayCritical(env, jsrcBuf, 0)); + for(i=0; iGetPrimitiveArrayCritical(env, jdstBufs[i], 0)); + + if(tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t, + flags|TJFLAG_NOREALLOC)==-1) + _throwtj(); + + for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + dstBufs[i]=NULL; + } + (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + jpegBuf=NULL; + + jdstSizes=(*env)->NewIntArray(env, n); + bailif0(dstSizesi=(*env)->GetIntArrayElements(env, jdstSizes, 0)); + for(i=0; iReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0); + if(dstBufs) + { + for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + } + free(dstBufs); + } + if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + if(jdstBufs) free(jdstBufs); + if(dstSizes) free(dstSizes); + if(t) free(t); + return jdstSizes; +} + +/* TurboJPEG 1.2.x: TJDecompressor::destroy() */ +JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy + (JNIEnv *env, jobject obj) +{ + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-mapfile glmark2-2021.02/src/libjpeg-turbo/turbojpeg-mapfile --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-mapfile 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/turbojpeg-mapfile 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +TURBOJPEG_1.0 +{ + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; +}; + +TURBOJPEG_1.1 +{ + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; +} TURBOJPEG_1.0; + +TURBOJPEG_1.2 +{ + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; +} TURBOJPEG_1.1; + +TURBOJPEG_1.4 +{ + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; +} TURBOJPEG_1.2; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-mapfile.jni glmark2-2021.02/src/libjpeg-turbo/turbojpeg-mapfile.jni --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/turbojpeg-mapfile.jni 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/turbojpeg-mapfile.jni 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,92 @@ +TURBOJPEG_1.0 +{ + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; +}; + +TURBOJPEG_1.1 +{ + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; +} TURBOJPEG_1.0; + +TURBOJPEG_1.2 +{ + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; + Java_org_libjpegturbo_turbojpeg_TJ_bufSize; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III; + Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors; + Java_org_libjpegturbo_turbojpeg_TJCompressor_init; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_init; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJTransformer_init; + Java_org_libjpegturbo_turbojpeg_TJTransformer_transform; +} TURBOJPEG_1.1; + +TURBOJPEG_1.3 +{ + global: + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII; +} TURBOJPEG_1.2; + +TURBOJPEG_1.4 +{ + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III; + Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III; +} TURBOJPEG_1.3; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/usage.txt glmark2-2021.02/src/libjpeg-turbo/usage.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/usage.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/usage.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,660 @@ +NOTE: This file was modified by The libjpeg-turbo Project to include only +information relevant to libjpeg-turbo and to wordsmith certain sections. + +USAGE instructions for the Independent JPEG Group's JPEG software +================================================================= + +This file describes usage of the JPEG conversion programs cjpeg and djpeg, +as well as the utility programs jpegtran, rdjpgcom and wrjpgcom. (See +the other documentation files if you wish to use the JPEG library within +your own programs.) + +If you are on a Unix machine you may prefer to read the Unix-style manual +pages in files cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1. + + +INTRODUCTION + +These programs implement JPEG image encoding, decoding, and transcoding. +JPEG (pronounced "jay-peg") is a standardized compression method for +full-color and grayscale images. + + +GENERAL USAGE + +We provide two programs, cjpeg to compress an image file into JPEG format, +and djpeg to decompress a JPEG file back into a conventional image format. + +On Unix-like systems, you say: + cjpeg [switches] [imagefile] >jpegfile +or + djpeg [switches] [jpegfile] >imagefile +The programs read the specified input file, or standard input if none is +named. They always write to standard output (with trace/error messages to +standard error). These conventions are handy for piping images between +programs. + +On most non-Unix systems, you say: + cjpeg [switches] imagefile jpegfile +or + djpeg [switches] jpegfile imagefile +i.e., both the input and output files are named on the command line. This +style is a little more foolproof, and it loses no functionality if you don't +have pipes. (You can get this style on Unix too, if you prefer, by defining +TWO_FILE_COMMANDLINE when you compile the programs; see install.txt.) + +You can also say: + cjpeg [switches] -outfile jpegfile imagefile +or + djpeg [switches] -outfile imagefile jpegfile +This syntax works on all systems, so it is useful for scripts. + +The currently supported image file formats are: PPM (PBMPLUS color format), +PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster Toolkit +format). (RLE is supported only if the URT library is available, which it +isn't on most non-Unix systems.) cjpeg recognizes the input image format +automatically, with the exception of some Targa files. You have to tell djpeg +which format to generate. + +JPEG files are in the defacto standard JFIF file format. There are other, +less widely used JPEG-based file formats, but we don't support them. + +All switch names may be abbreviated; for example, -grayscale may be written +-gray or -gr. Most of the "basic" switches can be abbreviated to as little as +one letter. Upper and lower case are equivalent (-BMP is the same as -bmp). +British spellings are also accepted (e.g., -greyscale), though for brevity +these are not mentioned below. + + +CJPEG DETAILS + +The basic command line switches for cjpeg are: + + -quality N[,...] Scale quantization tables to adjust image quality. + Quality is 0 (worst) to 100 (best); default is 75. + (See below for more info.) + + -grayscale Create monochrome JPEG file from color input. + Be sure to use this switch when compressing a grayscale + BMP file, because cjpeg isn't bright enough to notice + whether a BMP file uses only shades of gray. By + saying -grayscale, you'll get a smaller JPEG file that + takes less time to process. + + -rgb Create RGB JPEG file. + Using this switch suppresses the conversion from RGB + colorspace input to the default YCbCr JPEG colorspace. + + -optimize Perform optimization of entropy encoding parameters. + Without this, default encoding parameters are used. + -optimize usually makes the JPEG file a little smaller, + but cjpeg runs somewhat slower and needs much more + memory. Image quality and speed of decompression are + unaffected by -optimize. + + -progressive Create progressive JPEG file (see below). + + -targa Input file is Targa format. Targa files that contain + an "identification" field will not be automatically + recognized by cjpeg; for such files you must specify + -targa to make cjpeg treat the input as Targa format. + For most Targa files, you won't need this switch. + +The -quality switch lets you trade off compressed file size against quality of +the reconstructed image: the higher the quality setting, the larger the JPEG +file, and the closer the output image will be to the original input. Normally +you want to use the lowest quality setting (smallest file) that decompresses +into something visually indistinguishable from the original image. For this +purpose the quality setting should generally be between 50 and 95 (the default +is 75) for photographic images. If you see defects at -quality 75, then go up +5 or 10 counts at a time until you are happy with the output image. (The +optimal setting will vary from one image to another.) + +-quality 100 will generate a quantization table of all 1's, minimizing loss +in the quantization step (but there is still information loss in subsampling, +as well as roundoff error.) For most images, specifying a quality value above +about 95 will increase the size of the compressed file dramatically, and while +the quality gain from these higher quality values is measurable (using metrics +such as PSNR or SSIM), it is rarely perceivable by human vision. + +In the other direction, quality values below 50 will produce very small files +of low image quality. Settings around 5 to 10 might be useful in preparing an +index of a large image library, for example. Try -quality 2 (or so) for some +amusing Cubist effects. (Note: quality values below about 25 generate 2-byte +quantization tables, which are considered optional in the JPEG standard. +cjpeg emits a warning message when you give such a quality value, because some +other JPEG programs may be unable to decode the resulting file. Use -baseline +if you need to ensure compatibility at low quality values.) + +The -quality option has been extended in this version of cjpeg to support +separate quality settings for luminance and chrominance (or, in general, +separate settings for every quantization table slot.) The principle is the +same as chrominance subsampling: since the human eye is more sensitive to +spatial changes in brightness than spatial changes in color, the chrominance +components can be quantized more than the luminance components without +incurring any visible image quality loss. However, unlike subsampling, this +feature reduces data in the frequency domain instead of the spatial domain, +which allows for more fine-grained control. This option is useful in +quality-sensitive applications, for which the artifacts generated by +subsampling may be unacceptable. + +The -quality option accepts a comma-separated list of parameters, which +respectively refer to the quality levels that should be assigned to the +quantization table slots. If there are more q-table slots than parameters, +then the last parameter is replicated. Thus, if only one quality parameter is +given, this is used for both luminance and chrominance (slots 0 and 1, +respectively), preserving the legacy behavior of cjpeg v6b and prior. More (or +customized) quantization tables can be set with the -qtables option and +assigned to components with the -qslots option (see the "wizard" switches +below.) + +JPEG files generated with separate luminance and chrominance quality are +fully compliant with standard JPEG decoders. + +CAUTION: For this setting to be useful, be sure to pass an argument of +-sample 1x1 to cjpeg to disable chrominance subsampling. Otherwise, the +default subsampling level (2x2, AKA "4:2:0") will be used. + +The -progressive switch creates a "progressive JPEG" file. In this type of +JPEG file, the data is stored in multiple scans of increasing quality. If the +file is being transmitted over a slow communications link, the decoder can use +the first scan to display a low-quality image very quickly, and can then +improve the display with each subsequent scan. The final image is exactly +equivalent to a standard JPEG file of the same quality setting, and the total +file size is about the same --- often a little smaller. + +Switches for advanced users: + + -arithmetic Use arithmetic coding. CAUTION: arithmetic coded JPEG + is not yet widely implemented, so many decoders will + be unable to view an arithmetic coded JPEG file at + all. + + -dct int Use integer DCT method (default). + -dct fast Use fast integer DCT (less accurate). + In libjpeg-turbo, the fast method is generally about + 5-15% faster than the int method when using the + x86/x86-64 SIMD extensions (results may vary with other + SIMD implementations, or when using libjpeg-turbo + without SIMD extensions.) For quality levels of 90 and + below, there should be little or no perceptible + difference between the two algorithms. For quality + levels above 90, however, the difference between + the fast and the int methods becomes more pronounced. + With quality=97, for instance, the fast method incurs + generally about a 1-3 dB loss (in PSNR) relative to + the int method, but this can be larger for some images. + Do not use the fast method with quality levels above + 97. The algorithm often degenerates at quality=98 and + above and can actually produce a more lossy image than + if lower quality levels had been used. Also, in + libjpeg-turbo, the fast method is not fully accerated + for quality levels above 97, so it will be slower than + the int method. + -dct float Use floating-point DCT method. + The float method is mainly a legacy feature. It does + not produce significantly more accurate results than + the int method, and it is much slower. The float + method may also give different results on different + machines due to varying roundoff behavior, whereas the + integer methods should give the same results on all + machines. + + -restart N Emit a JPEG restart marker every N MCU rows, or every + N MCU blocks if "B" is attached to the number. + -restart 0 (the default) means no restart markers. + + -smooth N Smooth the input image to eliminate dithering noise. + N, ranging from 1 to 100, indicates the strength of + smoothing. 0 (the default) means no smoothing. + + -maxmemory N Set limit for amount of memory to use in processing + large images. Value is in thousands of bytes, or + millions of bytes if "M" is attached to the number. + For example, -max 4m selects 4000000 bytes. If more + space is needed, temporary files will be used. + + -verbose Enable debug printout. More -v's give more printout. + or -debug Also, version information is printed at startup. + +The -restart option inserts extra markers that allow a JPEG decoder to +resynchronize after a transmission error. Without restart markers, any damage +to a compressed file will usually ruin the image from the point of the error +to the end of the image; with restart markers, the damage is usually confined +to the portion of the image up to the next restart marker. Of course, the +restart markers occupy extra space. We recommend -restart 1 for images that +will be transmitted across unreliable networks such as Usenet. + +The -smooth option filters the input to eliminate fine-scale noise. This is +often useful when converting dithered images to JPEG: a moderate smoothing +factor of 10 to 50 gets rid of dithering patterns in the input file, resulting +in a smaller JPEG file and a better-looking image. Too large a smoothing +factor will visibly blur the image, however. + +Switches for wizards: + + -baseline Force baseline-compatible quantization tables to be + generated. This clamps quantization values to 8 bits + even at low quality settings. (This switch is poorly + named, since it does not ensure that the output is + actually baseline JPEG. For example, you can use + -baseline and -progressive together.) + + -qtables file Use the quantization tables given in the specified + text file. + + -qslots N[,...] Select which quantization table to use for each color + component. + + -sample HxV[,...] Set JPEG sampling factors for each color component. + + -scans file Use the scan script given in the specified text file. + +The "wizard" switches are intended for experimentation with JPEG. If you +don't know what you are doing, DON'T USE THEM. These switches are documented +further in the file wizard.txt. + + +DJPEG DETAILS + +The basic command line switches for djpeg are: + + -colors N Reduce image to at most N colors. This reduces the + or -quantize N number of colors used in the output image, so that it + can be displayed on a colormapped display or stored in + a colormapped file format. For example, if you have + an 8-bit display, you'd need to reduce to 256 or fewer + colors. (-colors is the recommended name, -quantize + is provided only for backwards compatibility.) + + -fast Select recommended processing options for fast, low + quality output. (The default options are chosen for + highest quality output.) Currently, this is equivalent + to "-dct fast -nosmooth -onepass -dither ordered". + + -grayscale Force grayscale output even if JPEG file is color. + Useful for viewing on monochrome displays; also, + djpeg runs noticeably faster in this mode. + + -rgb Force RGB output even if JPEG file is grayscale. + + -scale M/N Scale the output image by a factor M/N. Currently + the scale factor must be M/8, where M is an integer + between 1 and 16 inclusive, or any reduced fraction + thereof (such as 1/2, 3/4, etc. Scaling is handy if + the image is larger than your screen; also, djpeg runs + much faster when scaling down the output. + + -bmp Select BMP output format (Windows flavor). 8-bit + colormapped format is emitted if -colors or -grayscale + is specified, or if the JPEG file is grayscale; + otherwise, 24-bit full-color format is emitted. + + -gif Select GIF output format. Since GIF does not support + more than 256 colors, -colors 256 is assumed (unless + you specify a smaller number of colors). If you + specify -fast, the default number of colors is 216. + + -os2 Select BMP output format (OS/2 1.x flavor). 8-bit + colormapped format is emitted if -colors or -grayscale + is specified, or if the JPEG file is grayscale; + otherwise, 24-bit full-color format is emitted. + + -pnm Select PBMPLUS (PPM/PGM) output format (this is the + default format). PGM is emitted if the JPEG file is + grayscale or if -grayscale is specified; otherwise + PPM is emitted. + + -rle Select RLE output format. (Requires URT library.) + + -targa Select Targa output format. Grayscale format is + emitted if the JPEG file is grayscale or if + -grayscale is specified; otherwise, colormapped format + is emitted if -colors is specified; otherwise, 24-bit + full-color format is emitted. + +Switches for advanced users: + + -dct int Use integer DCT method (default). + -dct fast Use fast integer DCT (less accurate). + In libjpeg-turbo, the fast method is generally about + 5-15% faster than the int method when using the + x86/x86-64 SIMD extensions (results may vary with other + SIMD implementations, or when using libjpeg-turbo + without SIMD extensions.) If the JPEG image was + compressed using a quality level of 85 or below, then + there should be little or no perceptible difference + between the two algorithms. When decompressing images + that were compressed using quality levels above 85, + however, the difference between the fast and int + methods becomes more pronounced. With images + compressed using quality=97, for instance, the fast + method incurs generally about a 4-6 dB loss (in PSNR) + relative to the int method, but this can be larger for + some images. If you can avoid it, do not use the fast + method when decompressing images that were compressed + using quality levels above 97. The algorithm often + degenerates for such images and can actually produce + a more lossy output image than if the JPEG image had + been compressed using lower quality levels. + -dct float Use floating-point DCT method. + The float method is mainly a legacy feature. It does +  not produce significantly more accurate results than + the int method, and it is much slower. The float + method may also give different results on different + machines due to varying roundoff behavior, whereas the + integer methods should give the same results on all + machines. + + -dither fs Use Floyd-Steinberg dithering in color quantization. + -dither ordered Use ordered dithering in color quantization. + -dither none Do not use dithering in color quantization. + By default, Floyd-Steinberg dithering is applied when + quantizing colors; this is slow but usually produces + the best results. Ordered dither is a compromise + between speed and quality; no dithering is fast but + usually looks awful. Note that these switches have + no effect unless color quantization is being done. + Ordered dither is only available in -onepass mode. + + -map FILE Quantize to the colors used in the specified image + file. This is useful for producing multiple files + with identical color maps, or for forcing a predefined + set of colors to be used. The FILE must be a GIF + or PPM file. This option overrides -colors and + -onepass. + + -nosmooth Use a faster, lower-quality upsampling routine. + + -onepass Use one-pass instead of two-pass color quantization. + The one-pass method is faster and needs less memory, + but it produces a lower-quality image. -onepass is + ignored unless you also say -colors N. Also, + the one-pass method is always used for grayscale + output (the two-pass method is no improvement then). + + -maxmemory N Set limit for amount of memory to use in processing + large images. Value is in thousands of bytes, or + millions of bytes if "M" is attached to the number. + For example, -max 4m selects 4000000 bytes. If more + space is needed, temporary files will be used. + + -verbose Enable debug printout. More -v's give more printout. + or -debug Also, version information is printed at startup. + + +HINTS FOR CJPEG + +Color GIF files are not the ideal input for JPEG; JPEG is really intended for +compressing full-color (24-bit) images. In particular, don't try to convert +cartoons, line drawings, and other images that have only a few distinct +colors. GIF works great on these, JPEG does not. If you want to convert a +GIF to JPEG, you should experiment with cjpeg's -quality and -smooth options +to get a satisfactory conversion. -smooth 10 or so is often helpful. + +Avoid running an image through a series of JPEG compression/decompression +cycles. Image quality loss will accumulate; after ten or so cycles the image +may be noticeably worse than it was after one cycle. It's best to use a +lossless format while manipulating an image, then convert to JPEG format when +you are ready to file the image away. + +The -optimize option to cjpeg is worth using when you are making a "final" +version for posting or archiving. It's also a win when you are using low +quality settings to make very small JPEG files; the percentage improvement +is often a lot more than it is on larger files. (At present, -optimize +mode is always selected when generating progressive JPEG files.) + +Support for GIF input files was removed in cjpeg v6b due to concerns over +the Unisys LZW patent. Although this patent expired in 2006, cjpeg still +lacks GIF support, for these historical reasons. (Conversion of GIF files to +JPEG is usually a bad idea anyway.) + + +HINTS FOR DJPEG + +To get a quick preview of an image, use the -grayscale and/or -scale switches. +"-grayscale -scale 1/8" is the fastest case. + +Several options are available that trade off image quality to gain speed. +"-fast" turns on the recommended settings. + +"-dct fast" and/or "-nosmooth" gain speed at a small sacrifice in quality. +When producing a color-quantized image, "-onepass -dither ordered" is fast but +much lower quality than the default behavior. "-dither none" may give +acceptable results in two-pass mode, but is seldom tolerable in one-pass mode. + +Two-pass color quantization requires a good deal of memory; on MS-DOS machines +it may run out of memory even with -maxmemory 0. In that case you can still +decompress, with some loss of image quality, by specifying -onepass for +one-pass quantization. + +To avoid the Unisys LZW patent (now expired), djpeg produces uncompressed GIF +files. These are larger than they should be, but are readable by standard GIF +decoders. + + +HINTS FOR BOTH PROGRAMS + +If more space is needed than will fit in the available main memory (as +determined by -maxmemory), temporary files will be used. (MS-DOS versions +will try to get extended or expanded memory first.) The temporary files are +often rather large: in typical cases they occupy three bytes per pixel, for +example 3*800*600 = 1.44Mb for an 800x600 image. If you don't have enough +free disk space, leave out -progressive and -optimize (for cjpeg) or specify +-onepass (for djpeg). + +On MS-DOS, the temporary files are created in the directory named by the TMP +or TEMP environment variable, or in the current directory if neither of those +exist. Amiga implementations put the temp files in the directory named by +JPEGTMP:, so be sure to assign JPEGTMP: to a disk partition with adequate free +space. + +The default memory usage limit (-maxmemory) is set when the software is +compiled. If you get an "insufficient memory" error, try specifying a smaller +-maxmemory value, even -maxmemory 0 to use the absolute minimum space. You +may want to recompile with a smaller default value if this happens often. + +On machines that have "environment" variables, you can define the environment +variable JPEGMEM to set the default memory limit. The value is specified as +described for the -maxmemory switch. JPEGMEM overrides the default value +specified when the program was compiled, and itself is overridden by an +explicit -maxmemory switch. + +On MS-DOS machines, -maxmemory is the amount of main (conventional) memory to +use. (Extended or expanded memory is also used if available.) Most +DOS-specific versions of this software do their own memory space estimation +and do not need you to specify -maxmemory. + + +JPEGTRAN + +jpegtran performs various useful transformations of JPEG files. +It can translate the coded representation from one variant of JPEG to another, +for example from baseline JPEG to progressive JPEG or vice versa. It can also +perform some rearrangements of the image data, for example turning an image +from landscape to portrait format by rotation. For EXIF files and JPEG files +containing Exif data, you may prefer to use exiftran instead. + +jpegtran works by rearranging the compressed data (DCT coefficients), without +ever fully decoding the image. Therefore, its transformations are lossless: +there is no image degradation at all, which would not be true if you used +djpeg followed by cjpeg to accomplish the same conversion. But by the same +token, jpegtran cannot perform lossy operations such as changing the image +quality. However, while the image data is losslessly transformed, metadata +can be removed. See the -copy option for specifics. + +jpegtran uses a command line syntax similar to cjpeg or djpeg. +On Unix-like systems, you say: + jpegtran [switches] [inputfile] >outputfile +On most non-Unix systems, you say: + jpegtran [switches] inputfile outputfile +where both the input and output files are JPEG files. + +To specify the coded JPEG representation used in the output file, +jpegtran accepts a subset of the switches recognized by cjpeg: + -optimize Perform optimization of entropy encoding parameters. + -progressive Create progressive JPEG file. + -arithmetic Use arithmetic coding. + -restart N Emit a JPEG restart marker every N MCU rows, or every + N MCU blocks if "B" is attached to the number. + -scans file Use the scan script given in the specified text file. +See the previous discussion of cjpeg for more details about these switches. +If you specify none of these switches, you get a plain baseline-JPEG output +file. The quality setting and so forth are determined by the input file. + +The image can be losslessly transformed by giving one of these switches: + -flip horizontal Mirror image horizontally (left-right). + -flip vertical Mirror image vertically (top-bottom). + -rotate 90 Rotate image 90 degrees clockwise. + -rotate 180 Rotate image 180 degrees. + -rotate 270 Rotate image 270 degrees clockwise (or 90 ccw). + -transpose Transpose image (across UL-to-LR axis). + -transverse Transverse transpose (across UR-to-LL axis). + +The transpose transformation has no restrictions regarding image dimensions. +The other transformations operate rather oddly if the image dimensions are not +a multiple of the iMCU size (usually 8 or 16 pixels), because they can only +transform complete blocks of DCT coefficient data in the desired way. + +jpegtran's default behavior when transforming an odd-size image is designed +to preserve exact reversibility and mathematical consistency of the +transformation set. As stated, transpose is able to flip the entire image +area. Horizontal mirroring leaves any partial iMCU column at the right edge +untouched, but is able to flip all rows of the image. Similarly, vertical +mirroring leaves any partial iMCU row at the bottom edge untouched, but is +able to flip all columns. The other transforms can be built up as sequences +of transpose and flip operations; for consistency, their actions on edge +pixels are defined to be the same as the end result of the corresponding +transpose-and-flip sequence. + +For practical use, you may prefer to discard any untransformable edge pixels +rather than having a strange-looking strip along the right and/or bottom edges +of a transformed image. To do this, add the -trim switch: + -trim Drop non-transformable edge blocks. +Obviously, a transformation with -trim is not reversible, so strictly speaking +jpegtran with this switch is not lossless. Also, the expected mathematical +equivalences between the transformations no longer hold. For example, +"-rot 270 -trim" trims only the bottom edge, but "-rot 90 -trim" followed by +"-rot 180 -trim" trims both edges. + +If you are only interested in perfect transformations, add the -perfect switch: + -perfect Fail with an error if the transformation is not + perfect. +For example, you may want to do + jpegtran -rot 90 -perfect foo.jpg || djpeg foo.jpg | pnmflip -r90 | cjpeg +to do a perfect rotation, if available, or an approximated one if not. + +This version of jpegtran also offers a lossless crop option, which discards +data outside of a given image region but losslessly preserves what is inside. +Like the rotate and flip transforms, lossless crop is restricted by the current +JPEG format; the upper left corner of the selected region must fall on an iMCU +boundary. If it doesn't, then it is silently moved up and/or left to the +nearest iMCU boundary (the lower right corner is unchanged.) Thus, the output +image covers at least the requested region, but it may cover more. The +adjustment of the region dimensions may be optionally disabled by attaching an +'f' character ("force") to the width or height number. + +The image can be losslessly cropped by giving the switch: + -crop WxH+X+Y Crop to a rectangular region of width W and height H, + starting at point X,Y. + +Other not-strictly-lossless transformation switches are: + + -grayscale Force grayscale output. +This option discards the chrominance channels if the input image is YCbCr +(ie, a standard color JPEG), resulting in a grayscale JPEG file. The +luminance channel is preserved exactly, so this is a better method of reducing +to grayscale than decompression, conversion, and recompression. This switch +is particularly handy for fixing a monochrome picture that was mistakenly +encoded as a color JPEG. (In such a case, the space savings from getting rid +of the near-empty chroma channels won't be large; but the decoding time for +a grayscale JPEG is substantially less than that for a color JPEG.) + +jpegtran also recognizes these switches that control what to do with "extra" +markers, such as comment blocks: + -copy none Copy no extra markers from source file. This setting + suppresses all comments and other metadata in the + source file. + -copy comments Copy only comment markers. This setting copies + comments from the source file but discards any other + metadata. + -copy all Copy all extra markers. This setting preserves + miscellaneous markers found in the source file, such + as JFIF thumbnails, Exif data, and Photoshop settings. + In some files, these extra markers can be sizable. + Note that this option will copy thumbnails as-is; + they will not be transformed. +The default behavior is -copy comments. (Note: in IJG releases v6 and v6a, +jpegtran always did the equivalent of -copy none.) + +Additional switches recognized by jpegtran are: + -outfile filename + -maxmemory N + -verbose + -debug +These work the same as in cjpeg or djpeg. + + +THE COMMENT UTILITIES + +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. + +We provide two utility programs to display COM block contents and add COM +blocks to a JPEG file. + +rdjpgcom searches a JPEG file and prints the contents of any COM blocks on +standard output. The command line syntax is + rdjpgcom [-raw] [-verbose] [inputfilename] +The switch "-raw" (or just "-r") causes rdjpgcom to output non-printable +characters in JPEG comments. These characters are normally escaped for +security reasons. +The switch "-verbose" (or just "-v") causes rdjpgcom to also display the JPEG +image dimensions. If you omit the input file name from the command line, +the JPEG file is read from standard input. (This may not work on some +operating systems, if binary data can't be read from stdin.) + +wrjpgcom adds a COM block, containing text you provide, to a JPEG file. +Ordinarily, the COM block is added after any existing COM blocks, but you +can delete the old COM blocks if you wish. wrjpgcom produces a new JPEG +file; it does not modify the input file. DO NOT try to overwrite the input +file by directing wrjpgcom's output back into it; on most systems this will +just destroy your file. + +The command line syntax for wrjpgcom is similar to cjpeg's. On Unix-like +systems, it is + wrjpgcom [switches] [inputfilename] +The output file is written to standard output. The input file comes from +the named file, or from standard input if no input file is named. + +On most non-Unix systems, the syntax is + wrjpgcom [switches] inputfilename outputfilename +where both input and output file names must be given explicitly. + +wrjpgcom understands three switches: + -replace Delete any existing COM blocks from the file. + -comment "Comment text" Supply new COM text on command line. + -cfile name Read text for new COM block from named file. +(Switch names can be abbreviated.) If you have only one line of comment text +to add, you can provide it on the command line with -comment. The comment +text must be surrounded with quotes so that it is treated as a single +argument. Longer comments can be read from a text file. + +If you give neither -comment nor -cfile, then wrjpgcom will read the comment +text from standard input. (In this case an input image file name MUST be +supplied, so that the source JPEG file comes from somewhere else.) You can +enter multiple lines, up to 64KB worth. Type an end-of-file indicator +(usually control-D or control-Z) to terminate the comment text entry. + +wrjpgcom will not add a COM block if the provided comment string is empty. +Therefore -replace -comment "" can be used to delete all COM blocks from a +file. + +These utility programs do not depend on the IJG JPEG library. In +particular, the source code for rdjpgcom is intended as an illustration of +the minimum amount of code required to parse a JPEG file header correctly. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wizard.txt glmark2-2021.02/src/libjpeg-turbo/wizard.txt --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wizard.txt 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wizard.txt 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,211 @@ +Advanced usage instructions for the Independent JPEG Group's JPEG software +========================================================================== + +This file describes cjpeg's "switches for wizards". + +The "wizard" switches are intended for experimentation with JPEG by persons +who are reasonably knowledgeable about the JPEG standard. If you don't know +what you are doing, DON'T USE THESE SWITCHES. You'll likely produce files +with worse image quality and/or poorer compression than you'd get from the +default settings. Furthermore, these switches must be used with caution +when making files intended for general use, because not all JPEG decoders +will support unusual JPEG parameter settings. + + +Quantization Table Adjustment +----------------------------- + +Ordinarily, cjpeg starts with a default set of tables (the same ones given +as examples in the JPEG standard) and scales them up or down according to +the -quality setting. The details of the scaling algorithm can be found in +jcparam.c. At very low quality settings, some quantization table entries +can get scaled up to values exceeding 255. Although 2-byte quantization +values are supported by the IJG software, this feature is not in baseline +JPEG and is not supported by all implementations. If you need to ensure +wide compatibility of low-quality files, you can constrain the scaled +quantization values to no more than 255 by giving the -baseline switch. +Note that use of -baseline will result in poorer quality for the same file +size, since more bits than necessary are expended on higher AC coefficients. + +You can substitute a different set of quantization values by using the +-qtables switch: + + -qtables file Use the quantization tables given in the named file. + +The specified file should be a text file containing decimal quantization +values. The file should contain one to four tables, each of 64 elements. +The tables are implicitly numbered 0,1,etc. in order of appearance. Table +entries appear in normal array order (NOT in the zigzag order in which they +will be stored in the JPEG file). + +Quantization table files are free format, in that arbitrary whitespace can +appear between numbers. Also, comments can be included: a comment starts +with '#' and extends to the end of the line. Here is an example file that +duplicates the default quantization tables: + + # Quantization tables given in JPEG spec, section K.1 + + # This is table 0 (the luminance table): + 16 11 10 16 24 40 51 61 + 12 12 14 19 26 58 60 55 + 14 13 16 24 40 57 69 56 + 14 17 22 29 51 87 80 62 + 18 22 37 56 68 109 103 77 + 24 35 55 64 81 104 113 92 + 49 64 78 87 103 121 120 101 + 72 92 95 98 112 100 103 99 + + # This is table 1 (the chrominance table): + 17 18 24 47 99 99 99 99 + 18 21 26 66 99 99 99 99 + 24 26 56 99 99 99 99 99 + 47 66 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + 99 99 99 99 99 99 99 99 + +If the -qtables switch is used without -quality, then the specified tables +are used exactly as-is. If both -qtables and -quality are used, then the +tables taken from the file are scaled in the same fashion that the default +tables would be scaled for that quality setting. If -baseline appears, then +the quantization values are constrained to the range 1-255. + +By default, cjpeg will use quantization table 0 for luminance components and +table 1 for chrominance components. To override this choice, use the -qslots +switch: + + -qslots N[,...] Select which quantization table to use for + each color component. + +The -qslots switch specifies a quantization table number for each color +component, in the order in which the components appear in the JPEG SOF marker. +For example, to create a separate table for each of Y,Cb,Cr, you could +provide a -qtables file that defines three quantization tables and say +"-qslots 0,1,2". If -qslots gives fewer table numbers than there are color +components, then the last table number is repeated as necessary. + + +Sampling Factor Adjustment +-------------------------- + +By default, cjpeg uses 2:1 horizontal and vertical downsampling when +compressing YCbCr data, and no downsampling for all other color spaces. +You can override this default with the -sample switch: + + -sample HxV[,...] Set JPEG sampling factors for each color + component. + +The -sample switch specifies the JPEG sampling factors for each color +component, in the order in which they appear in the JPEG SOF marker. +If you specify fewer HxV pairs than there are components, the remaining +components are set to 1x1 sampling. For example, the default YCbCr setting +is equivalent to "-sample 2x2,1x1,1x1", which can be abbreviated to +"-sample 2x2". + +There are still some JPEG decoders in existence that support only 2x1 +sampling (also called 4:2:2 sampling). Compatibility with such decoders can +be achieved by specifying "-sample 2x1". This is not recommended unless +really necessary, since it increases file size and encoding/decoding time +with very little quality gain. + + +Multiple Scan / Progression Control +----------------------------------- + +By default, cjpeg emits a single-scan sequential JPEG file. The +-progressive switch generates a progressive JPEG file using a default series +of progression parameters. You can create multiple-scan sequential JPEG +files or progressive JPEG files with custom progression parameters by using +the -scans switch: + + -scans file Use the scan sequence given in the named file. + +The specified file should be a text file containing a "scan script". +The script specifies the contents and ordering of the scans to be emitted. +Each entry in the script defines one scan. A scan definition specifies +the components to be included in the scan, and for progressive JPEG it also +specifies the progression parameters Ss,Se,Ah,Al for the scan. Scan +definitions are separated by semicolons (';'). A semicolon after the last +scan definition is optional. + +Each scan definition contains one to four component indexes, optionally +followed by a colon (':') and the four progressive-JPEG parameters. The +component indexes denote which color component(s) are to be transmitted in +the scan. Components are numbered in the order in which they appear in the +JPEG SOF marker, with the first component being numbered 0. (Note that these +indexes are not the "component ID" codes assigned to the components, just +positional indexes.) + +The progression parameters for each scan are: + Ss Zigzag index of first coefficient included in scan + Se Zigzag index of last coefficient included in scan + Ah Zero for first scan of a coefficient, else Al of prior scan + Al Successive approximation low bit position for scan +If the progression parameters are omitted, the values 0,63,0,0 are used, +producing a sequential JPEG file. cjpeg automatically determines whether +the script represents a progressive or sequential file, by observing whether +Ss and Se values other than 0 and 63 appear. (The -progressive switch is +not needed to specify this; in fact, it is ignored when -scans appears.) +The scan script must meet the JPEG restrictions on progression sequences. +(cjpeg checks that the spec's requirements are obeyed.) + +Scan script files are free format, in that arbitrary whitespace can appear +between numbers and around punctuation. Also, comments can be included: a +comment starts with '#' and extends to the end of the line. For additional +legibility, commas or dashes can be placed between values. (Actually, any +single punctuation character other than ':' or ';' can be inserted.) For +example, the following two scan definitions are equivalent: + 0 1 2: 0 63 0 0; + 0,1,2 : 0-63, 0,0 ; + +Here is an example of a scan script that generates a partially interleaved +sequential JPEG file: + + 0; # Y only in first scan + 1 2; # Cb and Cr in second scan + +Here is an example of a progressive scan script using only spectral selection +(no successive approximation): + + # Interleaved DC scan for Y,Cb,Cr: + 0,1,2: 0-0, 0, 0 ; + # AC scans: + 0: 1-2, 0, 0 ; # First two Y AC coefficients + 0: 3-5, 0, 0 ; # Three more + 1: 1-63, 0, 0 ; # All AC coefficients for Cb + 2: 1-63, 0, 0 ; # All AC coefficients for Cr + 0: 6-9, 0, 0 ; # More Y coefficients + 0: 10-63, 0, 0 ; # Remaining Y coefficients + +Here is an example of a successive-approximation script. This is equivalent +to the default script used by "cjpeg -progressive" for YCbCr images: + + # Initial DC scan for Y,Cb,Cr (lowest bit not sent) + 0,1,2: 0-0, 0, 1 ; + # First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits: + 0: 1-5, 0, 2 ; + # Send all Cr,Cb AC coefficients, minus lowest bit: + # (chroma data is usually too small to be worth subdividing further; + # but note we send Cr first since eye is least sensitive to Cb) + 2: 1-63, 0, 1 ; + 1: 1-63, 0, 1 ; + # Send remaining Y AC coefficients, minus 2 lowest bits: + 0: 6-63, 0, 2 ; + # Send next-to-lowest bit of all Y AC coefficients: + 0: 1-63, 2, 1 ; + # At this point we've sent all but the lowest bit of all coefficients. + # Send lowest bit of DC coefficients + 0,1,2: 0-0, 1, 0 ; + # Send lowest bit of AC coefficients + 2: 1-63, 1, 0 ; + 1: 1-63, 1, 0 ; + # Y AC lowest bit scan is last; it's usually the largest scan + 0: 1-63, 1, 0 ; + +It may be worth pointing out that this script is tuned for quality settings +of around 50 to 75. For lower quality settings, you'd probably want to use +a script with fewer stages of successive approximation (otherwise the +initial scans will be really bad). For higher quality settings, you might +want to use more stages of successive approximation (so that the initial +scans are not too large). diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrbmp.c glmark2-2021.02/src/libjpeg-turbo/wrbmp.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrbmp.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrbmp.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,493 @@ +/* + * wrbmp.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014-2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in Microsoft "BMP" + * format (MS Windows 3.x and OS/2 1.x flavors). + * Either 8-bit colormapped or 24-bit full-color format can be written. + * No compression is supported. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * This code contributed by James Arthur Boucher. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jconfigint.h" + +#ifdef BMP_SUPPORTED + + +/* + * To support 12-bit JPEG data, we'd have to scale output down to 8 bits. + * This is not yet implemented. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + +/* + * Since BMP stores scanlines bottom-to-top, we have to invert the image + * from JPEG's top-to-bottom order. To do this, we save the outgoing data + * in a virtual array during put_pixel_row calls, then actually emit the + * BMP file during finish_output. The virtual array contains one JSAMPLE per + * pixel if the output is grayscale or colormapped, three if it is full color. + */ + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + boolean is_os2; /* saves the OS2 format request flag */ + + jvirt_sarray_ptr whole_image; /* needed to reverse row order */ + JDIMENSION data_width; /* JSAMPLEs per row */ + JDIMENSION row_width; /* physical width of one row in the BMP file */ + int pad_bytes; /* number of padding bytes needed per row */ + JDIMENSION cur_output_row; /* next row# to write to virtual array */ +} bmp_dest_struct; + +typedef bmp_dest_struct *bmp_dest_ptr; + + +/* Forward declarations */ +LOCAL(void) write_colormap + (j_decompress_ptr cinfo, bmp_dest_ptr dest, int map_colors, + int map_entry_size); + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* This version is for writing 24-bit pixels */ +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + int pad; + + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, + dest->cur_output_row, (JDIMENSION) 1, TRUE); + dest->cur_output_row++; + + /* Transfer data. Note destination values must be in BGR order + * (even though Microsoft's own documents say the opposite). + */ + inptr = dest->pub.buffer[0]; + outptr = image_ptr[0]; + + if(cinfo->out_color_space == JCS_RGB565) { + boolean big_endian = is_big_endian(); + unsigned short *inptr2 = (unsigned short *)inptr; + for (col = cinfo->output_width; col > 0; col--) { + if (big_endian) { + outptr[0] = (*inptr2 >> 5) & 0xF8; + outptr[1] = ((*inptr2 << 5) & 0xE0) | ((*inptr2 >> 11) & 0x1C); + outptr[2] = *inptr2 & 0xF8; + } else { + outptr[0] = (*inptr2 << 3) & 0xF8; + outptr[1] = (*inptr2 >> 3) & 0xFC; + outptr[2] = (*inptr2 >> 8) & 0xF8; + } + outptr += 3; + inptr2++; + } + } else { + for (col = cinfo->output_width; col > 0; col--) { + outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[1] = *inptr++; + outptr[0] = *inptr++; + outptr += 3; + } + } + + /* Zero out the pad bytes. */ + pad = dest->pad_bytes; + while (--pad >= 0) + *outptr++ = 0; +} + +METHODDEF(void) +put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* This version is for grayscale OR quantized color output */ +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + JSAMPARRAY image_ptr; + register JSAMPROW inptr, outptr; + register JDIMENSION col; + int pad; + + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, + dest->cur_output_row, (JDIMENSION) 1, TRUE); + dest->cur_output_row++; + + /* Transfer data. */ + inptr = dest->pub.buffer[0]; + outptr = image_ptr[0]; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = *inptr++; /* can omit GETJSAMPLE() safely */ + } + + /* Zero out the pad bytes. */ + pad = dest->pad_bytes; + while (--pad >= 0) + *outptr++ = 0; +} + + +/* + * Startup: normally writes the file header. + * In this module we may as well postpone everything until finish_output. + */ + +METHODDEF(void) +start_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* no work here */ +} + + +/* + * Finish up at the end of the file. + * + * Here is where we really output the BMP file. + * + * First, routines to write the Windows and OS/2 variants of the file header. + */ + +LOCAL(void) +write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +/* Write a Windows-style BMP file header, including colormap if needed */ +{ + char bmpfileheader[14]; + char bmpinfoheader[40]; +#define PUT_2B(array,offset,value) \ + (array[offset] = (char) ((value) & 0xFF), \ + array[offset+1] = (char) (((value) >> 8) & 0xFF)) +#define PUT_4B(array,offset,value) \ + (array[offset] = (char) ((value) & 0xFF), \ + array[offset+1] = (char) (((value) >> 8) & 0xFF), \ + array[offset+2] = (char) (((value) >> 16) & 0xFF), \ + array[offset+3] = (char) (((value) >> 24) & 0xFF)) + long headersize, bfSize; + int bits_per_pixel, cmap_entries; + + /* Compute colormap size and total file size */ + if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* Colormapped RGB */ + bits_per_pixel = 8; + cmap_entries = 256; + } else { + /* Unquantized, full color RGB */ + bits_per_pixel = 24; + cmap_entries = 0; + } + } else if (cinfo->out_color_space == JCS_RGB565) { + bits_per_pixel = 24; + cmap_entries = 0; + } else { + /* Grayscale output. We need to fake a 256-entry colormap. */ + bits_per_pixel = 8; + cmap_entries = 256; + } + /* File size */ + headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */ + bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + + /* Set unused fields of header to 0 */ + MEMZERO(bmpfileheader, sizeof(bmpfileheader)); + MEMZERO(bmpinfoheader, sizeof(bmpinfoheader)); + + /* Fill the file header */ + bmpfileheader[0] = 0x42; /* first 2 bytes are ASCII 'B', 'M' */ + bmpfileheader[1] = 0x4D; + PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */ + /* we leave bfReserved1 & bfReserved2 = 0 */ + PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */ + + /* Fill the info header (Microsoft calls this a BITMAPINFOHEADER) */ + PUT_2B(bmpinfoheader, 0, 40); /* biSize */ + PUT_4B(bmpinfoheader, 4, cinfo->output_width); /* biWidth */ + PUT_4B(bmpinfoheader, 8, cinfo->output_height); /* biHeight */ + PUT_2B(bmpinfoheader, 12, 1); /* biPlanes - must be 1 */ + PUT_2B(bmpinfoheader, 14, bits_per_pixel); /* biBitCount */ + /* we leave biCompression = 0, for none */ + /* we leave biSizeImage = 0; this is correct for uncompressed data */ + if (cinfo->density_unit == 2) { /* if have density in dots/cm, then */ + PUT_4B(bmpinfoheader, 24, (long) (cinfo->X_density*100)); /* XPels/M */ + PUT_4B(bmpinfoheader, 28, (long) (cinfo->Y_density*100)); /* XPels/M */ + } + PUT_2B(bmpinfoheader, 32, cmap_entries); /* biClrUsed */ + /* we leave biClrImportant = 0 */ + + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + ERREXIT(cinfo, JERR_FILE_WRITE); + if (JFWRITE(dest->pub.output_file, bmpinfoheader, 40) != (size_t) 40) + ERREXIT(cinfo, JERR_FILE_WRITE); + + if (cmap_entries > 0) + write_colormap(cinfo, dest, cmap_entries, 4); +} + + +LOCAL(void) +write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +/* Write an OS2-style BMP file header, including colormap if needed */ +{ + char bmpfileheader[14]; + char bmpcoreheader[12]; + long headersize, bfSize; + int bits_per_pixel, cmap_entries; + + /* Compute colormap size and total file size */ + if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* Colormapped RGB */ + bits_per_pixel = 8; + cmap_entries = 256; + } else { + /* Unquantized, full color RGB */ + bits_per_pixel = 24; + cmap_entries = 0; + } + } else if (cinfo->out_color_space == JCS_RGB565) { + bits_per_pixel = 24; + cmap_entries = 0; + } else { + /* Grayscale output. We need to fake a 256-entry colormap. */ + bits_per_pixel = 8; + cmap_entries = 256; + } + /* File size */ + headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */ + bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + + /* Set unused fields of header to 0 */ + MEMZERO(bmpfileheader, sizeof(bmpfileheader)); + MEMZERO(bmpcoreheader, sizeof(bmpcoreheader)); + + /* Fill the file header */ + bmpfileheader[0] = 0x42; /* first 2 bytes are ASCII 'B', 'M' */ + bmpfileheader[1] = 0x4D; + PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */ + /* we leave bfReserved1 & bfReserved2 = 0 */ + PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */ + + /* Fill the info header (Microsoft calls this a BITMAPCOREHEADER) */ + PUT_2B(bmpcoreheader, 0, 12); /* bcSize */ + PUT_2B(bmpcoreheader, 4, cinfo->output_width); /* bcWidth */ + PUT_2B(bmpcoreheader, 6, cinfo->output_height); /* bcHeight */ + PUT_2B(bmpcoreheader, 8, 1); /* bcPlanes - must be 1 */ + PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */ + + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + ERREXIT(cinfo, JERR_FILE_WRITE); + if (JFWRITE(dest->pub.output_file, bmpcoreheader, 12) != (size_t) 12) + ERREXIT(cinfo, JERR_FILE_WRITE); + + if (cmap_entries > 0) + write_colormap(cinfo, dest, cmap_entries, 3); +} + + +/* + * Write the colormap. + * Windows uses BGR0 map entries; OS/2 uses BGR entries. + */ + +LOCAL(void) +write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest, + int map_colors, int map_entry_size) +{ + JSAMPARRAY colormap = cinfo->colormap; + int num_colors = cinfo->actual_number_of_colors; + FILE *outfile = dest->pub.output_file; + int i; + + if (colormap != NULL) { + if (cinfo->out_color_components == 3) { + /* Normal case with RGB colormap */ + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(colormap[2][i]), outfile); + putc(GETJSAMPLE(colormap[1][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } else { + /* Grayscale colormap (only happens with grayscale quantization) */ + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(colormap[0][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + putc(GETJSAMPLE(colormap[0][i]), outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } + } else { + /* If no colormap, must be grayscale data. Generate a linear "map". */ + for (i = 0; i < 256; i++) { + putc(i, outfile); + putc(i, outfile); + putc(i, outfile); + if (map_entry_size == 4) + putc(0, outfile); + } + } + /* Pad colormap with zeros to ensure specified number of colormap entries */ + if (i > map_colors) + ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, i); + for (; i < map_colors; i++) { + putc(0, outfile); + putc(0, outfile); + putc(0, outfile); + if (map_entry_size == 4) + putc(0, outfile); + } +} + + +METHODDEF(void) +finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + register FILE *outfile = dest->pub.output_file; + JSAMPARRAY image_ptr; + register JSAMPROW data_ptr; + JDIMENSION row; + register JDIMENSION col; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Write the header and colormap */ + if (dest->is_os2) + write_os2_header(cinfo, dest); + else + write_bmp_header(cinfo, dest); + + /* Write the file body from our virtual array */ + for (row = cinfo->output_height; row > 0; row--) { + if (progress != NULL) { + progress->pub.pass_counter = (long) (cinfo->output_height - row); + progress->pub.pass_limit = (long) cinfo->output_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->whole_image, row-1, (JDIMENSION) 1, FALSE); + data_ptr = image_ptr[0]; + for (col = dest->row_width; col > 0; col--) { + putc(GETJSAMPLE(*data_ptr), outfile); + data_ptr++; + } + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Make sure we wrote the output file OK */ + fflush(outfile); + if (ferror(outfile)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for BMP format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_bmp (j_decompress_ptr cinfo, boolean is_os2) +{ + bmp_dest_ptr dest; + JDIMENSION row_width; + + /* Create module interface object, fill in method pointers */ + dest = (bmp_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(bmp_dest_struct)); + dest->pub.start_output = start_output_bmp; + dest->pub.finish_output = finish_output_bmp; + dest->is_os2 = is_os2; + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + dest->pub.put_pixel_rows = put_gray_rows; + } else if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) + dest->pub.put_pixel_rows = put_gray_rows; + else + dest->pub.put_pixel_rows = put_pixel_rows; + } else if(cinfo->out_color_space == JCS_RGB565 ) { + dest->pub.put_pixel_rows = put_pixel_rows; + } else { + ERREXIT(cinfo, JERR_BMP_COLORSPACE); + } + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Determine width of rows in the BMP file (padded to 4-byte boundary). */ + if (cinfo->out_color_space == JCS_RGB565) { + row_width = cinfo->output_width * 2; + dest->row_width = dest->data_width = cinfo->output_width * 3; + } else { + row_width = cinfo->output_width * cinfo->output_components; + dest->row_width = dest->data_width = row_width; + } + while ((dest->row_width & 3) != 0) dest->row_width++; + dest->pad_bytes = (int) (dest->row_width - dest->data_width); + if (cinfo->out_color_space == JCS_RGB565) { + while ((row_width & 3) != 0) row_width++; + } else { + row_width = dest->row_width; + } + + + /* Allocate space for inversion array, prepare for write pass */ + dest->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + dest->row_width, cinfo->output_height, (JDIMENSION) 1); + dest->cur_output_row = 0; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, row_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* BMP_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrgif.c glmark2-2021.02/src/libjpeg-turbo/wrgif.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrgif.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrgif.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,402 @@ +/* + * wrgif.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2015, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in GIF format. + * + ************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * this code has been modified to output "uncompressed GIF" files. * + * There is no trace of the LZW algorithm in this file. * + ************************************************************************** + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + */ + +/* + * This code is loosely based on ppmtogif from the PBMPLUS distribution + * of Feb. 1991. That file contains the following copyright notice: + * Based on GIFENCODE by David Rowley . + * Lempel-Ziv compression based on "compress" by Spencer W. Thomas et al. + * Copyright (C) 1989 by Jef Poskanzer. + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, provided + * that the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. This software is provided "as is" without express or + * implied warranty. + * + * We are also required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + j_decompress_ptr cinfo; /* back link saves passing separate parm */ + + /* State for packing variable-width codes into a bitstream */ + int n_bits; /* current number of bits/code */ + int maxcode; /* maximum code, given n_bits */ + long cur_accum; /* holds bits not yet output */ + int cur_bits; /* # of bits in cur_accum */ + + /* State for GIF code assignment */ + int ClearCode; /* clear code (doesn't change) */ + int EOFCode; /* EOF code (ditto) */ + int code_counter; /* counts output symbols */ + + /* GIF data packet construction buffer */ + int bytesinpkt; /* # of bytes in current packet */ + char packetbuf[256]; /* workspace for accumulating packet */ + +} gif_dest_struct; + +typedef gif_dest_struct *gif_dest_ptr; + +/* Largest value that will fit in N bits */ +#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) + + +/* + * Routines to package finished data bytes into GIF data blocks. + * A data block consists of a count byte (1..255) and that many data bytes. + */ + +LOCAL(void) +flush_packet (gif_dest_ptr dinfo) +/* flush any accumulated data */ +{ + if (dinfo->bytesinpkt > 0) { /* never write zero-length packet */ + dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++; + if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) + != (size_t) dinfo->bytesinpkt) + ERREXIT(dinfo->cinfo, JERR_FILE_WRITE); + dinfo->bytesinpkt = 0; + } +} + + +/* Add a character to current packet; flush to disk if necessary */ +#define CHAR_OUT(dinfo,c) \ + { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c); \ + if ((dinfo)->bytesinpkt >= 255) \ + flush_packet(dinfo); \ + } + + +/* Routine to convert variable-width codes into a byte stream */ + +LOCAL(void) +output (gif_dest_ptr dinfo, int code) +/* Emit a code of n_bits bits */ +/* Uses cur_accum and cur_bits to reblock into 8-bit bytes */ +{ + dinfo->cur_accum |= ((long) code) << dinfo->cur_bits; + dinfo->cur_bits += dinfo->n_bits; + + while (dinfo->cur_bits >= 8) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + dinfo->cur_accum >>= 8; + dinfo->cur_bits -= 8; + } +} + + +/* The pseudo-compression algorithm. + * + * In this module we simply output each pixel value as a separate symbol; + * thus, no compression occurs. In fact, there is expansion of one bit per + * pixel, because we use a symbol width one bit wider than the pixel width. + * + * GIF ordinarily uses variable-width symbols, and the decoder will expect + * to ratchet up the symbol width after a fixed number of symbols. + * To simplify the logic and keep the expansion penalty down, we emit a + * GIF Clear code to reset the decoder just before the width would ratchet up. + * Thus, all the symbols in the output file will have the same bit width. + * Note that emitting the Clear codes at the right times is a mere matter of + * counting output symbols and is in no way dependent on the LZW patent. + * + * With a small basic pixel width (low color count), Clear codes will be + * needed very frequently, causing the file to expand even more. So this + * simplistic approach wouldn't work too well on bilevel images, for example. + * But for output of JPEG conversions the pixel width will usually be 8 bits + * (129 to 256 colors), so the overhead added by Clear symbols is only about + * one symbol in every 256. + */ + +LOCAL(void) +compress_init (gif_dest_ptr dinfo, int i_bits) +/* Initialize pseudo-compressor */ +{ + /* init all the state variables */ + dinfo->n_bits = i_bits; + dinfo->maxcode = MAXCODE(dinfo->n_bits); + dinfo->ClearCode = (1 << (i_bits - 1)); + dinfo->EOFCode = dinfo->ClearCode + 1; + dinfo->code_counter = dinfo->ClearCode + 2; + /* init output buffering vars */ + dinfo->bytesinpkt = 0; + dinfo->cur_accum = 0; + dinfo->cur_bits = 0; + /* GIF specifies an initial Clear code */ + output(dinfo, dinfo->ClearCode); +} + + +LOCAL(void) +compress_pixel (gif_dest_ptr dinfo, int c) +/* Accept and "compress" one pixel value. + * The given value must be less than n_bits wide. + */ +{ + /* Output the given pixel value as a symbol. */ + output(dinfo, c); + /* Issue Clear codes often enough to keep the reader from ratcheting up + * its symbol size. + */ + if (dinfo->code_counter < dinfo->maxcode) { + dinfo->code_counter++; + } else { + output(dinfo, dinfo->ClearCode); + dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */ + } +} + + +LOCAL(void) +compress_term (gif_dest_ptr dinfo) +/* Clean up at end */ +{ + /* Send an EOF code */ + output(dinfo, dinfo->EOFCode); + /* Flush the bit-packing buffer */ + if (dinfo->cur_bits > 0) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + } + /* Flush the packet buffer */ + flush_packet(dinfo); +} + + +/* GIF header construction */ + + +LOCAL(void) +put_word (gif_dest_ptr dinfo, unsigned int w) +/* Emit a 16-bit word, LSB first */ +{ + putc(w & 0xFF, dinfo->pub.output_file); + putc((w >> 8) & 0xFF, dinfo->pub.output_file); +} + + +LOCAL(void) +put_3bytes (gif_dest_ptr dinfo, int val) +/* Emit 3 copies of same byte value --- handy subr for colormap construction */ +{ + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); +} + + +LOCAL(void) +emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) +/* Output the GIF file header, including color map */ +/* If colormap==NULL, synthesize a grayscale colormap */ +{ + int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte; + int cshift = dinfo->cinfo->data_precision - 8; + int i; + + if (num_colors > 256) + ERREXIT1(dinfo->cinfo, JERR_TOO_MANY_COLORS, num_colors); + /* Compute bits/pixel and related values */ + BitsPerPixel = 1; + while (num_colors > (1 << BitsPerPixel)) + BitsPerPixel++; + ColorMapSize = 1 << BitsPerPixel; + if (BitsPerPixel <= 1) + InitCodeSize = 2; + else + InitCodeSize = BitsPerPixel; + /* + * Write the GIF header. + * Note that we generate a plain GIF87 header for maximum compatibility. + */ + putc('G', dinfo->pub.output_file); + putc('I', dinfo->pub.output_file); + putc('F', dinfo->pub.output_file); + putc('8', dinfo->pub.output_file); + putc('7', dinfo->pub.output_file); + putc('a', dinfo->pub.output_file); + /* Write the Logical Screen Descriptor */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + FlagByte = 0x80; /* Yes, there is a global color table */ + FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */ + FlagByte |= (BitsPerPixel-1); /* size of global color table */ + putc(FlagByte, dinfo->pub.output_file); + putc(0, dinfo->pub.output_file); /* Background color index */ + putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */ + /* Write the Global Color Map */ + /* If the color map is more than 8 bits precision, */ + /* we reduce it to 8 bits by shifting */ + for (i=0; i < ColorMapSize; i++) { + if (i < num_colors) { + if (colormap != NULL) { + if (dinfo->cinfo->out_color_space == JCS_RGB) { + /* Normal case: RGB color map */ + putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file); + } else { + /* Grayscale "color map": possible if quantizing grayscale image */ + put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift); + } + } else { + /* Create a grayscale map of num_colors values, range 0..255 */ + put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1)); + } + } else { + /* fill out the map to a power of 2 */ + put_3bytes(dinfo, 0); + } + } + /* Write image separator and Image Descriptor */ + putc(',', dinfo->pub.output_file); /* separator */ + put_word(dinfo, 0); /* left/top offset */ + put_word(dinfo, 0); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + /* flag byte: not interlaced, no local color map */ + putc(0x00, dinfo->pub.output_file); + /* Write Initial Code Size byte */ + putc(InitCodeSize, dinfo->pub.output_file); + + /* Initialize for "compression" of image data */ + compress_init(dinfo, InitCodeSize+1); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + if (cinfo->quantize_colors) + emit_header(dest, cinfo->actual_number_of_colors, cinfo->colormap); + else + emit_header(dest, 256, (JSAMPARRAY) NULL); +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + for (col = cinfo->output_width; col > 0; col--) { + compress_pixel(dest, GETJSAMPLE(*ptr++)); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + /* Flush "compression" mechanism */ + compress_term(dest); + /* Write a zero-length data block to end the series */ + putc(0, dest->pub.output_file); + /* Write the GIF terminator mark */ + putc(';', dest->pub.output_file); + /* Make sure we wrote the output file OK */ + fflush(dest->pub.output_file); + if (ferror(dest->pub.output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for GIF format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_gif (j_decompress_ptr cinfo) +{ + gif_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (gif_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(gif_dest_struct)); + dest->cinfo = cinfo; /* make back link for subroutines */ + dest->pub.start_output = start_output_gif; + dest->pub.put_pixel_rows = put_pixel_rows; + dest->pub.finish_output = finish_output_gif; + + if (cinfo->out_color_space != JCS_GRAYSCALE && + cinfo->out_color_space != JCS_RGB) + ERREXIT(cinfo, JERR_GIF_COLORSPACE); + + /* Force quantization if color or if > 8 bits input */ + if (cinfo->out_color_space != JCS_GRAYSCALE || cinfo->data_precision > 8) { + /* Force quantization to at most 256 colors */ + cinfo->quantize_colors = TRUE; + if (cinfo->desired_number_of_colors > 256) + cinfo->desired_number_of_colors = 256; + } + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + if (cinfo->output_components != 1) /* safety check: just one component? */ + ERREXIT(cinfo, JERR_GIF_BUG); + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* GIF_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrjpgcom.1 glmark2-2021.02/src/libjpeg-turbo/wrjpgcom.1 --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrjpgcom.1 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrjpgcom.1 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,103 @@ +.TH WRJPGCOM 1 "15 June 1995" +.SH NAME +wrjpgcom \- insert text comments into a JPEG file +.SH SYNOPSIS +.B wrjpgcom +[ +.B \-replace +] +[ +.BI \-comment " text" +] +[ +.BI \-cfile " name" +] +[ +.I filename +] +.LP +.SH DESCRIPTION +.LP +.B wrjpgcom +reads the named JPEG/JFIF file, or the standard input if no file is named, +and generates a new JPEG/JFIF file on standard output. A comment block is +added to the file. +.PP +The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file. +Although the standard doesn't actually define what COM blocks are for, they +are widely used to hold user-supplied text strings. This lets you add +annotations, titles, index terms, etc to your JPEG files, and later retrieve +them as text. COM blocks do not interfere with the image stored in the JPEG +file. The maximum size of a COM block is 64K, but you can have as many of +them as you like in one JPEG file. +.PP +.B wrjpgcom +adds a COM block, containing text you provide, to a JPEG file. +Ordinarily, the COM block is added after any existing COM blocks; but you +can delete the old COM blocks if you wish. +.SH OPTIONS +Switch names may be abbreviated, and are not case sensitive. +.TP +.B \-replace +Delete any existing COM blocks from the file. +.TP +.BI \-comment " text" +Supply text for new COM block on command line. +.TP +.BI \-cfile " name" +Read text for new COM block from named file. +.PP +If you have only one line of comment text to add, you can provide it on the +command line with +.BR \-comment . +The comment text must be surrounded with quotes so that it is treated as a +single argument. Longer comments can be read from a text file. +.PP +If you give neither +.B \-comment +nor +.BR \-cfile , +then +.B wrjpgcom +will read the comment text from standard input. (In this case an input image +file name MUST be supplied, so that the source JPEG file comes from somewhere +else.) You can enter multiple lines, up to 64KB worth. Type an end-of-file +indicator (usually control-D) to terminate the comment text entry. +.PP +.B wrjpgcom +will not add a COM block if the provided comment string is empty. Therefore +\fB\-replace \-comment ""\fR can be used to delete all COM blocks from a file. +.SH EXAMPLES +.LP +Add a short comment to in.jpg, producing out.jpg: +.IP +.B wrjpgcom \-c +\fI"View of my back yard" in.jpg +.B > +.I out.jpg +.PP +Attach a long comment previously stored in comment.txt: +.IP +.B wrjpgcom +.I in.jpg +.B < +.I comment.txt +.B > +.I out.jpg +.PP +or equivalently +.IP +.B wrjpgcom +.B -cfile +.I comment.txt +.B < +.I in.jpg +.B > +.I out.jpg +.SH SEE ALSO +.BR cjpeg (1), +.BR djpeg (1), +.BR jpegtran (1), +.BR rdjpgcom (1) +.SH AUTHOR +Independent JPEG Group diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrjpgcom.c glmark2-2021.02/src/libjpeg-turbo/wrjpgcom.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrjpgcom.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrjpgcom.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,592 @@ +/* + * wrjpgcom.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains a very simple stand-alone application that inserts + * user-supplied text as a COM (comment) marker in a JFIF file. + * This may be useful as an example of the minimum logic needed to parse + * JPEG markers. + */ + +#define JPEG_CJPEG_DJPEG /* to get the command-line config symbols */ +#include "jinclude.h" /* get auto-config symbols, */ + +#ifndef HAVE_STDLIB_H /* should declare malloc() */ +extern void *malloc (); +#endif +#include /* to declare isupper(), tolower() */ +#ifdef USE_SETMODE +#include /* to declare setmode()'s parameter macros */ +/* If you have setmode() but not , just delete this line: */ +#include /* to declare setmode() */ +#endif + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + +#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ +#define READ_BINARY "r" +#define WRITE_BINARY "w" +#else +#define READ_BINARY "rb" +#define WRITE_BINARY "wb" +#endif + +#ifndef EXIT_FAILURE /* define exit() codes if not provided */ +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +/* Reduce this value if your malloc() can't allocate blocks up to 64K. + * On DOS, compiling in large model is usually a better solution. + */ + +#ifndef MAX_COM_LENGTH +#define MAX_COM_LENGTH 65000L /* must be <= 65533 in any case */ +#endif + + +/* + * These macros are used to read the input file and write the output file. + * To reuse this code in another application, you might need to change these. + */ + +static FILE *infile; /* input JPEG file */ + +/* Return next input byte, or EOF if no more */ +#define NEXTBYTE() getc(infile) + +static FILE *outfile; /* output JPEG file */ + +/* Emit an output byte */ +#define PUTBYTE(x) putc((x), outfile) + + +/* Error exit handler */ +#define ERREXIT(msg) (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE)) + + +/* Read one byte, testing for EOF */ +static int +read_1_byte (void) +{ + int c; + + c = NEXTBYTE(); + if (c == EOF) + ERREXIT("Premature EOF in JPEG file"); + return c; +} + +/* Read 2 bytes, convert to unsigned int */ +/* All 2-byte quantities in JPEG markers are MSB first */ +static unsigned int +read_2_bytes (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + if (c1 == EOF) + ERREXIT("Premature EOF in JPEG file"); + c2 = NEXTBYTE(); + if (c2 == EOF) + ERREXIT("Premature EOF in JPEG file"); + return (((unsigned int) c1) << 8) + ((unsigned int) c2); +} + + +/* Routines to write data to output file */ + +static void +write_1_byte (int c) +{ + PUTBYTE(c); +} + +static void +write_2_bytes (unsigned int val) +{ + PUTBYTE((val >> 8) & 0xFF); + PUTBYTE(val & 0xFF); +} + +static void +write_marker (int marker) +{ + PUTBYTE(0xFF); + PUTBYTE(marker); +} + +static void +copy_rest_of_file (void) +{ + int c; + + while ((c = NEXTBYTE()) != EOF) + PUTBYTE(c); +} + + +/* + * JPEG markers consist of one or more 0xFF bytes, followed by a marker + * code byte (which is not an FF). Here are the marker codes of interest + * in this program. (See jdmarker.c for a more complete list.) + */ + +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB +#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_COM 0xFE /* COMment */ + + +/* + * Find the next JPEG marker and return its marker code. + * We expect at least one FF byte, possibly more if the compressor used FFs + * to pad the file. (Padding FFs will NOT be replicated in the output file.) + * There could also be non-FF garbage between markers. The treatment of such + * garbage is unspecified; we choose to skip over it but emit a warning msg. + * NB: this routine must not be used after seeing SOS marker, since it will + * not deal correctly with FF/00 sequences in the compressed image data... + */ + +static int +next_marker (void) +{ + int c; + int discarded_bytes = 0; + + /* Find 0xFF byte; count and skip any non-FFs. */ + c = read_1_byte(); + while (c != 0xFF) { + discarded_bytes++; + c = read_1_byte(); + } + /* Get marker code byte, swallowing any duplicate FF bytes. Extra FFs + * are legal as pad bytes, so don't count them in discarded_bytes. + */ + do { + c = read_1_byte(); + } while (c == 0xFF); + + if (discarded_bytes != 0) { + fprintf(stderr, "Warning: garbage data found in JPEG file\n"); + } + + return c; +} + + +/* + * Read the initial marker, which should be SOI. + * For a JFIF file, the first two bytes of the file should be literally + * 0xFF M_SOI. To be more general, we could use next_marker, but if the + * input file weren't actually JPEG at all, next_marker might read the whole + * file and then return a misleading error message... + */ + +static int +first_marker (void) +{ + int c1, c2; + + c1 = NEXTBYTE(); + c2 = NEXTBYTE(); + if (c1 != 0xFF || c2 != M_SOI) + ERREXIT("Not a JPEG file"); + return c2; +} + + +/* + * Most types of marker are followed by a variable-length parameter segment. + * This routine skips over the parameters for any marker we don't otherwise + * want to process. + * Note that we MUST skip the parameter segment explicitly in order not to + * be fooled by 0xFF bytes that might appear within the parameter segment; + * such bytes do NOT introduce new markers. + */ + +static void +copy_variable (void) +/* Copy an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + write_2_bytes(length); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + write_1_byte(read_1_byte()); + length--; + } +} + +static void +skip_variable (void) +/* Skip over an unknown or uninteresting variable-length marker */ +{ + unsigned int length; + + /* Get the marker parameter length count */ + length = read_2_bytes(); + /* Length includes itself, so must be at least 2 */ + if (length < 2) + ERREXIT("Erroneous JPEG marker length"); + length -= 2; + /* Skip over the remaining bytes */ + while (length > 0) { + (void) read_1_byte(); + length--; + } +} + + +/* + * Parse the marker stream until SOFn or EOI is seen; + * copy data to output, but discard COM markers unless keep_COM is true. + */ + +static int +scan_JPEG_header (int keep_COM) +{ + int marker; + + /* Expect SOI at start of file */ + if (first_marker() != M_SOI) + ERREXIT("Expected SOI marker first"); + write_marker(M_SOI); + + /* Scan miscellaneous markers until we reach SOFn. */ + for (;;) { + marker = next_marker(); + switch (marker) { + /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be, + * treated as SOFn. C4 in particular is actually DHT. + */ + case M_SOF0: /* Baseline */ + case M_SOF1: /* Extended sequential, Huffman */ + case M_SOF2: /* Progressive, Huffman */ + case M_SOF3: /* Lossless, Huffman */ + case M_SOF5: /* Differential sequential, Huffman */ + case M_SOF6: /* Differential progressive, Huffman */ + case M_SOF7: /* Differential lossless, Huffman */ + case M_SOF9: /* Extended sequential, arithmetic */ + case M_SOF10: /* Progressive, arithmetic */ + case M_SOF11: /* Lossless, arithmetic */ + case M_SOF13: /* Differential sequential, arithmetic */ + case M_SOF14: /* Differential progressive, arithmetic */ + case M_SOF15: /* Differential lossless, arithmetic */ + return marker; + + case M_SOS: /* should not see compressed data before SOF */ + ERREXIT("SOS without prior SOFn"); + break; + + case M_EOI: /* in case it's a tables-only JPEG stream */ + return marker; + + case M_COM: /* Existing COM: conditionally discard */ + if (keep_COM) { + write_marker(marker); + copy_variable(); + } else { + skip_variable(); + } + break; + + default: /* Anything else just gets copied */ + write_marker(marker); + copy_variable(); /* we assume it has a parameter count... */ + break; + } + } /* end loop */ +} + + +/* Command line parsing code */ + +static const char *progname; /* program name for error messages */ + + +static void +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "wrjpgcom inserts a textual comment in a JPEG file.\n"); + fprintf(stderr, "You can add to or replace any existing comment(s).\n"); + + fprintf(stderr, "Usage: %s [switches] ", progname); +#ifdef TWO_FILE_COMMANDLINE + fprintf(stderr, "inputfile outputfile\n"); +#else + fprintf(stderr, "[inputfile]\n"); +#endif + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -replace Delete any existing comments\n"); + fprintf(stderr, " -comment \"text\" Insert comment with given text\n"); + fprintf(stderr, " -cfile name Read comment from named file\n"); + fprintf(stderr, "Notice that you must put quotes around the comment text\n"); + fprintf(stderr, "when you use -comment.\n"); + fprintf(stderr, "If you do not give either -comment or -cfile on the command line,\n"); + fprintf(stderr, "then the comment text is read from standard input.\n"); + fprintf(stderr, "It can be multiple lines, up to %u characters total.\n", + (unsigned int) MAX_COM_LENGTH); +#ifndef TWO_FILE_COMMANDLINE + fprintf(stderr, "You must specify an input JPEG file name when supplying\n"); + fprintf(stderr, "comment text from standard input.\n"); +#endif + + exit(EXIT_FAILURE); +} + + +static int +keymatch (char *arg, const char *keyword, int minchars) +/* Case-insensitive matching of (possibly abbreviated) keyword switches. */ +/* keyword is the constant keyword (must be lower case already), */ +/* minchars is length of minimum legal abbreviation. */ +{ + register int ca, ck; + register int nmatched = 0; + + while ((ca = *arg++) != '\0') { + if ((ck = *keyword++) == '\0') + return 0; /* arg longer than keyword, no good */ + if (isupper(ca)) /* force arg to lcase (assume ck is already) */ + ca = tolower(ca); + if (ca != ck) + return 0; /* no good */ + nmatched++; /* count matched characters */ + } + /* reached end of argument; fail if it's too short for unique abbrev */ + if (nmatched < minchars) + return 0; + return 1; /* A-OK */ +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int argn; + char *arg; + int keep_COM = 1; + char *comment_arg = NULL; + FILE *comment_file = NULL; + unsigned int comment_length = 0; + int marker; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "wrjpgcom"; /* in case C library doesn't provide it */ + + /* Parse switches, if any */ + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (arg[0] != '-') + break; /* not switch, must be file name */ + arg++; /* advance over '-' */ + if (keymatch(arg, "replace", 1)) { + keep_COM = 0; + } else if (keymatch(arg, "cfile", 2)) { + if (++argn >= argc) usage(); + if ((comment_file = fopen(argv[argn], "r")) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else if (keymatch(arg, "comment", 1)) { + if (++argn >= argc) usage(); + comment_arg = argv[argn]; + /* If the comment text starts with '"', then we are probably running + * under MS-DOG and must parse out the quoted string ourselves. Sigh. + */ + if (comment_arg[0] == '"') { + comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + if (comment_arg == NULL) + ERREXIT("Insufficient memory"); + if (strlen(argv[argn]) + 2 >= (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + strcpy(comment_arg, argv[argn]+1); + for (;;) { + comment_length = (unsigned int) strlen(comment_arg); + if (comment_length > 0 && comment_arg[comment_length-1] == '"') { + comment_arg[comment_length-1] = '\0'; /* zap terminating quote */ + break; + } + if (++argn >= argc) + ERREXIT("Missing ending quote mark"); + if (strlen(comment_arg) + strlen(argv[argn]) + 2 >= + (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + strcat(comment_arg, " "); + strcat(comment_arg, argv[argn]); + } + } else if (strlen(argv[argn]) >= (size_t) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + comment_length = (unsigned int) strlen(comment_arg); + } else + usage(); + } + + /* Cannot use both -comment and -cfile. */ + if (comment_arg != NULL && comment_file != NULL) + usage(); + /* If there is neither -comment nor -cfile, we will read the comment text + * from stdin; in this case there MUST be an input JPEG file name. + */ + if (comment_arg == NULL && comment_file == NULL && argn >= argc) + usage(); + + /* Open the input file. */ + if (argn < argc) { + if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + } else { + /* default input file is stdin */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdin), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdin\n", progname); + exit(EXIT_FAILURE); + } +#else + infile = stdin; +#endif + } + + /* Open the output file. */ +#ifdef TWO_FILE_COMMANDLINE + /* Must have explicit output file name */ + if (argn != argc-2) { + fprintf(stderr, "%s: must name one input and one output file\n", + progname); + usage(); + } + if ((outfile = fopen(argv[argn+1], WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn+1]); + exit(EXIT_FAILURE); + } +#else + /* Unix style: expect zero or one file name */ + if (argn < argc-1) { + fprintf(stderr, "%s: only one input file\n", progname); + usage(); + } + /* default output file is stdout */ +#ifdef USE_SETMODE /* need to hack file mode? */ + setmode(fileno(stdout), O_BINARY); +#endif +#ifdef USE_FDOPEN /* need to re-open in binary mode? */ + if ((outfile = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open stdout\n", progname); + exit(EXIT_FAILURE); + } +#else + outfile = stdout; +#endif +#endif /* TWO_FILE_COMMANDLINE */ + + /* Collect comment text from comment_file or stdin, if necessary */ + if (comment_arg == NULL) { + FILE *src_file; + int c; + + comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + if (comment_arg == NULL) + ERREXIT("Insufficient memory"); + comment_length = 0; + src_file = (comment_file != NULL ? comment_file : stdin); + while ((c = getc(src_file)) != EOF) { + if (comment_length >= (unsigned int) MAX_COM_LENGTH) { + fprintf(stderr, "Comment text may not exceed %u bytes\n", + (unsigned int) MAX_COM_LENGTH); + exit(EXIT_FAILURE); + } + comment_arg[comment_length++] = (char) c; + } + if (comment_file != NULL) + fclose(comment_file); + } + + /* Copy JPEG headers until SOFn marker; + * we will insert the new comment marker just before SOFn. + * This (a) causes the new comment to appear after, rather than before, + * existing comments; and (b) ensures that comments come after any JFIF + * or JFXX markers, as required by the JFIF specification. + */ + marker = scan_JPEG_header(keep_COM); + /* Insert the new COM marker, but only if nonempty text has been supplied */ + if (comment_length > 0) { + write_marker(M_COM); + write_2_bytes(comment_length + 2); + while (comment_length > 0) { + write_1_byte(*comment_arg++); + comment_length--; + } + } + /* Duplicate the remainder of the source file. + * Note that any COM markers occuring after SOF will not be touched. + */ + write_marker(marker); + copy_rest_of_file(); + + /* All done. */ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrppm.c glmark2-2021.02/src/libjpeg-turbo/wrppm.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrppm.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrppm.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,252 @@ +/* + * wrppm.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in PPM/PGM format. + * The extended 2-byte-per-sample raw PPM/PGM formats are supported. + * The PBMPLUS library is NOT required to compile this software + * (but it is highly useful as a set of PPM image manipulation programs). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "wrppm.h" + +#ifdef PPM_SUPPORTED + + +/* + * For 12-bit JPEG data, we either downscale the values to 8 bits + * (to write standard byte-per-sample PPM/PGM files), or output + * nonstandard word-per-sample PPM/PGM files. Downscaling is done + * if PPM_NORAWWORD is defined (this can be done in the Makefile + * or in jconfig.h). + * (When the core library supports data precision reduction, a cleaner + * implementation will be to ask for that instead.) + */ + +#if BITS_IN_JSAMPLE == 8 +#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) (v) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 +#else +#ifdef PPM_NORAWWORD +#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) ((v) >> (BITS_IN_JSAMPLE-8)) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 +#else +/* The word-per-sample format always puts the MSB first. */ +#define PUTPPMSAMPLE(ptr,v) \ + { register int val_ = v; \ + *ptr++ = (char) ((val_ >> 8) & 0xFF); \ + *ptr++ = (char) (val_ & 0xFF); \ + } +#define BYTESPERSAMPLE 2 +#define PPM_MAXVAL ((1<pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * This code is used when we have to copy the data and apply a pixel + * format translation. Typically this only happens in 12-bit mode. + */ + +METHODDEF(void) +copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = dest->samples_per_row; col > 0; col--) { + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(*ptr++)); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Write some pixel data when color quantization is in effect. + * We have to demap the color index values to straight data. + */ + +METHODDEF(void) +put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register int pixval; + register JSAMPROW ptr; + register JSAMPROW color_map0 = cinfo->colormap[0]; + register JSAMPROW color_map1 = cinfo->colormap[1]; + register JSAMPROW color_map2 = cinfo->colormap[2]; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + pixval = GETJSAMPLE(*ptr++); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map0[pixval])); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map1[pixval])); + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map2[pixval])); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +METHODDEF(void) +put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JSAMPROW color_map = cinfo->colormap[0]; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map[GETJSAMPLE(*ptr++)])); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + + /* Emit file header */ + switch (cinfo->out_color_space) { + case JCS_GRAYSCALE: + /* emit header for raw PGM format */ + fprintf(dest->pub.output_file, "P5\n%ld %ld\n%d\n", + (long) cinfo->output_width, (long) cinfo->output_height, + PPM_MAXVAL); + break; + case JCS_RGB: + /* emit header for raw PPM format */ + fprintf(dest->pub.output_file, "P6\n%ld %ld\n%d\n", + (long) cinfo->output_width, (long) cinfo->output_height, + PPM_MAXVAL); + break; + default: + ERREXIT(cinfo, JERR_PPM_COLORSPACE); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* Make sure we wrote the output file OK */ + fflush(dinfo->output_file); + if (ferror(dinfo->output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for PPM format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_ppm (j_decompress_ptr cinfo) +{ + ppm_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (ppm_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(ppm_dest_struct)); + dest->pub.start_output = start_output_ppm; + dest->pub.finish_output = finish_output_ppm; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Create physical I/O buffer */ + dest->samples_per_row = cinfo->output_width * cinfo->out_color_components; + dest->buffer_width = dest->samples_per_row * (BYTESPERSAMPLE * sizeof(char)); + dest->iobuffer = (char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width); + + if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 || + sizeof(JSAMPLE) != sizeof(char)) { + /* When quantizing, we need an output buffer for colormap indexes + * that's separate from the physical I/O buffer. We also need a + * separate buffer if pixel format translation must take place. + */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width * cinfo->output_components, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + if (! cinfo->quantize_colors) + dest->pub.put_pixel_rows = copy_pixel_rows; + else if (cinfo->out_color_space == JCS_GRAYSCALE) + dest->pub.put_pixel_rows = put_demapped_gray; + else + dest->pub.put_pixel_rows = put_demapped_rgb; + } else { + /* We will fwrite() directly from decompressor output buffer. */ + /* Synthesize a JSAMPARRAY pointer structure */ + dest->pixrow = (JSAMPROW) dest->iobuffer; + dest->pub.buffer = & dest->pixrow; + dest->pub.buffer_height = 1; + dest->pub.put_pixel_rows = put_pixel_rows; + } + + return (djpeg_dest_ptr) dest; +} + +#endif /* PPM_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrppm.h glmark2-2021.02/src/libjpeg-turbo/wrppm.h --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrppm.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrppm.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,26 @@ +/* + * wrppm.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + */ + +#ifdef PPM_SUPPORTED + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + /* Usually these two pointers point to the same place: */ + char *iobuffer; /* fwrite's I/O buffer */ + JSAMPROW pixrow; /* decompressor output buffer */ + size_t buffer_width; /* width of I/O buffer */ + JDIMENSION samples_per_row; /* JSAMPLEs per output row */ +} ppm_dest_struct; + +typedef ppm_dest_struct *ppm_dest_ptr; + +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrrle.c glmark2-2021.02/src/libjpeg-turbo/wrrle.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrrle.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrrle.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,307 @@ +/* + * wrrle.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in RLE format. + * The Utah Raster Toolkit library is required (version 3.1 or later). + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * Based on code contributed by Mike Lijewski, + * with updates from Robert Hutchinson. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef RLE_SUPPORTED + +/* rle.h is provided by the Utah Raster Toolkit. */ + +#include + +/* + * We assume that JSAMPLE has the same representation as rle_pixel, + * to wit, "unsigned char". Hence we can't cope with 12- or 16-bit samples. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + + +/* + * Since RLE stores scanlines bottom-to-top, we have to invert the image + * from JPEG's top-to-bottom order. To do this, we save the outgoing data + * in a virtual array during put_pixel_row calls, then actually emit the + * RLE file during finish_output. + */ + + +/* + * For now, if we emit an RLE color map then it is always 256 entries long, + * though not all of the entries need be used. + */ + +#define CMAPBITS 8 +#define CMAPLENGTH (1<<(CMAPBITS)) + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + jvirt_sarray_ptr image; /* virtual array to store the output image */ + rle_map *colormap; /* RLE-style color map, or NULL if none */ + rle_pixel **rle_row; /* To pass rows to rle_putrow() */ + +} rle_dest_struct; + +typedef rle_dest_struct *rle_dest_ptr; + +/* Forward declarations */ +METHODDEF(void) rle_put_pixel_rows + (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied); + + +/* + * Write the file header. + * + * In this module it's easier to wait till finish_output to write anything. + */ + +METHODDEF(void) +start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + size_t cmapsize; + int i, ci; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* + * Make sure the image can be stored in RLE format. + * + * - RLE stores image dimensions as *signed* 16 bit integers. JPEG + * uses unsigned, so we have to check the width. + * + * - Colorspace is expected to be grayscale or RGB. + * + * - The number of channels (components) is expected to be 1 (grayscale/ + * pseudocolor) or 3 (truecolor/directcolor). + * (could be 2 or 4 if using an alpha channel, but we aren't) + */ + + if (cinfo->output_width > 32767 || cinfo->output_height > 32767) + ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width, + cinfo->output_height); + + if (cinfo->out_color_space != JCS_GRAYSCALE && + cinfo->out_color_space != JCS_RGB) + ERREXIT(cinfo, JERR_RLE_COLORSPACE); + + if (cinfo->output_components != 1 && cinfo->output_components != 3) + ERREXIT1(cinfo, JERR_RLE_TOOMANYCHANNELS, cinfo->num_components); + + /* Convert colormap, if any, to RLE format. */ + + dest->colormap = NULL; + + if (cinfo->quantize_colors) { + /* Allocate storage for RLE-style cmap, zero any extra entries */ + cmapsize = cinfo->out_color_components * CMAPLENGTH * sizeof(rle_map); + dest->colormap = (rle_map *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, cmapsize); + MEMZERO(dest->colormap, cmapsize); + + /* Save away data in RLE format --- note 8-bit left shift! */ + /* Shifting would need adjustment for JSAMPLEs wider than 8 bits. */ + for (ci = 0; ci < cinfo->out_color_components; ci++) { + for (i = 0; i < cinfo->actual_number_of_colors; i++) { + dest->colormap[ci * CMAPLENGTH + i] = + GETJSAMPLE(cinfo->colormap[ci][i]) << 8; + } + } + } + + /* Set the output buffer to the first row */ + dest->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, (JDIMENSION) 0, (JDIMENSION) 1, TRUE); + dest->pub.buffer_height = 1; + + dest->pub.put_pixel_rows = rle_put_pixel_rows; + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->total_extra_passes++; /* count file writing as separate pass */ + } +#endif +} + + +/* + * Write some pixel data. + * + * This routine just saves the data away in a virtual array. + */ + +METHODDEF(void) +rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + + if (cinfo->output_scanline < cinfo->output_height) { + dest->pub.buffer = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + cinfo->output_scanline, (JDIMENSION) 1, TRUE); + } +} + +/* + * Finish up at the end of the file. + * + * Here is where we really output the RLE file. + */ + +METHODDEF(void) +finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + rle_dest_ptr dest = (rle_dest_ptr) dinfo; + rle_hdr header; /* Output file information */ + rle_pixel **rle_row, *red, *green, *blue; + JSAMPROW output_row; + char cmapcomment[80]; + int row, col; + int ci; +#ifdef PROGRESS_REPORT + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; +#endif + + /* Initialize the header info */ + header = *rle_hdr_init(NULL); + header.rle_file = dest->pub.output_file; + header.xmin = 0; + header.xmax = cinfo->output_width - 1; + header.ymin = 0; + header.ymax = cinfo->output_height - 1; + header.alpha = 0; + header.ncolors = cinfo->output_components; + for (ci = 0; ci < cinfo->output_components; ci++) { + RLE_SET_BIT(header, ci); + } + if (cinfo->quantize_colors) { + header.ncmap = cinfo->out_color_components; + header.cmaplen = CMAPBITS; + header.cmap = dest->colormap; + /* Add a comment to the output image with the true colormap length. */ + sprintf(cmapcomment, "color_map_length=%d", cinfo->actual_number_of_colors); + rle_putcom(cmapcomment, &header); + } + + /* Emit the RLE header and color map (if any) */ + rle_put_setup(&header); + + /* Now output the RLE data from our virtual array. + * We assume here that rle_pixel is represented the same as JSAMPLE. + */ + +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_limit = cinfo->output_height; + progress->pub.pass_counter = 0; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + + if (cinfo->output_components == 1) { + for (row = cinfo->output_height-1; row >= 0; row--) { + rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + (JDIMENSION) row, (JDIMENSION) 1, FALSE); + rle_putrow(rle_row, (int) cinfo->output_width, &header); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } else { + for (row = cinfo->output_height-1; row >= 0; row--) { + rle_row = (rle_pixel **) dest->rle_row; + output_row = *(*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, dest->image, + (JDIMENSION) row, (JDIMENSION) 1, FALSE); + red = rle_row[0]; + green = rle_row[1]; + blue = rle_row[2]; + for (col = cinfo->output_width; col > 0; col--) { + *red++ = GETJSAMPLE(*output_row++); + *green++ = GETJSAMPLE(*output_row++); + *blue++ = GETJSAMPLE(*output_row++); + } + rle_putrow(rle_row, (int) cinfo->output_width, &header); +#ifdef PROGRESS_REPORT + if (progress != NULL) { + progress->pub.pass_counter++; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } +#endif + } + } + +#ifdef PROGRESS_REPORT + if (progress != NULL) + progress->completed_extra_passes++; +#endif + + /* Emit file trailer */ + rle_puteof(&header); + fflush(dest->pub.output_file); + if (ferror(dest->pub.output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for RLE format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_rle (j_decompress_ptr cinfo) +{ + rle_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (rle_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(rle_dest_struct)); + dest->pub.start_output = start_output_rle; + dest->pub.finish_output = finish_output_rle; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Allocate a work array for output to the RLE library. */ + dest->rle_row = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->output_width, (JDIMENSION) cinfo->output_components); + + /* Allocate a virtual array to hold the image. */ + dest->image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) (cinfo->output_width * cinfo->output_components), + cinfo->output_height, (JDIMENSION) 1); + + return (djpeg_dest_ptr) dest; +} + +#endif /* RLE_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrtarga.c glmark2-2021.02/src/libjpeg-turbo/wrtarga.c --- glmark2-2014.03+git20150611.fa71af2d/src/libjpeg-turbo/wrtarga.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/libjpeg-turbo/wrtarga.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,247 @@ +/* + * wrtarga.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * It was modified by The libjpeg-turbo Project to include only code and + * information relevant to libjpeg-turbo. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains routines to write output images in Targa format. + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + * + * Based on code contributed by Lee Daniel Crocker. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef TARGA_SUPPORTED + + +/* + * To support 12-bit JPEG data, we'd have to scale output down to 8 bits. + * This is not yet implemented. + */ + +#if BITS_IN_JSAMPLE != 8 + Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */ +#endif + + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + char *iobuffer; /* physical I/O buffer */ + JDIMENSION buffer_width; /* width of one row */ +} tga_dest_struct; + +typedef tga_dest_struct *tga_dest_ptr; + + +LOCAL(void) +write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) +/* Create and write a Targa header */ +{ + char targaheader[18]; + + /* Set unused fields of header to 0 */ + MEMZERO(targaheader, sizeof(targaheader)); + + if (num_colors > 0) { + targaheader[1] = 1; /* color map type 1 */ + targaheader[5] = (char) (num_colors & 0xFF); + targaheader[6] = (char) (num_colors >> 8); + targaheader[7] = 24; /* 24 bits per cmap entry */ + } + + targaheader[12] = (char) (cinfo->output_width & 0xFF); + targaheader[13] = (char) (cinfo->output_width >> 8); + targaheader[14] = (char) (cinfo->output_height & 0xFF); + targaheader[15] = (char) (cinfo->output_height >> 8); + targaheader[17] = 0x20; /* Top-down, non-interlaced */ + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + targaheader[2] = 3; /* image type = uncompressed grayscale */ + targaheader[16] = 8; /* bits per pixel */ + } else { /* must be RGB */ + if (num_colors > 0) { + targaheader[2] = 1; /* image type = colormapped RGB */ + targaheader[16] = 8; + } else { + targaheader[2] = 2; /* image type = uncompressed RGB */ + targaheader[16] = 24; + } + } + + if (JFWRITE(dinfo->output_file, targaheader, 18) != (size_t) 18) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* used for unquantized full-color output */ +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + outptr[0] = (char) GETJSAMPLE(inptr[2]); /* RGB to BGR order */ + outptr[1] = (char) GETJSAMPLE(inptr[1]); + outptr[2] = (char) GETJSAMPLE(inptr[0]); + inptr += 3, outptr += 3; + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + +METHODDEF(void) +put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +/* used for grayscale OR quantized color output */ +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = (char) GETJSAMPLE(*inptr++); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Write some demapped pixel data when color quantization is in effect. + * For Targa, this is only applied to grayscale data. + */ + +METHODDEF(void) +put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + register JSAMPROW inptr; + register char *outptr; + register JSAMPROW color_map0 = cinfo->colormap[0]; + register JDIMENSION col; + + inptr = dest->pub.buffer[0]; + outptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + *outptr++ = (char) GETJSAMPLE(color_map0[GETJSAMPLE(*inptr++)]); + } + (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + tga_dest_ptr dest = (tga_dest_ptr) dinfo; + int num_colors, i; + FILE *outfile; + + if (cinfo->out_color_space == JCS_GRAYSCALE) { + /* Targa doesn't have a mapped grayscale format, so we will */ + /* demap quantized gray output. Never emit a colormap. */ + write_header(cinfo, dinfo, 0); + if (cinfo->quantize_colors) + dest->pub.put_pixel_rows = put_demapped_gray; + else + dest->pub.put_pixel_rows = put_gray_rows; + } else if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->quantize_colors) { + /* We only support 8-bit colormap indexes, so only 256 colors */ + num_colors = cinfo->actual_number_of_colors; + if (num_colors > 256) + ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors); + write_header(cinfo, dinfo, num_colors); + /* Write the colormap. Note Targa uses BGR byte order */ + outfile = dest->pub.output_file; + for (i = 0; i < num_colors; i++) { + putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile); + putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile); + putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile); + } + dest->pub.put_pixel_rows = put_gray_rows; + } else { + write_header(cinfo, dinfo, 0); + dest->pub.put_pixel_rows = put_pixel_rows; + } + } else { + ERREXIT(cinfo, JERR_TGA_COLORSPACE); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + /* Make sure we wrote the output file OK */ + fflush(dinfo->output_file); + if (ferror(dinfo->output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for Targa format output. + */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_targa (j_decompress_ptr cinfo) +{ + tga_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (tga_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + sizeof(tga_dest_struct)); + dest->pub.start_output = start_output_tga; + dest->pub.finish_output = finish_output_tga; + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + /* Create I/O buffer. */ + dest->buffer_width = cinfo->output_width * cinfo->output_components; + dest->iobuffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (size_t) (dest->buffer_width * sizeof(char))); + + /* Create decompressor output buffer. */ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* TARGA_SUPPORTED */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/log.cc glmark2-2021.02/src/libmatrix/log.cc --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/log.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/log.cc 2021-04-25 01:47:22.000000000 +0800 @@ -10,7 +10,6 @@ // Alexandros Frantzis // Jesse Barker // -#include #include #include #include @@ -22,6 +21,13 @@ #include #endif +#ifdef _WIN32 +// On windows 'isatty' is found in +#include +#else +#include +#endif + using std::string; const string Log::continuation_prefix("\x10"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/mat.h glmark2-2021.02/src/libmatrix/mat.h --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/mat.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/mat.h 2021-04-25 01:47:22.000000000 +0800 @@ -109,9 +109,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat2& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) @@ -411,9 +408,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat3& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) @@ -793,9 +787,6 @@ // NOTE: If this is non-invertible, we will // throw to avoid undefined behavior. tmat4& inverse() -#ifdef USE_EXCEPTIONS - throw(std::runtime_error) -#endif // USE_EXCEPTIONS { T d(determinant()); if (d == static_cast(0)) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/program.cc glmark2-2021.02/src/libmatrix/program.cc --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/program.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/program.cc 2021-04-25 01:47:22.000000000 +0800 @@ -190,7 +190,7 @@ } shader.attach(handle_); - shaders_.push_back(shader); + shaders_.push_back(std::move(shader)); return; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/program.h glmark2-2021.02/src/libmatrix/program.h --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/program.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/program.h 2021-04-25 01:47:22.000000000 +0800 @@ -35,6 +35,13 @@ message_(shader.message_), ready_(shader.ready_), valid_(shader.valid_) {} + Shader(Shader&& shader) noexcept: + handle_(std::exchange(shader.handle_, 0)), + type_(std::exchange(shader.type_, 0)), + source_(std::move(shader.source_)), + message_(std::move(shader.message_)), + ready_(std::exchange(shader.ready_, false)), + valid_(std::exchange(shader.valid_, false)) {} Shader(unsigned int type, const std::string& source); ~Shader(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/shader-source.cc glmark2-2021.02/src/libmatrix/shader-source.cc --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/shader-source.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/shader-source.cc 2021-04-25 01:47:22.000000000 +0800 @@ -18,6 +18,36 @@ #include "vec.h" #include "util.h" +namespace +{ + +bool is_valid_precision_value(ShaderSource::PrecisionValue precision_value) +{ + switch(precision_value) { + case ShaderSource::PrecisionValueLow: + case ShaderSource::PrecisionValueMedium: + case ShaderSource::PrecisionValueHigh: + case ShaderSource::PrecisionValueDefault: + return true; + default: + return false; + } +} + +bool is_valid_shader_type(ShaderSource::ShaderType shader_type) +{ + switch(shader_type) { + case ShaderSource::ShaderTypeVertex: + case ShaderSource::ShaderTypeFragment: + case ShaderSource::ShaderTypeUnknown: + return true; + default: + return false; + } +} + +} + /** * Holds default precision values for all shader types * (even the unknown type, which is hardwired to default precision values) @@ -34,7 +64,7 @@ bool ShaderSource::load_file(const std::string& filename, std::string& str) { - std::auto_ptr is_ptr(Util::get_resource(filename)); + std::unique_ptr is_ptr(Util::get_resource(filename)); std::istream& inputFile(*is_ptr); if (!inputFile) @@ -421,7 +451,9 @@ ss << "#endif" << std::endl; } } - else if (val >= 0 && val < ShaderSource::PrecisionValueDefault) { + else if (is_valid_precision_value(val) && + val != ShaderSource::PrecisionValueDefault) + { ss << "precision " << precision_map[val] << " "; ss << type_str << ";" << std::endl; } @@ -512,7 +544,7 @@ ShaderSource::default_precision(const ShaderSource::Precision& precision, ShaderSource::ShaderType type) { - if (type < 0 || type > ShaderSource::ShaderTypeUnknown) + if (!is_valid_shader_type(type)) type = ShaderSource::ShaderTypeUnknown; if (type == ShaderSource::ShaderTypeUnknown) { @@ -537,7 +569,7 @@ const ShaderSource::Precision& ShaderSource::default_precision(ShaderSource::ShaderType type) { - if (type < 0 || type > ShaderSource::ShaderTypeUnknown) + if (!is_valid_shader_type(type)) type = ShaderSource::ShaderTypeUnknown; return default_precision_[type]; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/util.cc glmark2-2021.02/src/libmatrix/util.cc --- glmark2-2014.03+git20150611.fa71af2d/src/libmatrix/util.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/libmatrix/util.cc 2021-04-25 01:47:22.000000000 +0800 @@ -220,10 +220,10 @@ uint64_t Util::get_timestamp_us() { - struct timeval tv; - gettimeofday(&tv, NULL); - uint64_t now = static_cast(tv.tv_sec) * 1000000 + - static_cast(tv.tv_usec); + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t now = static_cast(ts.tv_sec) * 1000000 + + static_cast(ts.tv_nsec)/1000.0; return now; } @@ -244,7 +244,7 @@ std::istream * Util::get_resource(const std::string &path) { - std::ifstream *ifs = new std::ifstream(path.c_str()); + std::ifstream *ifs = new std::ifstream(path.c_str(), std::ios::binary); return static_cast(ifs); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/main.cpp glmark2-2021.02/src/main.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/main.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/main.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -42,12 +42,18 @@ #include "native-state-mir.h" #elif GLMARK2_USE_WAYLAND #include "native-state-wayland.h" +#elif GLMARK2_USE_DISPMANX +#include "native-state-dispmanx.h" +#elif GLMARK2_USE_WIN32 +#include "native-state-win32.h" #endif #if GLMARK2_USE_EGL #include "gl-state-egl.h" #elif GLMARK2_USE_GLX #include "gl-state-glx.h" +#elif GLMARK2_USE_WGL +#include "gl-state-wgl.h" #endif using std::vector; @@ -138,7 +144,6 @@ int main(int argc, char *argv[]) { - if (!Options::parse_args(argc, argv)) return 1; @@ -168,12 +173,18 @@ NativeStateMir native_state; #elif GLMARK2_USE_WAYLAND NativeStateWayland native_state; +#elif GLMARK2_USE_DISPMANX + NativeStateDispmanx native_state; +#elif GLMARK2_USE_WIN32 + NativeStateWin32 native_state; #endif #if GLMARK2_USE_EGL GLStateEGL gl_state; #elif GLMARK2_USE_GLX GLStateGLX gl_state; +#elif GLMARK2_USE_WGL + GLStateWGL gl_state; #endif CanvasGeneric canvas(native_state, gl_state, Options::size.first, Options::size.second); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/main-loop.cpp glmark2-2021.02/src/main-loop.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/main-loop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/main-loop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -80,9 +80,9 @@ /* If we have found a valid scene, set it up */ if (bench_iter_ != benchmarks_.end()) { + before_scene_setup(); if (!Options::reuse_context) canvas_.reset(); - before_scene_setup(); scene_ = &(*bench_iter_)->setup_scene(); if (!scene_->running()) { if (!scene_->supported(false)) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/mesh.cpp glmark2-2021.02/src/mesh.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/mesh.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/mesh.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -396,8 +396,9 @@ /* Update the current range from the vertex data */ float *dest(array + nfloats * ri->first); for (size_t n = ri->first; n <= ri->second; n++) { - for (size_t i = 0; i < nfloats; i++) - *dest++ = vertices_[n][offset + i]; + float *src(vertices_[n].data() + offset); + std::copy(src, src + nfloats, dest); + dest += nfloats; } } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/meson.build glmark2-2021.02/src/meson.build --- glmark2-2014.03+git20150611.fa71af2d/src/meson.build 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/meson.build 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,263 @@ +common_sources = [ + 'benchmark-collection.cpp', + 'benchmark.cpp', + 'canvas-generic.cpp', + 'gl-headers.cpp', + 'gl-visual-config.cpp', + 'image-reader.cpp', + 'libmatrix/log.cc', + 'libmatrix/mat.cc', + 'libmatrix/program.cc', + 'libmatrix/shader-source.cc', + 'libmatrix/util.cc', + 'main-loop.cpp', + 'mesh.cpp', + 'model.cpp', + 'options.cpp', + 'scene-buffer.cpp', + 'scene-build.cpp', + 'scene-bump.cpp', + 'scene-clear.cpp', + 'scene-conditionals.cpp', + 'scene.cpp', + 'scene-default-options.cpp', + 'scene-desktop.cpp', + 'scene-effect-2d.cpp', + 'scene-function.cpp', + 'scene-grid.cpp', + 'scene-ideas/a.cc', + 'scene-ideas.cpp', + 'scene-ideas/d.cc', + 'scene-ideas/e.cc', + 'scene-ideas/i.cc', + 'scene-ideas/lamp.cc', + 'scene-ideas/logo.cc', + 'scene-ideas/m.cc', + 'scene-ideas/n.cc', + 'scene-ideas/o.cc', + 'scene-ideas/s.cc', + 'scene-ideas/splines.cc', + 'scene-ideas/table.cc', + 'scene-ideas/t.cc', + 'scene-jellyfish.cpp', + 'scene-loop.cpp', + 'scene-pulsar.cpp', + 'scene-refract.cpp', + 'scene-shading.cpp', + 'scene-shadow.cpp', + 'scene-terrain/base-renderer.cpp', + 'scene-terrain/blur-renderer.cpp', + 'scene-terrain/copy-renderer.cpp', + 'scene-terrain.cpp', + 'scene-terrain/luminance-renderer.cpp', + 'scene-terrain/normal-from-height-renderer.cpp', + 'scene-terrain/overlay-renderer.cpp', + 'scene-terrain/renderer-chain.cpp', + 'scene-terrain/simplex-noise-renderer.cpp', + 'scene-terrain/terrain-renderer.cpp', + 'scene-terrain/texture-renderer.cpp', + 'scene-texture.cpp', + 'shared-library.cpp', + 'text-renderer.cpp', + 'texture.cpp' +] + +libmatrix_headers_dep = declare_dependency( + include_directories: include_directories('libmatrix'), + compile_args: ['-DUSE_EXCEPTIONS'], +) + +common_deps = [ + m_dep, + dl_dep, + libjpeg_dep, + libpng_dep, + libmatrix_headers_dep, +] + +if need_drm + native_drm_lib = static_library( + 'native-drm', + 'native-state-drm.cpp', + dependencies: [libdrm_dep, libudev_dep, gbm_dep, libmatrix_headers_dep], + ) + native_drm_dep = declare_dependency( + link_with: native_drm_lib, + dependencies: libdrm_dep, + compile_args: ['-DGLMARK2_USE_DRM', '-D__GBM__'], + ) +else + native_drm_dep = declare_dependency() +endif + +if need_wayland + wayland_scanner = find_program(wayland_scanner_dep.get_pkgconfig_variable('wayland_scanner')) + wayland_protocols_dir = wayland_protocols_dep.get_pkgconfig_variable('pkgdatadir') + + xdg_shell_xml_path = wayland_protocols_dir + '/stable/xdg-shell/xdg-shell.xml' + xdg_shell_client_header = custom_target( + 'xdg-shell client-header', + command: [ wayland_scanner, 'client-header', '@INPUT@', '@OUTPUT@' ], + input: xdg_shell_xml_path, + output: 'xdg-shell-client-protocol.h', + ) + xdg_shell_private_code = custom_target( + 'xdg-shell private-code', + command: [ wayland_scanner, 'private-code', '@INPUT@', '@OUTPUT@' ], + input: xdg_shell_xml_path, + output: 'xdg-shell-protocol.c', + ) + + native_wayland_lib = static_library( + 'native-wayland', + 'native-state-wayland.cpp', + xdg_shell_client_header, + xdg_shell_private_code, + dependencies: [libmatrix_headers_dep, wayland_client_dep, wayland_cursor_dep, wayland_egl_dep], + ) + + native_wayland_dep = declare_dependency( + link_with: native_wayland_lib, + dependencies: [wayland_client_dep, wayland_cursor_dep, wayland_egl_dep], + compile_args: ['-DGLMARK2_USE_WAYLAND', '-DWL_EGL_PLATFORM'], + ) +else + native_wayland_dep = declare_dependency() +endif + +if need_x11 + native_x11_lib = static_library( + 'native-x11', + 'native-state-x11.cpp', + dependencies: [x11_dep, libmatrix_headers_dep], + ) + + native_x11_dep = declare_dependency( + link_with: native_x11_lib, + compile_args: ['-DGLMARK2_USE_X11'], + ) +else + native_x11_dep = declare_dependency() +endif + +if need_gl + gl_gl_lib = static_library( + 'gl-gl', + 'glad/src/gl.c', + include_directories: include_directories('glad/include'), + ) + gl_gl_dep = declare_dependency( + link_with: gl_gl_lib, + include_directories: include_directories('glad/include'), + compile_args: ['-DGLMARK2_USE_GL'], + ) + + lib_common_gl = static_library( + 'common-gl', + common_sources, + dependencies: [gl_gl_dep, common_deps], + include_directories: [ + include_directories('scene-ideas'), + include_directories('scene-terrain'), + ] + ) + common_gl_dep = declare_dependency( + link_with: lib_common_gl, + include_directories: include_directories('libmatrix'), + ) +else + gl_gl_dep = declare_dependency() + common_gl_dep = declare_dependency() +endif + +if need_glesv2 + gl_glesv2_lib = static_library( + 'gl-glesv2', + 'glad/src/gles2.c', + include_directories: include_directories('glad/include'), + ) + gl_glesv2_dep = declare_dependency( + link_with: gl_glesv2_lib, + include_directories: include_directories('glad/include'), + compile_args: ['-DGLMARK2_USE_GLESv2'], + ) + + lib_common_glesv2 = static_library( + 'common-glesv2', + common_sources, + dependencies: [gl_glesv2_dep, common_deps], + include_directories: [ + include_directories('scene-ideas'), + include_directories('scene-terrain'), + ] + ) + common_glesv2_dep = declare_dependency( + link_with: lib_common_glesv2, + include_directories: include_directories('libmatrix'), + ) +else + gl_glesv2_dep = declare_dependency() + common_glesv2_dep = declare_dependency() +endif + +if need_egl + # gl-state-egl builds depend on both the GL type and the native type. To + # avoid creating all the required combinations explicitly, make this a + # source dependency which will be built properly as part of the final + # glmark2 executable. + wsi_egl_dep = declare_dependency( + sources: ['gl-state-egl.cpp', 'glad/src/egl.c'], + include_directories: [ + include_directories('glad/include'), + include_directories('libmatrix'), + ], + compile_args: ['-DGLMARK2_USE_EGL'], + ) +else + wsi_egl_dep = declare_dependency() +endif + +if need_glx + # Although there is a single valid flavor for glx (x11-gl) and we could + # build gl-state-glx here, make this a source dependency for convenience + # and parity with gl-state-egl. + wsi_glx_dep = declare_dependency( + sources: ['gl-state-glx.cpp', 'glad/src/glx.c'], + include_directories: [ + include_directories('glad/include'), + include_directories('libmatrix'), + ], + compile_args: ['-DGLMARK2_USE_GLX'], + ) +else + wsi_glx_dep = declare_dependency() +endif + +# Flavor info : [ binary, native, gl, wsi ] +flavor_info = { + 'drm-gl' : ['glmark2-drm', native_drm_dep, gl_gl_dep, wsi_egl_dep], + 'drm-glesv2' : ['glmark2-es2-drm', native_drm_dep, gl_glesv2_dep, wsi_egl_dep], + 'wayland-gl' : ['glmark2-wayland', native_wayland_dep, gl_gl_dep, wsi_egl_dep], + 'wayland-glesv2' : ['glmark2-es2-wayland', native_wayland_dep, gl_glesv2_dep, wsi_egl_dep], + 'x11-gl' : ['glmark2', native_x11_dep, gl_gl_dep, wsi_glx_dep], + 'x11-glesv2' : ['glmark2-es2', native_x11_dep, gl_glesv2_dep, wsi_egl_dep], +} + +foreach flavor : flavors + info = flavor_info[flavor] + binary = info[0] + native_dep = info[1] + gl_dep = info[2] + wsi_dep = info[3] + if gl_dep == gl_gl_dep + common_dep = common_gl_dep + elif gl_dep == gl_glesv2_dep + common_dep = common_glesv2_dep + endif + executable( + binary, + ['main.cpp', 'canvas-generic.cpp'], + dependencies: [common_dep, native_dep, gl_dep, wsi_dep], + install: true, + ) +endforeach diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/model.cpp glmark2-2021.02/src/model.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/model.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/model.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -361,7 +361,7 @@ Log::debug("Loading model from 3ds file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& input_file(*input_file_ptr); if (!input_file) { @@ -385,7 +385,7 @@ return false; } - //Read the lenght of the chunk + //Read the length of the chunk read_or_fail(input_file, &chunk_length, 4); switch (chunk_id) @@ -393,7 +393,7 @@ //----------------- MAIN3DS ----------------- // Description: Main chunk, contains all the other chunks // Chunk ID: 4d4d - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x4d4d: break; @@ -401,7 +401,7 @@ //----------------- EDIT3DS ----------------- // Description: 3D Editor chunk, objects layout info // Chunk ID: 3d3d (hex) - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x3d3d: break; @@ -409,7 +409,7 @@ //--------------- EDIT_OBJECT --------------- // Description: Object block, info for each object // Chunk ID: 4000 (hex) - // Chunk Lenght: len(object name) + sub chunks + // Chunk Length: len(object name) + sub chunks //------------------------------------------- case 0x4000: { @@ -429,7 +429,7 @@ //--------------- OBJ_TRIMESH --------------- // Description: Triangular mesh, contains chunks for 3d mesh info // Chunk ID: 4100 (hex) - // Chunk Lenght: 0 + sub chunks + // Chunk Length: 0 + sub chunks //------------------------------------------- case 0x4100: break; @@ -437,7 +437,7 @@ //--------------- TRI_VERTEXL --------------- // Description: Vertices list // Chunk ID: 4110 (hex) - // Chunk Lenght: 1 x unsigned short (number of vertices) + // Chunk Length: 1 x unsigned short (number of vertices) // + 3 x float (vertex coordinates) x (number of vertices) // + sub chunks //------------------------------------------- @@ -461,7 +461,7 @@ //--------------- TRI_FACEL1 ---------------- // Description: Polygons (faces) list // Chunk ID: 4120 (hex) - // Chunk Lenght: 1 x unsigned short (number of polygons) + // Chunk Length: 1 x unsigned short (number of polygons) // + 3 x unsigned short (polygon points) x (number of polygons) // + sub chunks //------------------------------------------- @@ -484,7 +484,7 @@ //------------- TRI_MAPPINGCOORS ------------ // Description: Vertices list // Chunk ID: 4140 (hex) - // Chunk Lenght: 1 x unsigned short (number of mapping points) + // Chunk Length: 1 x unsigned short (number of mapping points) // + 2 x float (mapping coordinates) x (number of mapping points) // + sub chunks //------------------------------------------- @@ -505,7 +505,7 @@ //----------- Skip unknow chunks ------------ //We need to skip all the chunks that currently we don't use - //We use the chunk lenght information to set the file pointer + //We use the chunk length information to set the file pointer //to the same level next chunk //------------------------------------------- default: @@ -681,7 +681,7 @@ { Log::debug("Loading model from obj file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& inputFile(*input_file_ptr); if (!inputFile) { @@ -814,7 +814,7 @@ return ModelPrivate::modelMap; } vector pathVec; - string dataDir(GLMARK_DATA_PATH"/models"); + string dataDir(Options::data_path + "/models"); Util::list_files(dataDir, pathVec); #ifdef GLMARK_EXTRAS_PATH string extrasDir(GLMARK_EXTRAS_PATH"/models"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-dispmanx.cpp glmark2-2021.02/src/native-state-dispmanx.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-dispmanx.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/native-state-dispmanx.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,132 @@ +/* + * Copyright © 2013 Rafal Mielniczuk + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Rafal Mielniczuk + */ + +#include "native-state-dispmanx.h" +#include "log.h" + +#include +#include + +NativeStateDispmanx::NativeStateDispmanx() +{ + memset(&properties_, 0, sizeof(properties_)); + memset(&egl_dispmanx_window, 0, sizeof(egl_dispmanx_window)); +} + +NativeStateDispmanx::~NativeStateDispmanx() +{ +} + +bool +NativeStateDispmanx::init_display() +{ + bcm_host_init(); + + return true; +} + +void* +NativeStateDispmanx::display() +{ + return EGL_DEFAULT_DISPLAY; +} + +bool +NativeStateDispmanx::create_window(WindowProperties const& properties) +{ + int dispmanx_output = 0; /* LCD */ + + if (!properties.fullscreen) { + Log::error("Error: Dispmanx only supports full screen rendering.\n"); + return false; + } + + unsigned screen_width, screen_height; + if (graphics_get_display_size(dispmanx_output, + &screen_width, &screen_height) < 0) { + Log::error("Error: Couldn't get screen width/height.\n"); + return false; + } + + properties_.fullscreen = properties.fullscreen; + properties_.visual_id = properties.visual_id; + properties_.width = screen_width; + properties_.height = screen_height; + + dispmanx_display = vc_dispmanx_display_open(dispmanx_output); + + VC_RECT_T dst_rect; + VC_RECT_T src_rect; + dst_rect.x = 0; + dst_rect.y = 0; + dst_rect.width = screen_width; + dst_rect.height = screen_height; + + src_rect.x = 0; + src_rect.y = 0; + src_rect.width = screen_width << 16; + src_rect.height = screen_height << 16; + + dispmanx_update = vc_dispmanx_update_start(0); + + dispmanx_element = vc_dispmanx_element_add(dispmanx_update, + dispmanx_display, + 0 /*layer*/, + &dst_rect, + 0 /*src*/, + &src_rect, + DISPMANX_PROTECTION_NONE, + 0 /*alpha*/, + 0 /*clamp*/, + DISPMANX_NO_ROTATE); + + egl_dispmanx_window.element = dispmanx_element; + egl_dispmanx_window.width = dst_rect.width; + egl_dispmanx_window.height = dst_rect.height; + vc_dispmanx_update_submit_sync(dispmanx_update); + + return true; +} + +void* +NativeStateDispmanx::window(WindowProperties &properties) +{ + properties = properties_; + return &egl_dispmanx_window; +} + +void +NativeStateDispmanx::visible(bool /*v*/) +{ +} + +bool +NativeStateDispmanx::should_quit() +{ + return false; +} + +void +NativeStateDispmanx::flip() +{ +} + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-dispmanx.h glmark2-2021.02/src/native-state-dispmanx.h --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-dispmanx.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/native-state-dispmanx.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +/* + * Copyright © 2013 Rafal Mielniczuk + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Rafal Mielniczuk + */ + +#ifndef GLMARK2_NATIVE_STATE_DISPMANX_H_ +#define GLMARK2_NATIVE_STATE_DISPMANX_H_ + +#include +#include "bcm_host.h" +#include "GLES/gl.h" +#include "EGL/egl.h" +#include "EGL/eglext.h" + +#include "native-state.h" + +class NativeStateDispmanx : public NativeState +{ +public: + NativeStateDispmanx(); + ~NativeStateDispmanx(); + + bool init_display(); + void* display(); + bool create_window(WindowProperties const& properties); + void* window(WindowProperties& properties); + void visible(bool v); + bool should_quit(); + void flip(); + +private: + DISPMANX_DISPLAY_HANDLE_T dispmanx_display; + DISPMANX_UPDATE_HANDLE_T dispmanx_update; + DISPMANX_ELEMENT_HANDLE_T dispmanx_element; + EGL_DISPMANX_WINDOW_T egl_dispmanx_window; + WindowProperties properties_; +}; + +#endif /* GLMARK2_NATIVE_STATE_DISPMANX_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-drm.cpp glmark2-2021.02/src/native-state-drm.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-drm.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-drm.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -25,6 +25,11 @@ #include "native-state-drm.h" #include "log.h" +#include +#include +#include +#include + /****************** * Public methods * ******************/ @@ -106,7 +111,8 @@ FD_ZERO(&fds); FD_SET(fd_, &fds); drmEventContext evCtx; - evCtx.version = DRM_EVENT_CONTEXT_VERSION; + memset(&evCtx, 0, sizeof(evCtx)); + evCtx.version = 2; evCtx.page_flip_handler = page_flip_handler; while (waiting) { @@ -134,6 +140,269 @@ * Private methods * *******************/ +/* Simple helpers */ + +inline static bool valid_fd(int fd) +{ + return fd >= 0; +} + +inline static bool valid_drm_node_path(std::string const& provided_node_path) +{ + return !provided_node_path.empty(); +} + +inline static bool invalid_drm_node_path(std::string const& provided_node_path) +{ + return !(valid_drm_node_path(provided_node_path)); +} + +/* Udev methods */ +// Udev detection functions +#define UDEV_TEST_FUNC_SIGNATURE(udev_identifier, device_identifier, syspath_identifier) \ + struct udev * __restrict const udev_identifier, \ + struct udev_device * __restrict const device_identifier, \ + char const * __restrict syspath_identifier + +/* Omitting the parameter names is kind of ugly but is the only way + * to force G++ to forget about the unused parameters. + * Having big warnings during the compilation isn't very nice. + * + * These functions will be used as function pointers and should have + * the same signature to avoid weird stack related issues. + * + * Another way to deal with that issue will be to mark unused parameters + * with __attribute__((unused)) + */ +static bool udev_drm_test_virtual( + UDEV_TEST_FUNC_SIGNATURE(,,tested_node_syspath)) +{ + return strstr(tested_node_syspath, "virtual") != NULL; +} + +static bool udev_drm_test_not_virtual( + UDEV_TEST_FUNC_SIGNATURE(udev, current_device, tested_node_syspath)) +{ + return !udev_drm_test_virtual(udev, + current_device, + tested_node_syspath); +} + +static bool +udev_drm_test_primary_gpu(UDEV_TEST_FUNC_SIGNATURE(, current_device,)) +{ + bool is_main_gpu = false; + + auto const drm_node_parent = udev_device_get_parent(current_device); + + /* While tempting, using udev_device_unref will generate issues + * when unreferencing the child in udev_get_node_that_pass_in_enum + * + * udev_device_unref WILL unreference the parent, so avoid doing + * that here. + * + * ( See udev sources : src/libudev/libudev-device.c ) + */ + if (drm_node_parent != NULL) { + is_main_gpu = + (udev_device_get_sysattr_value(drm_node_parent, "boot_vga") + != NULL); + } + + return is_main_gpu; +} + +/* Test if the drm-device is actually modeset capable. + * Render-only devices cannot drive an actual display, + * so the GETRESOURCES ioctl will fail in that case. + */ +static bool udev_drm_test_modeset(std::string const& dev_path) +{ + struct drm_mode_card_res res {}; + int fd, ret; + + fd = open(dev_path.c_str(), O_RDWR); + if (!valid_fd(fd)) + return false; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_GETRESOURCES, &res); + drmClose(fd); + + return !ret; +} + +static std::string +udev_get_node_that_pass_in_enum( + struct udev * __restrict const udev, + struct udev_enumerate * __restrict const dev_enum, + bool (* check_function)(UDEV_TEST_FUNC_SIGNATURE(,,))) +{ + std::string result; + + auto current_element = udev_enumerate_get_list_entry(dev_enum); + + while (current_element && result.empty()) { + char const * __restrict current_element_sys_path = + udev_list_entry_get_name(current_element); + + if (current_element_sys_path) { + struct udev_device * current_device = + udev_device_new_from_syspath(udev, + current_element_sys_path); + auto check_passed = check_function( + udev, current_device, current_element_sys_path); + + if (check_passed) { + const char * device_node_path = + udev_device_get_devnode(current_device); + + if (device_node_path && + udev_drm_test_modeset(device_node_path)) { + result = device_node_path; + } + + } + + udev_device_unref(current_device); + } + + current_element = udev_list_entry_get_next(current_element); + } + + return result; +} + +/* Inspired by KWin detection mechanism */ +/* And yet KWin got it wrong too, it seems. + * 1 - Looking for the primary GPU by checking the flag 'boot_vga' + * won't get you far with some embedded chipsets, like Rockchip. + * 2 - Looking for a GPU plugged in PCI will fail on various embedded + * devices ! + * 3 - Looking for a render node is not guaranteed to work on some + * poorly maintained DRM drivers, which plague some embedded + * devices too... + * + * So, we won't play too smart here. + * - We first check for a primary GPU plugged in PCI with the 'boot_vga' + * attribute, to take care of Desktop users using multiple GPU. + * - Then, we just check for a DRM node that is not virtual + * - At least, we use the first virtual node we get, if we didn't find + * anything yet. + * This should take care of almost every potential use case. + * + * The remaining ones will be dealt with an additional option to + * specify the DRM dev node manually. + */ +static std::string udev_main_gpu_drm_node_path() +{ + Log::debug("Using Udev to detect the right DRM node to use\n"); + auto udev = udev_new(); + auto dev_enumeration = udev_enumerate_new(udev); + + udev_enumerate_add_match_subsystem(dev_enumeration, "drm"); + udev_enumerate_add_match_sysname(dev_enumeration, "card[0-9]*"); + udev_enumerate_scan_devices(dev_enumeration); + + Log::debug("Looking for the main GPU DRM node...\n"); + std::string node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_primary_gpu); + + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found!\n"); + Log::debug("Looking for a concrete GPU DRM node...\n"); + node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_not_virtual); + } + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found!?\n"); + Log::debug("Looking for a virtual GPU DRM node...\n"); + node_path = udev_get_node_that_pass_in_enum( + udev, dev_enumeration, udev_drm_test_virtual); + } + if (invalid_drm_node_path(node_path)) { + Log::debug("Not found.\n"); + Log::debug("Cannot find a single DRM node using UDEV...\n"); + } + + if (valid_drm_node_path(node_path)) { + Log::debug("Success!\n"); + } + + udev_enumerate_unref(dev_enumeration); + udev_unref(udev); + + return node_path; +} + +static int open_using_udev_scan() +{ + auto dev_path = udev_main_gpu_drm_node_path(); + + int fd = -1; + if (valid_drm_node_path(dev_path)) { + Log::debug("Trying to use the DRM node %s\n", dev_path.c_str()); + fd = open(dev_path.c_str(), O_RDWR); + } + else { + Log::error("Can't determine the main graphic card " + "DRM device node\n"); + } + + if (!valid_fd(fd)) { + // %m is GLIBC specific... Maybe use strerror here... + Log::error("Tried to use '%s' but failed.\nReason : %m\n", + dev_path.c_str()); + } + else + Log::debug("Success!\n"); + + return fd; +} + +/* -- End of Udev helpers -- */ + +/* + * This method is there to take care of cases that would not be handled + * by open_using_udev_scan. This should not happen. + * + * If your driver defines a /dev/dri/cardX node and open_using_udev_scan + * were not able to detect it, you should probably file an issue. + * + */ +static int open_using_module_checking() +{ + static const char* drm_modules[] = { + "i915", + "imx-drm", + "nouveau", + "radeon", + "vmgfx", + "omapdrm", + "exynos", + "pl111", + "vc4", + "msm", + "meson", + "rockchip", + "sun4i-drm", + "stm", + }; + + int fd = -1; + unsigned int num_modules(sizeof(drm_modules)/sizeof(drm_modules[0])); + for (unsigned int m = 0; m < num_modules; m++) { + fd = drmOpen(drm_modules[m], 0); + if (fd < 0) { + Log::debug("Failed to open DRM module '%s'\n", drm_modules[m]); + continue; + } + Log::debug("Opened DRM module '%s'\n", drm_modules[m]); + break; + } + + return fd; +} + void NativeStateDRM::fb_destroy_callback(gbm_bo* bo, void* data) { @@ -197,34 +466,23 @@ bool NativeStateDRM::init() { - // TODO: Replace this with something that explicitly probes for the loaded - // driver (udev?). - static const char* drm_modules[] = { - "i915", - "nouveau", - "radeon", - "vmgfx", - "omapdrm", - "exynos" - }; + // TODO: The user should be able to define *exactly* which device + // node to open and the program should try to open only + // this node, in order to take care of unknown use cases. + int fd = open_using_udev_scan(); - unsigned int num_modules(sizeof(drm_modules)/sizeof(drm_modules[0])); - for (unsigned int m = 0; m < num_modules; m++) { - fd_ = drmOpen(drm_modules[m], 0); - if (fd_ < 0) { - Log::debug("Failed to open DRM module '%s'\n", drm_modules[m]); - continue; - } - Log::debug("Opened DRM module '%s'\n", drm_modules[m]); - break; + if (!valid_fd(fd)) { + fd = open_using_module_checking(); } - if (fd_ < 0) { + if (!valid_fd(fd)) { Log::error("Failed to find a suitable DRM device\n"); return false; } - resources_ = drmModeGetResources(fd_); + fd_ = fd; + + resources_ = drmModeGetResources(fd); if (!resources_) { Log::error("drmModeGetResources failed\n"); return false; @@ -232,7 +490,7 @@ // Find a connected connector for (int c = 0; c < resources_->count_connectors; c++) { - connector_ = drmModeGetConnector(fd_, resources_->connectors[c]); + connector_ = drmModeGetConnector(fd, resources_->connectors[c]); if (DRM_MODE_CONNECTED == connector_->connection) { break; } @@ -263,14 +521,40 @@ // Find a suitable encoder for (int e = 0; e < resources_->count_encoders; e++) { + bool found = false; encoder_ = drmModeGetEncoder(fd_, resources_->encoders[e]); - if (encoder_ && encoder_->encoder_id == connector_->encoder_id) { - break; + for (int ce = 0; ce < connector_->count_encoders; ce++) { + if (encoder_ && encoder_->encoder_id == connector_->encoders[ce]) { + found = true; + break; + } } + if (found) + break; drmModeFreeEncoder(encoder_); encoder_ = 0; } + // If encoder is not connected to the connector try to find + // a suitable one + if (!encoder_) { + for (int e = 0; e < connector_->count_encoders; e++) { + encoder_ = drmModeGetEncoder(fd, connector_->encoders[e]); + for (int c = 0; c < resources_->count_crtcs; c++) { + if (encoder_->possible_crtcs & (1 << c)) { + encoder_->crtc_id = resources_->crtcs[c]; + break; + } + } + if (encoder_->crtc_id) { + break; + } + + drmModeFreeEncoder(encoder_); + encoder_ = 0; + } + } + if (!encoder_) { Log::error("Failed to find a suitable encoder\n"); return false; @@ -282,8 +566,13 @@ crtc_ = drmModeGetCrtc(fd_, encoder_->crtc_id); if (!crtc_) { - Log::error("Failed to get current CRTC\n"); - return false; + // if there is no current CRTC, make sure to attach a suitable one + for (int c = 0; c < resources_->count_crtcs; c++) { + if (encoder_->possible_crtcs & (1 << c)) { + encoder_->crtc_id = resources_->crtcs[c]; + break; + } + } } signal(SIGINT, &NativeStateDRM::quit_handler); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-drm.h glmark2-2021.02/src/native-state-drm.h --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-drm.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-drm.h 2021-04-25 01:47:22.000000000 +0800 @@ -41,6 +41,7 @@ resources_(0), connector_(0), encoder_(0), + crtc_(0), mode_(0), dev_(0), surface_(0), diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state.h glmark2-2021.02/src/native-state.h --- glmark2-2014.03+git20150611.fa71af2d/src/native-state.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,12 +22,14 @@ #ifndef GLMARK2_NATIVE_STATE_H_ #define GLMARK2_NATIVE_STATE_H_ +#include + class NativeState { public: struct WindowProperties { - WindowProperties(int w, int h, bool f, int v) + WindowProperties(int w, int h, bool f, intptr_t v) : width(w), height(h), fullscreen(f), visual_id(v) {} WindowProperties() : width(0), height(0), fullscreen(false), visual_id(0) {} @@ -35,7 +37,7 @@ int width; int height; bool fullscreen; - int visual_id; + intptr_t visual_id; }; virtual ~NativeState() {} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-mir.cpp glmark2-2021.02/src/native-state-mir.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-mir.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-mir.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -157,7 +157,7 @@ bool NativeStateMir::create_window(WindowProperties const& properties) { - static const char *win_name("glmark2 "GLMARK_VERSION); + static const char *win_name("glmark2 " GLMARK_VERSION); if (!mir_connection_is_valid(mir_connection_)) { Log::error("Cannot create a Mir surface without a valid connection " diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-wayland.cpp glmark2-2021.02/src/native-state-wayland.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-wayland.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-wayland.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -21,19 +21,29 @@ */ #include "native-state-wayland.h" +#include "log.h" +#include #include #include +#include const struct wl_registry_listener NativeStateWayland::registry_listener_ = { NativeStateWayland::registry_handle_global, NativeStateWayland::registry_handle_global_remove }; -const struct wl_shell_surface_listener NativeStateWayland::shell_surface_listener_ = { - NativeStateWayland::shell_surface_handle_ping, - NativeStateWayland::shell_surface_handle_configure, - NativeStateWayland::shell_surface_handle_popup_done +const struct xdg_toplevel_listener NativeStateWayland::xdg_toplevel_listener_ = { + NativeStateWayland::xdg_toplevel_handle_configure, + NativeStateWayland::xdg_toplevel_handle_close +}; + +const struct xdg_surface_listener NativeStateWayland::xdg_surface_listener_ = { + NativeStateWayland::xdg_surface_handle_configure +}; + +const struct xdg_wm_base_listener NativeStateWayland::xdg_wm_base_listener_ = { + NativeStateWayland::xdg_wm_base_handle_ping }; const struct wl_output_listener NativeStateWayland::output_listener_ = { @@ -43,29 +53,58 @@ NativeStateWayland::output_handle_scale }; +const struct wl_seat_listener NativeStateWayland::seat_listener_ = { + NativeStateWayland::seat_handle_capabilities, +}; + +const struct wl_pointer_listener NativeStateWayland::pointer_listener_ = { + NativeStateWayland::pointer_handle_enter, + NativeStateWayland::pointer_handle_leave, + NativeStateWayland::pointer_handle_motion, + NativeStateWayland::pointer_handle_button, + NativeStateWayland::pointer_handle_axis, +}; + +const struct wl_keyboard_listener NativeStateWayland::keyboard_listener_ = { + NativeStateWayland::keyboard_handle_keymap, + NativeStateWayland::keyboard_handle_enter, + NativeStateWayland::keyboard_handle_leave, + NativeStateWayland::keyboard_handle_key, + NativeStateWayland::keyboard_handle_modifiers, +}; + volatile bool NativeStateWayland::should_quit_ = false; -NativeStateWayland::NativeStateWayland() : display_(0), window_(0) +NativeStateWayland::NativeStateWayland() : cursor_(0), display_(0), window_(0) { } NativeStateWayland::~NativeStateWayland() { if (window_) { - if (window_->shell_surface) - wl_shell_surface_destroy(window_->shell_surface); - if (window_->opaque_reqion) - wl_region_destroy(window_->opaque_reqion); - if (window_->surface) - wl_surface_destroy(window_->surface); + if (window_->xdg_toplevel) + xdg_toplevel_destroy(window_->xdg_toplevel); + if (window_->xdg_surface) + xdg_surface_destroy(window_->xdg_surface); if (window_->native) wl_egl_window_destroy(window_->native); + if (window_->surface) + wl_surface_destroy(window_->surface); delete window_; } + if (cursor_) { + if (cursor_->cursor_surface) + wl_surface_destroy(cursor_->cursor_surface); + if (cursor_->cursor_theme) + wl_cursor_theme_destroy(cursor_->cursor_theme); + + delete cursor_; + } + if (display_) { - if (display_->shell) - wl_shell_destroy(display_->shell); + if (display_->xdg_wm_base) + xdg_wm_base_destroy(display_->xdg_wm_base); for (OutputsVector::iterator it = display_->outputs.begin(); it != display_->outputs.end(); ++it) { @@ -88,30 +127,66 @@ void NativeStateWayland::registry_handle_global(void *data, struct wl_registry *registry, uint32_t id, const char *interface, - uint32_t /*version*/) + uint32_t version) { NativeStateWayland *that = static_cast(data); if (strcmp(interface, "wl_compositor") == 0) { that->display_->compositor = static_cast( wl_registry_bind(registry, - id, &wl_compositor_interface, 1)); - } else if (strcmp(interface, "wl_shell") == 0) { - that->display_->shell = - static_cast( + id, &wl_compositor_interface, std::min(version, 4U))); + } else if (strcmp(interface, "xdg_wm_base") == 0) { + that->display_->xdg_wm_base = + static_cast( wl_registry_bind(registry, - id, &wl_shell_interface, 1)); + id, &xdg_wm_base_interface, std::min(version, 2U))); + xdg_wm_base_add_listener(that->display_->xdg_wm_base, &xdg_wm_base_listener_, that); } else if (strcmp(interface, "wl_output") == 0) { - struct my_output *my_output = new struct my_output; + struct my_output *my_output = new struct my_output(); memset(my_output, 0, sizeof(*my_output)); + my_output->scale = 1; my_output->output = static_cast( wl_registry_bind(registry, - id, &wl_output_interface, 2)); + id, &wl_output_interface, std::min(version, 2U))); that->display_->outputs.push_back(my_output); wl_output_add_listener(my_output->output, &output_listener_, my_output); wl_display_roundtrip(that->display_->display); + } else if (strcmp(interface, "wl_seat") == 0) { + that->display_->seat = + static_cast( + wl_registry_bind(registry, id, &wl_seat_interface, 1)); + wl_seat_add_listener(that->display_->seat, &seat_listener_, that); + } else if (strcmp(interface, "wl_shm") == 0) { + that->display_->shm = + static_cast( + wl_registry_bind(registry, id, &wl_shm_interface, 1)); + + struct my_cursor *my_cursor = new struct my_cursor(); + my_cursor->cursor_surface = + wl_compositor_create_surface(that->display_->compositor); + my_cursor->cursor_theme = wl_cursor_theme_load(NULL, 32, that->display_->shm); + + if (!my_cursor->cursor_theme) { + Log::error("unable to load default theme\n"); + wl_surface_destroy(my_cursor->cursor_surface); + delete my_cursor; + return; + } + + my_cursor->default_cursor = + wl_cursor_theme_get_cursor(my_cursor->cursor_theme, "left_ptr"); + + if (!my_cursor->default_cursor) { + wl_surface_destroy(my_cursor->cursor_surface); + // assume above cursor_theme was set + wl_cursor_theme_destroy(my_cursor->cursor_theme); + delete my_cursor; + return; + } + + that->cursor_ = my_cursor; } } @@ -152,32 +227,84 @@ } void -NativeStateWayland::output_handle_scale(void * /*data*/, struct wl_output * /*wl_output*/, - int32_t /*factor*/) +NativeStateWayland::output_handle_scale(void *data, struct wl_output * /*wl_output*/, + int32_t factor) { + struct my_output *my_output = static_cast(data); + my_output->scale = factor; } void -NativeStateWayland::shell_surface_handle_ping(void * /*data*/, struct wl_shell_surface *shell_surface, - uint32_t serial) +NativeStateWayland::xdg_wm_base_handle_ping(void * /*data*/, struct xdg_wm_base *xdg_wm_base, + uint32_t serial) { - wl_shell_surface_pong(shell_surface, serial); + xdg_wm_base_pong(xdg_wm_base, serial); } void -NativeStateWayland::shell_surface_handle_popup_done(void * /*data*/, - struct wl_shell_surface * /*shell_surface*/) +NativeStateWayland::xdg_toplevel_handle_configure(void *data, struct xdg_toplevel * /*xdg_toplevel*/, + int32_t width, int32_t height, + struct wl_array *states) { + NativeStateWayland *that = static_cast(data); + uint32_t *state; + bool want_fullscreen = false; + uint32_t scale = 1; + + that->window_->waiting_for_configure = false; + + if (!that->display_->outputs.empty()) scale = that->display_->outputs.at(0)->scale; + + for (state = (uint32_t *) states->data; + state < (uint32_t *) ((char *) states->data + states->size); + state++) { + if (*state == XDG_TOPLEVEL_STATE_FULLSCREEN) + want_fullscreen = true; + } + + if (want_fullscreen) { + width *= scale; + height *= scale; + } + + if (want_fullscreen == that->window_->properties.fullscreen && + (width == 0 || width == that->window_->properties.width) && + (height == 0 || height == that->window_->properties.fullscreen)) { + return; + } + + that->window_->properties.fullscreen = want_fullscreen; + that->window_->properties.width = width; + that->window_->properties.height = height; + + if (!that->window_->native) + return; + + wl_egl_window_resize(that->window_->native, width, height, 0, 0); + + struct wl_region *opaque_reqion = wl_compositor_create_region(that->display_->compositor); + wl_region_add(opaque_reqion, 0, 0, + that->window_->properties.width, + that->window_->properties.height); + wl_surface_set_opaque_region(that->window_->surface, opaque_reqion); + wl_region_destroy(opaque_reqion); } void -NativeStateWayland::shell_surface_handle_configure(void *data, struct wl_shell_surface * /*shell_surface*/, - uint32_t /*edges*/, int32_t width, int32_t height) +NativeStateWayland::xdg_toplevel_handle_close(void *data, + struct xdg_toplevel * /*xdg_toplevel */) { NativeStateWayland *that = static_cast(data); - that->window_->properties.width = width; - that->window_->properties.height = height; - wl_egl_window_resize(that->window_->native, width, height, 0, 0); + + that->should_quit_ = true; +} + +void +NativeStateWayland::xdg_surface_handle_configure(void * /*data*/, + struct xdg_surface *xdg_surface, + uint32_t serial) +{ + xdg_surface_ack_configure(xdg_surface, serial); } bool @@ -191,7 +318,7 @@ sigaction(SIGINT, &sa, NULL); sigaction(SIGTERM, &sa, NULL); - display_ = new struct my_display; + display_ = new struct my_display(); if (!display_) { return false; @@ -223,42 +350,51 @@ { struct my_output *output = 0; if (!display_->outputs.empty()) output = display_->outputs.at(0); - window_ = new struct my_window; + window_ = new struct my_window(); window_->properties = properties; - window_->surface = wl_compositor_create_surface(display_->compositor); - if (window_->properties.fullscreen && output) { - window_->native = wl_egl_window_create(window_->surface, - output->width, output->height); - window_->properties.width = output->width; - window_->properties.height = output->height; - } else { - window_->native = wl_egl_window_create(window_->surface, - properties.width, properties.height); - } - window_->opaque_reqion = wl_compositor_create_region(display_->compositor); - wl_region_add(window_->opaque_reqion, 0, 0, - window_->properties.width, - window_->properties.height); - wl_surface_set_opaque_region(window_->surface, window_->opaque_reqion); - - window_->shell_surface = wl_shell_get_shell_surface(display_->shell, - window_->surface); + window_->surface = wl_compositor_create_surface(display_->compositor); + window_->xdg_surface = xdg_wm_base_get_xdg_surface(display_->xdg_wm_base, + window_->surface); + xdg_surface_add_listener(window_->xdg_surface, &xdg_surface_listener_, this); + window_->xdg_toplevel = xdg_surface_get_toplevel(window_->xdg_surface); + xdg_toplevel_add_listener(window_->xdg_toplevel, &xdg_toplevel_listener_, this); + + xdg_toplevel_set_app_id(window_->xdg_toplevel, "com.github.glmark2.glmark2"); + xdg_toplevel_set_title(window_->xdg_toplevel, "glmark2"); + if (window_->properties.fullscreen && output) + xdg_toplevel_set_fullscreen(window_->xdg_toplevel, output->output); + wl_surface_commit(window_->surface); + window_->waiting_for_configure = true; + + /* xdg-shell requires us to wait for the compositor to tell us what its + * desired size for us is */ + while (window_->waiting_for_configure) + wl_display_roundtrip(display_->display); + + if (output && window_->properties.fullscreen) { + if (window_->properties.width <= 0 || + window_->properties.height <= 0) { + window_->properties.width = output->width * output->scale; + window_->properties.height = output->height * output->scale; + } - if (window_->shell_surface) { - wl_shell_surface_add_listener(window_->shell_surface, - &shell_surface_listener_, this); + if (wl_proxy_get_version((struct wl_proxy *) output) >= + WL_OUTPUT_SCALE_SINCE_VERSION) { + wl_surface_set_buffer_scale(window_->surface, output->scale); + } } - wl_shell_surface_set_title(window_->shell_surface, "glmark2"); + window_->native = wl_egl_window_create(window_->surface, + window_->properties.width, + window_->properties.height); - if (window_->properties.fullscreen) { - wl_shell_surface_set_fullscreen(window_->shell_surface, - WL_SHELL_SURFACE_FULLSCREEN_METHOD_DRIVER, - output->refresh, output->output); - } else { - wl_shell_surface_set_toplevel(window_->shell_surface); - } + struct wl_region *opaque_reqion = wl_compositor_create_region(display_->compositor); + wl_region_add(opaque_reqion, 0, 0, + window_->properties.width, + window_->properties.height); + wl_surface_set_opaque_region(window_->surface, opaque_reqion); + wl_region_destroy(opaque_reqion); return true; } @@ -298,3 +434,135 @@ should_quit_ = true; } +void +NativeStateWayland::seat_handle_capabilities(void *data, struct wl_seat *seat, uint32_t caps) +{ + NativeStateWayland *that = static_cast(data); + + if ((caps & WL_SEAT_CAPABILITY_POINTER) && !that->display_->pointer) { + that->display_->pointer = wl_seat_get_pointer(seat); + wl_pointer_add_listener(that->display_->pointer, &pointer_listener_, that); + } else if (!(caps & WL_SEAT_CAPABILITY_POINTER) && that->display_->pointer) { + wl_pointer_destroy(that->display_->pointer); + that->display_->pointer = NULL; + } + + if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !that->display_->keyboard) { + that->display_->keyboard = wl_seat_get_keyboard(seat); + wl_keyboard_add_listener(that->display_->keyboard, &keyboard_listener_, that); + } else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && that->display_->keyboard) { + wl_keyboard_destroy(that->display_->keyboard); + that->display_->keyboard = NULL; + } +} + + +void +NativeStateWayland::pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy) +{ + NativeStateWayland *that = static_cast(data); + + struct wl_buffer *buffer; + struct my_cursor *my_cursor = that->cursor_; + struct wl_cursor *cursor; + struct wl_cursor_image *image; + + if (!my_cursor) + return; + + cursor = my_cursor->default_cursor; + + if (that->window_->properties.fullscreen) { + wl_pointer_set_cursor(pointer, serial, NULL, 0, 0); + } else { + if (cursor) { + image = my_cursor->default_cursor->images[0]; + buffer = wl_cursor_image_get_buffer(image); + if (!buffer) + return; + + wl_pointer_set_cursor(pointer, serial, + my_cursor->cursor_surface, + image->hotspot_x, + image->hotspot_y); + wl_surface_attach(my_cursor->cursor_surface, buffer, 0, 0); + wl_surface_damage(my_cursor->cursor_surface, 0, 0, + image->width, image->height); + wl_surface_commit(my_cursor->cursor_surface); + } + } +} + +void +NativeStateWayland::pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface) +{ +} + +void +NativeStateWayland::pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy) +{ +} + +void +NativeStateWayland::pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, uint32_t button, + uint32_t state) +{ + /* just show that window_ can be used */ + NativeStateWayland *that = static_cast(data); + + if (!that->window_->xdg_toplevel) + return; + + if (state == WL_POINTER_BUTTON_STATE_PRESSED && button == BTN_LEFT) + xdg_toplevel_move(that->window_->xdg_toplevel, + that->display_->seat, serial); +} + +void +NativeStateWayland::pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value) +{ +} + +void +NativeStateWayland::keyboard_handle_keymap(void *data, struct wl_keyboard *keyboard, + uint32_t format, int fd, uint32_t size) +{ + close(fd); +} + +void +NativeStateWayland::keyboard_handle_enter(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface, + struct wl_array *keys) +{ +} + +void +NativeStateWayland::keyboard_handle_leave(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface) +{ +} + +void +NativeStateWayland::keyboard_handle_key(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t time, uint32_t key, + uint32_t state) +{ + if (state == WL_KEYBOARD_KEY_STATE_PRESSED && + (key == KEY_ESC || key == KEY_Q)) + should_quit_ = true; +} + +void +NativeStateWayland::keyboard_handle_modifiers(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, + uint32_t group) +{ +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-wayland.h glmark2-2021.02/src/native-state-wayland.h --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-wayland.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-wayland.h 2021-04-25 01:47:47.000000000 +0800 @@ -26,6 +26,8 @@ #include #include #include +#include +#include "xdg-shell-client-protocol.h" #include "native-state.h" @@ -47,8 +49,13 @@ static void quit_handler(int signum); static const struct wl_registry_listener registry_listener_; - static const struct wl_shell_surface_listener shell_surface_listener_; + static const struct xdg_wm_base_listener xdg_wm_base_listener_; + static const struct xdg_surface_listener xdg_surface_listener_; + static const struct xdg_toplevel_listener xdg_toplevel_listener_; static const struct wl_output_listener output_listener_; + static const struct wl_seat_listener seat_listener_; + static const struct wl_pointer_listener pointer_listener_; + static const struct wl_keyboard_listener keyboard_listener_; static void registry_handle_global(void *data, struct wl_registry *registry, @@ -76,39 +83,87 @@ output_handle_scale(void *data, struct wl_output *wl_output, int32_t factor); static void - shell_surface_handle_ping(void *data, struct wl_shell_surface *shell_surface, - uint32_t serial); + xdg_wm_base_handle_ping(void *data, struct xdg_wm_base *xdg_wm_base, + uint32_t serial); static void - shell_surface_handle_configure(void *data, - struct wl_shell_surface *shell_surface, - uint32_t edges, - int32_t width, int32_t height); - static void - shell_surface_handle_popup_done(void *data, - struct wl_shell_surface *shell_surface); - + xdg_toplevel_handle_configure(void *data, + struct xdg_toplevel *xdg_toplevel, + int32_t width, int32_t height, + struct wl_array *states); + static void + xdg_toplevel_handle_close(void *data, struct xdg_toplevel *xdg_toplevel); + static void + xdg_surface_handle_configure(void *data, struct xdg_surface *xdg_surface, + uint32_t serial); + + static void seat_handle_capabilities(void *data, + struct wl_seat *seat, uint32_t caps); + + static void pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy); + + static void pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface); + + static void pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy); + + static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, + uint32_t button, uint32_t state); + + static void pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value); + + static void keyboard_handle_keymap(void *data, struct wl_keyboard *keyboard, + uint32_t format, int fd, uint32_t size); + static void keyboard_handle_enter(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface, + struct wl_array *keys); + static void keyboard_handle_leave(void *data, struct wl_keyboard *keyboard, + uint32_t serial, struct wl_surface *surface); + static void keyboard_handle_key(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t time, uint32_t key, + uint32_t state); + static void keyboard_handle_modifiers(void *data, struct wl_keyboard *keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, + uint32_t group); struct my_output { wl_output *output; int32_t width, height; int32_t refresh; + int32_t scale; }; typedef std::vector OutputsVector; + struct my_cursor { + wl_cursor_theme *cursor_theme; + wl_cursor *default_cursor; + wl_surface *cursor_surface; + } *cursor_; + struct my_display { wl_display *display; wl_registry *registry; wl_compositor *compositor; - wl_shell *shell; + wl_shm *shm; + wl_seat *seat; + wl_pointer *pointer; + wl_keyboard *keyboard; + struct xdg_wm_base *xdg_wm_base; OutputsVector outputs; } *display_; struct my_window { WindowProperties properties; + bool waiting_for_configure; struct wl_surface *surface; - struct wl_region *opaque_reqion; struct wl_egl_window *native; - struct wl_shell_surface *shell_surface; + struct xdg_surface *xdg_surface; + struct xdg_toplevel *xdg_toplevel; } *window_; static volatile bool should_quit_; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-win32.cpp glmark2-2021.02/src/native-state-win32.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-win32.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/native-state-win32.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,248 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#include "native-state-win32.h" + +static const char *win_name("glmark2 " GLMARK_VERSION); +static const char *child_win_name("glmark2 " GLMARK_VERSION " child"); + +/****************** + * Public methods * + ******************/ + +NativeStateWin32::NativeStateWin32() : parent_window_(0), child_window_(0), native_display_(0), should_quit_(false) {} + +NativeStateWin32::~NativeStateWin32() { cleanup(); } + +bool +NativeStateWin32::init_display() +{ + cleanup(); + + static const LONG initial_width = 128; + static const LONG initial_height = 128; + + // Work around compile error from not defining "UNICODE" while Chromium does + const LPSTR idcArrow = MAKEINTRESOURCEA(32512); + + WNDCLASSEXA parentWindowClass = {}; + parentWindowClass.cbSize = sizeof(WNDCLASSEXA); + parentWindowClass.style = 0; + parentWindowClass.lpfnWndProc = wnd_proc; + parentWindowClass.cbClsExtra = 0; + parentWindowClass.cbWndExtra = 0; + parentWindowClass.hInstance = GetModuleHandle(nullptr); + parentWindowClass.hIcon = nullptr; + parentWindowClass.hCursor = LoadCursorA(nullptr, idcArrow); + parentWindowClass.hbrBackground = 0; + parentWindowClass.lpszMenuName = nullptr; + parentWindowClass.lpszClassName = win_name; + if (!RegisterClassExA(&parentWindowClass)) + { + return false; + } + + WNDCLASSEXA childWindowClass = {}; + childWindowClass.cbSize = sizeof(WNDCLASSEXA); + childWindowClass.style = CS_OWNDC; + childWindowClass.lpfnWndProc = wnd_proc; + childWindowClass.cbClsExtra = 0; + childWindowClass.cbWndExtra = 0; + childWindowClass.hInstance = GetModuleHandle(nullptr); + childWindowClass.hIcon = nullptr; + childWindowClass.hCursor = LoadCursorA(nullptr, idcArrow); + childWindowClass.hbrBackground = 0; + childWindowClass.lpszMenuName = nullptr; + childWindowClass.lpszClassName = child_win_name; + if (!RegisterClassExA(&childWindowClass)) + { + return false; + } + + DWORD parentStyle = WS_CAPTION | WS_THICKFRAME | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_SYSMENU; + DWORD parentExtendedStyle = WS_EX_APPWINDOW; + + RECT sizeRect = { 0, 0, initial_width, initial_height }; + AdjustWindowRectEx(&sizeRect, parentStyle, FALSE, parentExtendedStyle); + + parent_window_ = CreateWindowExA(parentExtendedStyle, win_name, win_name, + parentStyle, CW_USEDEFAULT, CW_USEDEFAULT, + sizeRect.right - sizeRect.left, sizeRect.bottom - sizeRect.top, + nullptr, nullptr, GetModuleHandle(nullptr), this); + + child_window_ = CreateWindowExA(0, child_win_name, win_name, WS_CHILD, 0, 0, + static_cast(initial_width), static_cast(initial_height), + parent_window_, nullptr, GetModuleHandle(nullptr), this); + + native_display_ = GetDC(child_window_); + if (!native_display_) + { + cleanup(); + } + + return native_display_; +} + +void* +NativeStateWin32::display() +{ + return native_display_; +} + +bool +NativeStateWin32::create_window(WindowProperties const& properties) +{ + properties_ = properties; + + RECT windowRect; + if (!GetWindowRect(parent_window_, &windowRect)) + { + return false; + } + + RECT clientRect; + if (!GetClientRect(parent_window_, &clientRect)) + { + return false; + } + + LONG diffX = (windowRect.right - windowRect.left) - clientRect.right; + LONG diffY = (windowRect.bottom - windowRect.top) - clientRect.bottom; + if (!MoveWindow(parent_window_, windowRect.left, windowRect.top, properties.width + diffX, properties.height + diffY, + TRUE)) + { + return false; + } + + if (!MoveWindow(child_window_, 0, 0, properties.width, properties.height, FALSE)) + { + return false; + } + + return true; +} + +void* +NativeStateWin32::window(WindowProperties& properties) +{ + properties = properties_; + return child_window_; +} + +void +NativeStateWin32::visible(bool visible) +{ + int flag = (visible ? SW_SHOW : SW_HIDE); + + if (parent_window_) + { + ShowWindow(parent_window_, flag); + } + + if (child_window_) + { + ShowWindow(child_window_, flag); + } +} + +bool +NativeStateWin32::should_quit() +{ + pump_messages(); + return should_quit_; +} + +void +NativeStateWin32::flip() +{ + pump_messages(); +} + +/******************* + * Private methods * + *******************/ + +void +NativeStateWin32::cleanup() +{ + if (native_display_) + { + ReleaseDC(child_window_, native_display_); + native_display_ = 0; + } + + if (child_window_) + { + DestroyWindow(child_window_); + child_window_ = 0; + } + + if (parent_window_) + { + DestroyWindow(parent_window_); + parent_window_ = 0; + } + + UnregisterClassA(win_name, nullptr); + UnregisterClassA(child_win_name, nullptr); +} + +void +NativeStateWin32::pump_messages() +{ + MSG msg; + while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } +} + +LRESULT CALLBACK +NativeStateWin32::wnd_proc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + switch (message) + { + case WM_NCCREATE: + { + LPCREATESTRUCT pCreateStruct = reinterpret_cast(lParam); + SetWindowLongPtr(hWnd, GWLP_USERDATA, + reinterpret_cast(pCreateStruct->lpCreateParams)); + return DefWindowProcA(hWnd, message, wParam, lParam); + } + } + + NativeStateWin32 *window = reinterpret_cast(GetWindowLongPtr(hWnd, GWLP_USERDATA)); + if (window) + { + switch (message) + { + case WM_CLOSE: + case WM_DESTROY: + window->should_quit_ = true; + break; + default: + break; + } + } + + return DefWindowProcA(hWnd, message, wParam, lParam); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-win32.h glmark2-2021.02/src/native-state-win32.h --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-win32.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/native-state-win32.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,56 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#ifndef GLMARK2_NATIVE_STATE_WIN32_H_ +#define GLMARK2_NATIVE_STATE_WIN32_H_ + +#include "native-state.h" + +#include + +class NativeStateWin32 : public NativeState +{ +public: + NativeStateWin32(); + ~NativeStateWin32(); + + bool init_display(); + void* display(); + bool create_window(WindowProperties const& properties); + void* window(WindowProperties& properties); + void visible(bool v); + bool should_quit(); + void flip(); + +private: + void cleanup(); + void pump_messages(); + + static LRESULT CALLBACK wnd_proc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); + + HWND parent_window_; + HWND child_window_; + HDC native_display_; + WindowProperties properties_; + bool should_quit_; +}; + +#endif /* GLMARK2_NATIVE_STATE_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/native-state-x11.cpp glmark2-2021.02/src/native-state-x11.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/native-state-x11.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/native-state-x11.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -60,7 +60,7 @@ bool NativeStateX11::create_window(WindowProperties const& properties) { - static const char *win_name("glmark2 "GLMARK_VERSION); + static const char *win_name("glmark2 " GLMARK_VERSION); if (!xdpy_) { Log::error("Error: X11 Display has not been initialized!\n"); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/options.cpp glmark2-2021.02/src/options.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/options.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/options.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -31,6 +31,7 @@ std::vector Options::benchmarks; std::vector Options::benchmark_files; bool Options::validate = false; +std::string Options::data_path = std::string(GLMARK_DATA_PATH); Options::FrameEnd Options::frame_end = Options::FrameEndDefault; std::pair Options::size(800, 600); bool Options::list_scenes = false; @@ -48,6 +49,7 @@ {"benchmark", 1, 0, 0}, {"benchmark-file", 1, 0, 0}, {"validate", 0, 0, 0}, + {"data-path", 1, 0, 0}, {"frame-end", 1, 0, 0}, {"off-screen", 0, 0, 0}, {"visual-config", 1, 0, 0}, @@ -116,13 +118,15 @@ printf("A benchmark for Open GL (ES) 2.0\n" "\n" "Options:\n" - " -b, --benchmark BENCH A benchmark to run: 'scene(:opt1=val1)*'\n" + " -b, --benchmark BENCH A benchmark or options to run: '(scene)?(:opt1=val1)*'\n" " (the option can be used multiple times)\n" " -f, --benchmark-file F Load benchmarks to run from a file containing a\n" " list of benchmark descriptions (one per line)\n" " (the option can be used multiple times)\n" " --validate Run a quick output validation test instead of \n" " running the benchmarks\n" + " --data-path PATH Path to glmark2 models, shaders and textures\n" + " Default: " GLMARK_DATA_PATH "\n" " --frame-end METHOD How to end a frame [default,none,swap,finish,readpixels]\n" " --off-screen Render to an off-screen surface\n" " --visual-config C The visual configuration to use for the rendering\n" @@ -171,6 +175,8 @@ Options::benchmark_files.push_back(optarg); else if (!strcmp(optname, "validate")) Options::validate = true; + else if (!strcmp(optname, "data-path")) + Options::data_path = std::string(optarg); else if (!strcmp(optname, "frame-end")) Options::frame_end = frame_end_from_str(optarg); else if (!strcmp(optname, "off-screen")) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/options.h glmark2-2021.02/src/options.h --- glmark2-2014.03+git20150611.fa71af2d/src/options.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/options.h 2021-04-25 01:47:22.000000000 +0800 @@ -43,6 +43,7 @@ static std::vector benchmarks; static std::vector benchmark_files; static bool validate; + static std::string data_path; static FrameEnd frame_end; static std::pair size; static bool list_scenes; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-buffer.cpp glmark2-2021.02/src/scene-buffer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-buffer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-buffer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "util.h" @@ -244,9 +245,9 @@ { /* Set up shaders */ static const std::string vtx_shader_filename( - GLMARK_DATA_PATH"/shaders/buffer-wireframe.vert"); + Options::data_path + "/shaders/buffer-wireframe.vert"); static const std::string frg_shader_filename( - GLMARK_DATA_PATH"/shaders/buffer-wireframe.frag"); + Options::data_path + "/shaders/buffer-wireframe.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-build.cpp glmark2-2021.02/src/scene-build.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-build.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-build.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -25,6 +25,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "model.h" @@ -89,8 +90,8 @@ return false; /* Set up shaders */ - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/light-basic.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); static const LibMatrix::vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-bump.cpp glmark2-2021.02/src/scene-bump.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-bump.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-bump.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,6 +23,7 @@ #include "scene.h" #include "log.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "shader-source.h" #include "model.h" @@ -61,8 +62,8 @@ bool SceneBump::setup_model_plain(const std::string &type) { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-poly.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-poly.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-poly.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-poly.frag"); static const std::string low_poly_filename("asteroid-low"); static const std::string high_poly_filename("asteroid-high"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); @@ -115,8 +116,8 @@ bool SceneBump::setup_model_normals() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-normals.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-normals.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; @@ -170,8 +171,8 @@ bool SceneBump::setup_model_normals_tangent() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals-tangent.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-normals-tangent.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-normals-tangent.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-normals-tangent.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; @@ -228,8 +229,8 @@ bool SceneBump::setup_model_height() { - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/bump-height.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/bump-height.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/bump-height.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/bump-height.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); Model model; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-clear.cpp glmark2-2021.02/src/scene-clear.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-clear.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-clear.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2013 Linaro Limited +// Copyright © 2013 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-collection.h glmark2-2021.02/src/scene-collection.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-collection.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-collection.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2013 Linaro Limited +// Copyright © 2013 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-conditionals.cpp glmark2-2021.02/src/scene-conditionals.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-conditionals.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-conditionals.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,6 +21,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -29,7 +30,7 @@ #include -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/conditionals"); +static const std::string shader_file_base("/shaders/conditionals"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -56,14 +57,14 @@ static std::string get_vertex_shader_source(int steps, bool conditionals) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; for (int i = 0; i < steps; i++) { if (conditionals) - source_main.append_file(step_conditional_file); + source_main.append_file(Options::data_path + step_conditional_file); else - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); @@ -74,14 +75,14 @@ static std::string get_fragment_shader_source(int steps, bool conditionals) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; for (int i = 0; i < steps; i++) { if (conditionals) - source_main.append_file(step_conditional_file); + source_main.append_file(Options::data_path + step_conditional_file); else - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene.cpp glmark2-2021.02/src/scene.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -43,10 +43,12 @@ Scene::Scene(Canvas &pCanvas, const string &name) : canvas_(pCanvas), name_(name), startTime_(0), lastUpdateTime_(0), currentFrame_(0), - running_(0), duration_(0) + running_(0), duration_(0), nframes_(0) { options_["duration"] = Scene::Option("duration", "10.0", "The duration of each benchmark in seconds"); + options_["nframes"] = Scene::Option("nframes", "", + "The number of frames to render"); options_["vertex-precision"] = Scene::Option("vertex-precision", "default,default,default,default", "The precision values for the vertex shader (\"int,float,sampler2d,samplercube\")"); @@ -96,6 +98,7 @@ Scene::setup() { duration_ = Util::fromString(options_["duration"].value); + nframes_ = Util::fromString(options_["nframes"].value); ShaderSource::default_precision( ShaderSource::Precision(options_["vertex-precision"].value), @@ -132,6 +135,9 @@ if (elapsed_time >= duration_) running_ = false; + + if (nframes_ > 0 && currentFrame_ >= nframes_) + running_ = false; } void diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-desktop.cpp glmark2-2021.02/src/scene-desktop.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-desktop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-desktop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -20,11 +20,13 @@ * Alexandros Frantzis (glmark2) * Jesse Barker (glmark2) */ +#include #include #include #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -43,8 +45,8 @@ create_blur_shaders(ShaderSource& vtx_source, ShaderSource& frg_source, unsigned int radius, float sigma, BlurDirection direction) { - vtx_source.append_file(GLMARK_DATA_PATH"/shaders/desktop.vert"); - frg_source.append_file(GLMARK_DATA_PATH"/shaders/desktop-blur.frag"); + vtx_source.append_file(Options::data_path + "/shaders/desktop.vert"); + frg_source.append_file(Options::data_path + "/shaders/desktop-blur.frag"); /* Don't let the gaussian curve become too narrow */ if (sigma < 1.0) @@ -122,6 +124,9 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + GLint prev_fbo; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prev_fbo); + /* Create a FBO */ glGenFramebuffers(1, &fbo_); glBindFramebuffer(GL_FRAMEBUFFER, fbo_); @@ -144,12 +149,12 @@ } } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, prev_fbo); /* Load the shader program when this class if first used */ if (RenderObject::use_count == 0) { - ShaderSource vtx_source(GLMARK_DATA_PATH"/shaders/desktop.vert"); - ShaderSource frg_source(GLMARK_DATA_PATH"/shaders/desktop.frag"); + ShaderSource vtx_source(Options::data_path + "/shaders/desktop.vert"); + ShaderSource frg_source(Options::data_path + "/shaders/desktop.frag"); Scene::load_shaders_from_strings(main_program, vtx_source.str(), frg_source.str()); } @@ -196,6 +201,8 @@ /* Recreate the backing texture with correct size */ if (size_.x() != size.x() || size_.y() != size.y()) { size_ = size; + if (fbo_ == 0) + return; /* If we're resizing the texture, we need to tell the framebuffer*/ glBindTexture(GL_TEXTURE_2D, texture_); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, size_.x(), size_.y(), 0, @@ -377,9 +384,11 @@ */ class RenderScreen : public RenderObject { + Canvas &canvas_; public: - RenderScreen(Canvas &canvas) { fbo_ = canvas.fbo(); } - virtual void init() {} + RenderScreen(Canvas &canvas) : canvas_(canvas) {} + virtual void init() { fbo_ = canvas_.fbo(); } + virtual void size(const LibMatrix::vec2& size) { size_ = size; clear(); } virtual void release() {} }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-effect-2d.cpp glmark2-2021.02/src/scene-effect-2d.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-effect-2d.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-effect-2d.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,6 +26,7 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -96,7 +97,7 @@ create_convolution_fragment_shader(Canvas &canvas, std::vector &array, unsigned int width, unsigned int height) { - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/effect-2d-convolution.frag"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/effect-2d-convolution.frag"); ShaderSource source(frg_shader_filename); if (width * height != array.size()) { @@ -303,7 +304,7 @@ Texture::find_textures(); - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/effect-2d.vert"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/effect-2d.vert"); std::vector kernel; unsigned int kernel_width = 0; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-function.cpp glmark2-2021.02/src/scene-function.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-function.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-function.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,13 +23,14 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" #include "shader-source.h" #include "util.h" -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/function"); +static const std::string shader_file_base("/shaders/function"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -61,7 +62,7 @@ static std::string get_vertex_shader_source(int steps, bool function, std::string &complexity) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; std::string step_file; @@ -72,13 +73,13 @@ for (int i = 0; i < steps; i++) { if (function) - source_main.append_file(call_file); + source_main.append_file(Options::data_path + call_file); else - source_main.append_file(step_file); + source_main.append_file(Options::data_path + step_file); } if (function) - source.replace_with_file("$PROCESS$", step_file); + source.replace_with_file("$PROCESS$", Options::data_path + step_file); else source.replace("$PROCESS$", ""); @@ -90,7 +91,7 @@ static std::string get_fragment_shader_source(int steps, bool function, std::string &complexity) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; std::string step_file; @@ -101,13 +102,13 @@ for (int i = 0; i < steps; i++) { if (function) - source_main.append_file(call_file); + source_main.append_file(Options::data_path + call_file); else - source_main.append_file(step_file); + source_main.append_file(Options::data_path + step_file); } if (function) - source.replace_with_file("$PROCESS$", step_file); + source.replace_with_file("$PROCESS$", Options::data_path + step_file); else source.replace("$PROCESS$", ""); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene.h glmark2-2021.02/src/scene.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene.h 2021-04-25 01:47:22.000000000 +0800 @@ -234,6 +234,7 @@ unsigned currentFrame_; bool running_; double duration_; // Duration of run in seconds + unsigned nframes_; }; /* diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/a.cc glmark2-2021.02/src/scene-ideas/a.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/a.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/a.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'a' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/characters.h glmark2-2021.02/src/scene-ideas/characters.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/characters.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/characters.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/d.cc glmark2-2021.02/src/scene-ideas/d.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/d.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/d.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'd' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/e.cc glmark2-2021.02/src/scene-ideas/e.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/e.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/e.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'e' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/i.cc glmark2-2021.02/src/scene-ideas/i.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/i.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/i.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'i' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/lamp.cc glmark2-2021.02/src/scene-ideas/lamp.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/lamp.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/lamp.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the lamp * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -23,6 +23,7 @@ * Jesse Barker */ #include "lamp.h" +#include "options.h" #include "shader-source.h" #include "log.h" #include "scene.h" @@ -170,8 +171,8 @@ // Initialize shader sources from input files and create programs from them // The program for handling lighting... - string lit_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-lit.vert"); - string lit_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-lit.frag"); + string lit_vtx_filename(Options::data_path + "/shaders/ideas-lamp-lit.vert"); + string lit_frg_filename(Options::data_path + "/shaders/ideas-lamp-lit.frag"); ShaderSource lit_vtx_source(lit_vtx_filename); ShaderSource lit_frg_source(lit_frg_filename); if (!Scene::load_shaders_from_strings(litProgram_, lit_vtx_source.str(), @@ -182,8 +183,8 @@ } // The simple program with no lighting... - string unlit_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-unlit.vert"); - string unlit_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-lamp-unlit.frag"); + string unlit_vtx_filename(Options::data_path + "/shaders/ideas-lamp-unlit.vert"); + string unlit_frg_filename(Options::data_path + "/shaders/ideas-lamp-unlit.frag"); ShaderSource unlit_vtx_source(unlit_vtx_filename); ShaderSource unlit_frg_source(unlit_frg_filename); if (!Scene::load_shaders_from_strings(unlitProgram_, unlit_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/lamp.h glmark2-2021.02/src/scene-ideas/lamp.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/lamp.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/lamp.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/logo.cc glmark2-2021.02/src/scene-ideas/logo.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/logo.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/logo.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the old Silicon Graphics logo * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -23,6 +23,7 @@ * Jesse Barker */ #include "logo.h" +#include "options.h" #include "scene.h" #include "shader-source.h" #include "log.h" @@ -423,8 +424,8 @@ // Initialize shader sources from input files and create programs from them // The program for handling the main object with lighting... - string logo_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo.vert"); - string logo_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo.frag"); + string logo_vtx_filename(Options::data_path + "/shaders/ideas-logo.vert"); + string logo_frg_filename(Options::data_path + "/shaders/ideas-logo.frag"); ShaderSource logo_vtx_source(logo_vtx_filename); ShaderSource logo_frg_source(logo_frg_filename); if (!Scene::load_shaders_from_strings(normalProgram_, logo_vtx_source.str(), @@ -437,8 +438,8 @@ normalNormalIndex_ = normalProgram_[normalAttribName_].location(); // The program for handling the flat object... - string logo_flat_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-flat.vert"); - string logo_flat_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-flat.frag"); + string logo_flat_vtx_filename(Options::data_path + "/shaders/ideas-logo-flat.vert"); + string logo_flat_frg_filename(Options::data_path + "/shaders/ideas-logo-flat.frag"); ShaderSource logo_flat_vtx_source(logo_flat_vtx_filename); ShaderSource logo_flat_frg_source(logo_flat_frg_filename); if (!Scene::load_shaders_from_strings(flatProgram_, logo_flat_vtx_source.str(), @@ -450,8 +451,8 @@ flatVertexIndex_ = flatProgram_[vertexAttribName_].location(); // The program for handling the shadow object with texturing... - string logo_shadow_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-shadow.vert"); - string logo_shadow_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-logo-shadow.frag"); + string logo_shadow_vtx_filename(Options::data_path + "/shaders/ideas-logo-shadow.vert"); + string logo_shadow_frg_filename(Options::data_path + "/shaders/ideas-logo-shadow.frag"); ShaderSource logo_shadow_vtx_source(logo_shadow_vtx_filename); ShaderSource logo_shadow_frg_source(logo_shadow_frg_filename); if (!Scene::load_shaders_from_strings(shadowProgram_, logo_shadow_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/logo.h glmark2-2021.02/src/scene-ideas/logo.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/logo.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/logo.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/m.cc glmark2-2021.02/src/scene-ideas/m.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/m.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/m.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'm' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/n.cc glmark2-2021.02/src/scene-ideas/n.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/n.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/n.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'o' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/o.cc glmark2-2021.02/src/scene-ideas/o.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/o.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/o.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 'o' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/s.cc glmark2-2021.02/src/scene-ideas/s.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/s.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/s.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 's' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/splines.cc glmark2-2021.02/src/scene-ideas/splines.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/splines.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/splines.cc 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/splines.h glmark2-2021.02/src/scene-ideas/splines.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/splines.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/splines.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/table.cc glmark2-2021.02/src/scene-ideas/table.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/table.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/table.cc 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -20,6 +20,7 @@ * Authors: * Jesse Barker */ +#include "options.h" #include "table.h" #include "scene.h" #include "shader-source.h" @@ -101,8 +102,8 @@ // Initialize shader sources from input files and create programs from them // Program to render the table with lighting and a time-based fade... - string table_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-table.vert"); - string table_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-table.frag"); + string table_vtx_filename(Options::data_path + "/shaders/ideas-table.vert"); + string table_frg_filename(Options::data_path + "/shaders/ideas-table.frag"); ShaderSource table_vtx_source(table_vtx_filename); ShaderSource table_frg_source(table_frg_filename); if (!Scene::load_shaders_from_strings(tableProgram_, table_vtx_source.str(), @@ -111,11 +112,11 @@ Log::error("No valid program for table rendering.\n"); return; } - textVertexIndex_ = tableProgram_[vertexAttribName_].location(); + tableVertexIndex_ = tableProgram_[vertexAttribName_].location(); // Program to render the paper with lighting and a time-based fade... - string paper_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-paper.vert"); - string paper_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-paper.frag"); + string paper_vtx_filename(Options::data_path + "/shaders/ideas-paper.vert"); + string paper_frg_filename(Options::data_path + "/shaders/ideas-paper.frag"); ShaderSource paper_vtx_source(paper_vtx_filename); ShaderSource paper_frg_source(paper_frg_filename); if (!Scene::load_shaders_from_strings(paperProgram_, paper_vtx_source.str(), @@ -127,8 +128,8 @@ paperVertexIndex_ = paperProgram_[vertexAttribName_].location(); // Program to handle the text (time-based color fade)... - string text_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-text.vert"); - string text_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-text.frag"); + string text_vtx_filename(Options::data_path + "/shaders/ideas-text.vert"); + string text_frg_filename(Options::data_path + "/shaders/ideas-text.frag"); ShaderSource text_vtx_source(text_vtx_filename); ShaderSource text_frg_source(text_frg_filename); if (!Scene::load_shaders_from_strings(textProgram_, text_vtx_source.str(), @@ -140,8 +141,8 @@ textVertexIndex_ = textProgram_[vertexAttribName_].location(); // Program for the drawUnder functionality (just paint it black)... - string under_table_vtx_filename(GLMARK_DATA_PATH"/shaders/ideas-under-table.vert"); - string under_table_frg_filename(GLMARK_DATA_PATH"/shaders/ideas-under-table.frag"); + string under_table_vtx_filename(Options::data_path + "/shaders/ideas-under-table.vert"); + string under_table_frg_filename(Options::data_path + "/shaders/ideas-under-table.frag"); ShaderSource under_table_vtx_source(under_table_vtx_filename); ShaderSource under_table_frg_source(under_table_frg_filename); if (!Scene::load_shaders_from_strings(underProgram_, under_table_vtx_source.str(), diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/table.h glmark2-2021.02/src/scene-ideas/table.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/table.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/table.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/t.cc glmark2-2021.02/src/scene-ideas/t.cc --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas/t.cc 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas/t.cc 2021-04-25 01:47:22.000000000 +0800 @@ -2,7 +2,7 @@ * Vertex position data describing the letter 't' * * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas.cpp glmark2-2021.02/src/scene-ideas.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-ideas.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-ideas.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ /* * (c) Copyright 1993, Silicon Graphics, Inc. - * Copyright © 2012 Linaro Limited + * Copyright © 2012 Linaro Limited * * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. * @@ -48,7 +48,7 @@ timeOffset_(START_TIME_) { startTime_.tv_sec = 0; - startTime_.tv_usec = 0; + startTime_.tv_nsec = 0; } ~SceneIdeasPrivate() { @@ -69,7 +69,7 @@ float currentSpeed_; float currentTime_; float timeOffset_; - struct timeval startTime_; + struct timespec startTime_; static const float CYCLE_TIME_; static const float TIME_; static const float START_TIME_; @@ -163,17 +163,17 @@ SceneIdeasPrivate::reset_time() { timeOffset_ = START_TIME_; - gettimeofday(&startTime_, NULL); + clock_gettime(CLOCK_MONOTONIC, &startTime_); } void SceneIdeasPrivate::update_time() { // Compute new time - struct timeval current = {0, 0}; - gettimeofday(¤t, NULL); + struct timespec current = {0, 0}; + clock_gettime(CLOCK_MONOTONIC, ¤t); float timediff = (current.tv_sec - startTime_.tv_sec) + - static_cast(current.tv_usec - startTime_.tv_usec) / 1000000.0; + static_cast(current.tv_nsec - startTime_.tv_nsec) / 1000000000.0; float sceneTime = timediff * currentSpeed_ + timeOffset_; // Keep the current time in [START_TIME_..CYCLE_TIME_) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-jellyfish.cpp glmark2-2021.02/src/scene-jellyfish.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-jellyfish.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-jellyfish.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -24,6 +24,7 @@ #include #include #include +#include "options.h" #include "scene.h" #include "scene-jellyfish.h" #include "log.h" @@ -115,8 +116,8 @@ GradientRenderer::init() { // Program set up - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/gradient.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/gradient.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/gradient.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/gradient.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); if (!Scene::load_shaders_from_strings(program_, vtx_source.str(), @@ -272,7 +273,7 @@ { Log::debug("Loading model from file '%s'\n", filename.c_str()); - const std::auto_ptr input_file_ptr(Util::get_resource(filename)); + const std::unique_ptr input_file_ptr(Util::get_resource(filename)); std::istream& inputFile(*input_file_ptr); if (!inputFile) { @@ -375,7 +376,7 @@ bool JellyfishPrivate::initialize() { - static const string modelFilename(GLMARK_DATA_PATH"/models/jellyfish.jobj"); + static const string modelFilename(Options::data_path + "/models/jellyfish.jobj"); if (!load_obj(modelFilename)) { return false; @@ -410,8 +411,8 @@ // Set up program first so we can store attribute and uniform locations // away for the using std::string; - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/jellyfish.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/jellyfish.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/jellyfish.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/jellyfish.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-jellyfish.h glmark2-2021.02/src/scene-jellyfish.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-jellyfish.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-jellyfish.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-loop.cpp glmark2-2021.02/src/scene-loop.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-loop.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-loop.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,13 +23,14 @@ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" #include "shader-source.h" #include "util.h" -static const std::string shader_file_base(GLMARK_DATA_PATH"/shaders/loop"); +static const std::string shader_file_base("/shaders/loop"); static const std::string vtx_file(shader_file_base + ".vert"); static const std::string frg_file(shader_file_base + ".frag"); @@ -62,11 +63,11 @@ static std::string get_fragment_shader_source(int steps, bool loop, bool uniform) { - ShaderSource source(frg_file); + ShaderSource source(Options::data_path + frg_file); ShaderSource source_main; if (loop) { - source_main.append_file(step_loop_file); + source_main.append_file(Options::data_path + step_loop_file); if (uniform) { source_main.replace("$NLOOPS$", "FragmentLoops"); } @@ -76,7 +77,7 @@ } else { for (int i = 0; i < steps; i++) - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); @@ -87,11 +88,11 @@ static std::string get_vertex_shader_source(int steps, bool loop, bool uniform) { - ShaderSource source(vtx_file); + ShaderSource source(Options::data_path + vtx_file); ShaderSource source_main; if (loop) { - source_main.append_file(step_loop_file); + source_main.append_file(Options::data_path + step_loop_file); if (uniform) { source_main.replace("$NLOOPS$", "VertexLoops"); } @@ -101,7 +102,7 @@ } else { for (int i = 0; i < steps; i++) - source_main.append_file(step_simple_file); + source_main.append_file(Options::data_path + step_simple_file); } source.replace("$MAIN$", source_main.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-pulsar.cpp glmark2-2021.02/src/scene-pulsar.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-pulsar.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-pulsar.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -26,6 +26,7 @@ #include #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -112,19 +113,19 @@ std::string frg_shader_filename; static const vec4 lightPosition(-20.0f, 20.0f,-20.0f, 1.0f); if (options_["light"].value == "true") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/pulsar-light.vert"; + vtx_shader_filename = Options::data_path + "/shaders/pulsar-light.vert"; } else { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/pulsar.vert"; + vtx_shader_filename = Options::data_path + "/shaders/pulsar.vert"; } if (options_["texture"].value == "true") { - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic-tex.frag"; + frg_shader_filename = Options::data_path + "/shaders/light-basic-tex.frag"; Texture::find_textures(); if (!Texture::load("crate-base", &texture_, GL_NEAREST, GL_NEAREST, 0)) return false; } else { - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.frag"; + frg_shader_filename = Options::data_path + "/shaders/light-basic.frag"; } ShaderSource vtx_source(vtx_shader_filename); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-refract.cpp glmark2-2021.02/src/scene-refract.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-refract.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-refract.cpp 2021-04-25 01:47:47.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -21,6 +21,7 @@ // #include "scene-refract.h" #include "model.h" +#include "options.h" #include "texture.h" #include "util.h" #include "log.h" @@ -176,10 +177,10 @@ // bool -DistanceRenderTarget::setup(unsigned int width, unsigned int height) +DistanceRenderTarget::setup(unsigned int canvas_fbo, unsigned int width, unsigned int height) { - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/depth.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/depth.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/depth.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/depth.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -192,6 +193,7 @@ canvas_height_ = height; width_ = canvas_width_ * 2; height_ = canvas_height_ * 2; + canvas_fbo_ = canvas_fbo; // If the texture will be too large for the implemnetation, we need to // clamp the dimensions but maintain the aspect ratio. @@ -235,7 +237,7 @@ Log::error("DistanceRenderTarget::setup: glCheckFramebufferStatus failed (0x%x)\n", status); return false; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); return true; } @@ -272,7 +274,7 @@ void DistanceRenderTarget::disable() { - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); glViewport(0, 0, canvas_width_, canvas_height_); glCullFace(GL_BACK); } @@ -281,8 +283,8 @@ RefractPrivate::setup(map& options) { // Program object setup - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-refract.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-refract.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/light-refract.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/light-refract.frag"); static const vec4 lightColor(0.4, 0.4, 0.4, 1.0); ShaderSource vtx_source(vtx_shader_filename); @@ -382,7 +384,7 @@ 0.0, 0.0, 0.0, 0.0, 1.0, 0.0); - if (!depthTarget_.setup(canvas_.width(), canvas_.height())) { + if (!depthTarget_.setup(canvas_.fbo(), canvas_.width(), canvas_.height())) { Log::error("Failed to set up the render target for the depth pass\n"); return false; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-refract.h glmark2-2021.02/src/scene-refract.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-refract.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-refract.h 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -46,6 +46,7 @@ unsigned int height_; unsigned int tex_[2]; unsigned int fbo_; + unsigned int canvas_fbo_; public: DistanceRenderTarget() : canvas_width_(0), @@ -57,7 +58,7 @@ tex_[DEPTH] = tex_[COLOR] = 0; } ~DistanceRenderTarget() {} - bool setup(unsigned int width, unsigned int height); + bool setup(unsigned int canvas_fbo, unsigned int width, unsigned int height); void teardown(); void enable(const LibMatrix::mat4& mvp); void disable(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-shading.cpp glmark2-2021.02/src/scene-shading.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-shading.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-shading.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,6 +24,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -154,24 +155,24 @@ ShaderSource vtx_source; ShaderSource frg_source; if (shading == "gouraud") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-basic.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-basic.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-basic.frag"; frg_source.append_file(frg_shader_filename); vtx_source.append_file(vtx_shader_filename); vtx_source.add_const("LightSourcePosition", lightPosition); vtx_source.add_const("MaterialDiffuse", materialDiffuse); } else if (shading == "blinn-phong-inf") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-advanced.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-advanced.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-advanced.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-advanced.frag"; frg_source.append_file(frg_shader_filename); frg_source.add_const("LightSourcePosition", lightPosition); frg_source.add_const("LightSourceHalfVector", halfVector); vtx_source.append_file(vtx_shader_filename); } else if (shading == "phong") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-phong.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-phong.frag"; unsigned int num_lights = Util::fromString(options_["num-lights"].value); string fragsource = get_fragment_shader_source(frg_shader_filename, num_lights); frg_source.append(fragsource); @@ -179,8 +180,8 @@ vtx_source.append_file(vtx_shader_filename); } else if (shading == "cel") { - vtx_shader_filename = GLMARK_DATA_PATH"/shaders/light-phong.vert"; - frg_shader_filename = GLMARK_DATA_PATH"/shaders/light-cel.frag"; + vtx_shader_filename = Options::data_path + "/shaders/light-phong.vert"; + frg_shader_filename = Options::data_path + "/shaders/light-cel.frag"; vtx_source.append_file(vtx_shader_filename); frg_source.append_file(frg_shader_filename); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-shadow.cpp glmark2-2021.02/src/scene-shadow.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-shadow.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-shadow.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -1,5 +1,5 @@ // -// Copyright © 2012 Linaro Limited +// Copyright © 2012 Linaro Limited // // This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. // @@ -21,6 +21,7 @@ // #include "scene.h" #include "model.h" +#include "options.h" #include "util.h" #include "log.h" #include "shader-source.h" @@ -53,6 +54,7 @@ unsigned int height_; unsigned int tex_; unsigned int fbo_; + unsigned int canvas_fbo_; public: DepthRenderTarget() : canvas_width_(0), @@ -62,7 +64,7 @@ tex_(0), fbo_(0) {} ~DepthRenderTarget() {} - bool setup(unsigned int width, unsigned int height); + bool setup(unsigned int canvas_fbo, unsigned int width, unsigned int height); void teardown(); void enable(const mat4& mvp); void disable(); @@ -71,10 +73,10 @@ }; bool -DepthRenderTarget::setup(unsigned int width, unsigned int height) +DepthRenderTarget::setup(unsigned int canvas_fbo, unsigned int width, unsigned int height) { - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/depth.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/depth.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/depth.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/depth.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -85,6 +87,7 @@ canvas_width_ = width; canvas_height_ = height; + canvas_fbo_ = canvas_fbo; width_ = canvas_width_ * 2; height_ = canvas_height_ * 2; @@ -120,7 +123,7 @@ Log::error("DepthRenderTarget::setup: glCheckFramebufferStatus failed (0x%x)\n", status); return false; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); return true; } @@ -155,7 +158,7 @@ void DepthRenderTarget::disable() { - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_fbo_); glViewport(0, 0, canvas_width_, canvas_height_); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } @@ -201,8 +204,8 @@ // Program set up static const vec4 materialDiffuse(0.3f, 0.3f, 0.3f, 1.0f); - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/shadow.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/shadow.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/shadow.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/shadow.frag"); ShaderSource vtx_source(vtx_shader_filename); ShaderSource frg_source(frg_shader_filename); @@ -314,8 +317,8 @@ ShadowPrivate::setup(map& options) { // Program object setup - static const string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.frag"); + static const string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const string frg_shader_filename(Options::data_path + "/shaders/light-basic.frag"); static const vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); ShaderSource vtx_source(vtx_shader_filename); @@ -372,7 +375,7 @@ float aspect(static_cast(canvas_.width())/static_cast(canvas_.height())); projection_.perspective(fovy, aspect, 2.0, 50.0); - if (!depthTarget_.setup(canvas_.width(), canvas_.height())) { + if (!depthTarget_.setup(canvas_.fbo(), canvas_.width(), canvas_.height())) { Log::error("Failed to set up the render target for the depth pass\n"); return false; } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/base-renderer.cpp glmark2-2021.02/src/scene-terrain/base-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/base-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/base-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,26 +21,33 @@ */ #include "renderer.h" -BaseRenderer::BaseRenderer(const LibMatrix::vec2 &size) : +BaseRenderer::BaseRenderer() : texture_(0), input_texture_(0), fbo_(0), depth_renderbuffer_(0), - min_filter_(GL_LINEAR), mag_filter_(GL_LINEAR), + owns_fbo_(false), min_filter_(GL_LINEAR), mag_filter_(GL_LINEAR), wrap_s_(GL_CLAMP_TO_EDGE), wrap_t_(GL_CLAMP_TO_EDGE) { - setup(size, true, true); } BaseRenderer::~BaseRenderer() { glDeleteTextures(1, &texture_); glDeleteRenderbuffers(1, &depth_renderbuffer_); - glDeleteFramebuffers(1, &fbo_); + if (owns_fbo_) + glDeleteFramebuffers(1, &fbo_); +} + +void +BaseRenderer::setup_onscreen(Canvas& canvas) +{ + size_ = LibMatrix::vec2(canvas.width(), canvas.height()); + recreate(&canvas, false); } void -BaseRenderer::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +BaseRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { size_ = size; - recreate(onscreen, has_depth); + recreate(nullptr, has_depth); } void @@ -59,7 +66,7 @@ { glBindFramebuffer(GL_FRAMEBUFFER, fbo_); glViewport(0, 0, size_.x(), size_.y()); - if (!fbo_ || depth_renderbuffer_) { + if (!owns_fbo_ || depth_renderbuffer_) { glEnable(GL_DEPTH_TEST); glDepthMask(GL_TRUE); } @@ -79,21 +86,26 @@ } void -BaseRenderer::recreate(bool onscreen, bool has_depth) +BaseRenderer::recreate(Canvas* canvas, bool has_depth) { if (texture_) { glDeleteTextures(1, &texture_); texture_ = 0; } - if (fbo_) { + if (owns_fbo_ && fbo_) { glDeleteRenderbuffers(1, &depth_renderbuffer_); depth_renderbuffer_ = 0; glDeleteFramebuffers(1, &fbo_); fbo_ = 0; } - if (!onscreen) { + + if (canvas) { + fbo_ = canvas->fbo(); + owns_fbo_ = false; + } else { create_texture(); create_fbo(has_depth); + owns_fbo_ = true; } } @@ -131,6 +143,9 @@ size_.x(), size_.y()); } + GLint prev_fbo; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prev_fbo); + /* Create the FBO and attach the texture and the renderebuffer */ glGenFramebuffers(1, &fbo_); glBindFramebuffer(GL_FRAMEBUFFER, fbo_); @@ -140,5 +155,6 @@ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_renderbuffer_); } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + + glBindFramebuffer(GL_FRAMEBUFFER, prev_fbo); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/blur-renderer.cpp glmark2-2021.02/src/scene-terrain/blur-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/blur-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/blur-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,18 +19,21 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" +#include + void create_blur_shaders(ShaderSource& vtx_source, ShaderSource& frg_source, unsigned int radius, float sigma, BlurRenderer::BlurDirection direction, float tilt_shift); -BlurRenderer::BlurRenderer(const LibMatrix::vec2 &size, int radius, float sigma, +BlurRenderer::BlurRenderer(int radius, float sigma, BlurDirection dir, const LibMatrix::vec2 &step, float tilt_shift) : - TextureRenderer(size, *blur_program(true, radius, sigma, dir, step, tilt_shift)) + TextureRenderer(*blur_program(true, radius, sigma, dir, step, tilt_shift)) { blur_program_ = blur_program(false, radius, sigma, dir, step, tilt_shift); } @@ -74,15 +77,15 @@ unsigned int radius, float sigma, BlurRenderer::BlurDirection direction, float tilt_shift) { - vtx_source.append_file(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - frg_source.append_file(GLMARK_DATA_PATH"/shaders/terrain-blur.frag"); + vtx_source.append_file(Options::data_path + "/shaders/terrain-texture.vert"); + frg_source.append_file(Options::data_path + "/shaders/terrain-blur.frag"); /* Don't let the gaussian curve become too narrow */ if (sigma < 1.0) sigma = 1.0; unsigned int side = 2 * radius + 1; - float values[radius]; + std::vector values(radius + 1); float sum = 0.0; for (unsigned int i = 0; i < radius + 1; i++) { diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/copy-renderer.cpp glmark2-2021.02/src/scene-terrain/copy-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/copy-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/copy-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,12 +19,13 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -CopyRenderer::CopyRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *copy_program(true)) +CopyRenderer::CopyRenderer() : + TextureRenderer(*copy_program(true)) { copy_program_ = copy_program(false); } @@ -38,8 +39,8 @@ if (!copy_program) { copy_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-overlay.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-overlay.frag"); Scene::load_shaders_from_strings(*copy_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/luminance-renderer.cpp glmark2-2021.02/src/scene-terrain/luminance-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/luminance-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/luminance-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,12 +19,13 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -LuminanceRenderer::LuminanceRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *luminance_program(true)) +LuminanceRenderer::LuminanceRenderer() : + TextureRenderer(*luminance_program(true)) { luminance_program_ = luminance_program(false); } @@ -38,8 +39,8 @@ if (!luminance_program) { luminance_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-luminance.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-luminance.frag"); Scene::load_shaders_from_strings(*luminance_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/normal-from-height-renderer.cpp glmark2-2021.02/src/scene-terrain/normal-from-height-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/normal-from-height-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/normal-from-height-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,19 +19,40 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" -NormalFromHeightRenderer::NormalFromHeightRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *normal_from_height_program(size, true)) +NormalFromHeightRenderer::NormalFromHeightRenderer() : + TextureRenderer(*normal_from_height_program(true)) { - normal_from_height_program_ = normal_from_height_program(size, false); + normal_from_height_program_ = normal_from_height_program(false); +} + +void +NormalFromHeightRenderer::setup_onscreen(Canvas& canvas) +{ + TextureRenderer::setup_onscreen(canvas); + + normal_from_height_program_->start(); + (*normal_from_height_program_)["resolution"] = + LibMatrix::vec2(canvas.width(), canvas.height()); + normal_from_height_program_->stop(); +} + +void +NormalFromHeightRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) +{ + TextureRenderer::setup_offscreen(size, has_depth); + + normal_from_height_program_->start(); + (*normal_from_height_program_)["resolution"] = size; + normal_from_height_program_->stop(); } Program * -NormalFromHeightRenderer::normal_from_height_program(const LibMatrix::vec2 &size, - bool create_new) +NormalFromHeightRenderer::normal_from_height_program(bool create_new) { static Program *normal_from_height_program(0); if (create_new) @@ -39,15 +60,14 @@ if (!normal_from_height_program) { normal_from_height_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-normalmap.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-normalmap.frag"); Scene::load_shaders_from_strings(*normal_from_height_program, vtx_shader.str(), frg_shader.str()); normal_from_height_program->start(); (*normal_from_height_program)["heightMap"] = 0; - (*normal_from_height_program)["resolution"] = size; (*normal_from_height_program)["height"] = 0.05f; (*normal_from_height_program)["uvOffset"] = LibMatrix::vec2(0.0f, 0.0f); (*normal_from_height_program)["uvScale"] = LibMatrix::vec2(1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/overlay-renderer.cpp glmark2-2021.02/src/scene-terrain/overlay-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/overlay-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/overlay-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,6 +19,7 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" @@ -33,10 +34,14 @@ void -OverlayRenderer::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +OverlayRenderer::setup_onscreen(Canvas &canvas) +{ +} + +void +OverlayRenderer::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { static_cast(size); - static_cast(onscreen); static_cast(has_depth); } @@ -107,8 +112,8 @@ void OverlayRenderer::create_program() { - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-overlay.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-overlay.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_shader.str(), frg_shader.str())) return; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/renderer-chain.cpp glmark2-2021.02/src/scene-terrain/renderer-chain.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/renderer-chain.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/renderer-chain.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -22,10 +22,15 @@ #include "renderer.h" void -RendererChain::setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) +RendererChain::setup_onscreen(Canvas &canvas) +{ + static_cast(canvas); +} + +void +RendererChain::setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) { static_cast(size); - static_cast(onscreen); static_cast(has_depth); } diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/renderer.h glmark2-2021.02/src/scene-terrain/renderer.h --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/renderer.h 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/renderer.h 2021-04-25 01:47:22.000000000 +0800 @@ -22,6 +22,7 @@ #include #include +#include "canvas.h" #include "mesh.h" #include "vec.h" #include "program.h" @@ -36,7 +37,8 @@ /** * Sets up the renderer's target. */ - virtual void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth) = 0; + virtual void setup_onscreen(Canvas& canvas) = 0; + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth) = 0; /** * Sets up the renderer's target texture (if any). */ @@ -81,7 +83,8 @@ virtual ~RendererChain() {} /* IRenderer methods */ - void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + void setup_onscreen(Canvas& canvas); + void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); void input_texture(GLuint t); @@ -108,11 +111,12 @@ class BaseRenderer : public IRenderer { public: - BaseRenderer(const LibMatrix::vec2 &size); + BaseRenderer(); virtual ~BaseRenderer(); /* IRenderer methods */ - virtual void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); virtual void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); virtual void input_texture(GLuint t) { input_texture_ = t; } @@ -123,7 +127,7 @@ virtual void render() = 0; protected: - void recreate(bool onscreen, bool has_depth); + void recreate(Canvas* canvas, bool has_depth); void create_texture(); void update_texture_parameters(); void create_fbo(bool has_depth); @@ -133,6 +137,7 @@ GLuint input_texture_; GLuint fbo_; GLuint depth_renderbuffer_; + bool owns_fbo_; GLint min_filter_; GLint mag_filter_; GLint wrap_s_; @@ -146,7 +151,7 @@ class TextureRenderer : public BaseRenderer { public: - TextureRenderer(const LibMatrix::vec2 &size, Program &program); + TextureRenderer(Program &program); virtual ~TextureRenderer() { } /* IRenderer/BaseRenderer methods */ @@ -170,7 +175,7 @@ class CopyRenderer : public TextureRenderer { public: - CopyRenderer(const LibMatrix::vec2 &size); + CopyRenderer(); virtual ~CopyRenderer() { delete copy_program_; } @@ -186,7 +191,7 @@ class SimplexNoiseRenderer : public TextureRenderer { public: - SimplexNoiseRenderer(const LibMatrix::vec2 &size); + SimplexNoiseRenderer(); virtual ~SimplexNoiseRenderer() { delete noise_program_; } LibMatrix::vec2 uv_scale() { return uv_scale_; } @@ -204,12 +209,14 @@ class NormalFromHeightRenderer : public TextureRenderer { public: - NormalFromHeightRenderer(const LibMatrix::vec2 &size); + NormalFromHeightRenderer(); virtual ~NormalFromHeightRenderer() { delete normal_from_height_program_; } + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); + private: - static Program *normal_from_height_program(const LibMatrix::vec2 &size, - bool create_new); + static Program *normal_from_height_program(bool create_new); Program *normal_from_height_program_; }; @@ -220,7 +227,7 @@ class LuminanceRenderer : public TextureRenderer { public: - LuminanceRenderer(const LibMatrix::vec2 &size); + LuminanceRenderer(); virtual ~LuminanceRenderer() { delete luminance_program_; } private: @@ -242,8 +249,8 @@ BlurDirectionBoth }; - BlurRenderer(const LibMatrix::vec2 &size, int radius, float sigma, - BlurDirection dir, const LibMatrix::vec2 &step, float tilt_shift); + BlurRenderer(int radius, float sigma, BlurDirection dir, + const LibMatrix::vec2 &step, float tilt_shift); virtual ~BlurRenderer() { delete blur_program_; } private: @@ -265,7 +272,8 @@ virtual ~OverlayRenderer() { } /* IRenderable Methods */ - void setup(const LibMatrix::vec2 &size, bool onscreen, bool has_depth); + virtual void setup_onscreen(Canvas& canvas); + virtual void setup_offscreen(const LibMatrix::vec2 &size, bool has_depth); void setup_texture(GLint min_filter, GLint mag_filter, GLint wrap_s, GLint wrap_t); void input_texture(GLuint t) { input_texture_ = t; } @@ -293,7 +301,7 @@ class TerrainRenderer : public BaseRenderer { public: - TerrainRenderer(const LibMatrix::vec2 &size, const LibMatrix::vec2 &repeat_overlay); + TerrainRenderer(const LibMatrix::vec2 &repeat_overlay); virtual ~TerrainRenderer(); /* IRenderable Methods */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/simplex-noise-renderer.cpp glmark2-2021.02/src/scene-terrain/simplex-noise-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/simplex-noise-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/simplex-noise-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,14 +19,15 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "shader-source.h" LibMatrix::vec2 SimplexNoiseRenderer::uv_scale_(1.5f, 1.5); -SimplexNoiseRenderer::SimplexNoiseRenderer(const LibMatrix::vec2 &size) : - TextureRenderer(size, *noise_program(true)) +SimplexNoiseRenderer::SimplexNoiseRenderer() : + TextureRenderer(*noise_program(true)) { noise_program_ = noise_program(false); } @@ -40,8 +41,8 @@ if (!noise_program) { noise_program = new Program(); - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain-texture.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain-noise.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain-texture.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain-noise.frag"); Scene::load_shaders_from_strings(*noise_program, vtx_shader.str(), frg_shader.str()); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/terrain-renderer.cpp glmark2-2021.02/src/scene-terrain/terrain-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/terrain-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/terrain-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -19,14 +19,14 @@ * Authors: * Alexandros Frantzis */ +#include "options.h" #include "scene.h" #include "renderer.h" #include "texture.h" #include "shader-source.h" -TerrainRenderer::TerrainRenderer(const LibMatrix::vec2 &size, - const LibMatrix::vec2 &repeat_overlay) : - BaseRenderer(size), height_map_tex_(0), normal_map_tex_(0), +TerrainRenderer::TerrainRenderer(const LibMatrix::vec2 &repeat_overlay) : + BaseRenderer(), height_map_tex_(0), normal_map_tex_(0), specular_map_tex_(0), repeat_overlay_(repeat_overlay) { create_mesh(); @@ -85,8 +85,8 @@ void TerrainRenderer::init_program() { - ShaderSource vtx_shader(GLMARK_DATA_PATH"/shaders/terrain.vert"); - ShaderSource frg_shader(GLMARK_DATA_PATH"/shaders/terrain.frag"); + ShaderSource vtx_shader(Options::data_path + "/shaders/terrain.vert"); + ShaderSource frg_shader(Options::data_path + "/shaders/terrain.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_shader.str(), frg_shader.str())) return; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/texture-renderer.cpp glmark2-2021.02/src/scene-terrain/texture-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain/texture-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain/texture-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,8 +21,8 @@ */ #include "renderer.h" -TextureRenderer::TextureRenderer(const LibMatrix::vec2 &size, Program &program) : - BaseRenderer(size), program_(program) +TextureRenderer::TextureRenderer(Program &program) : + BaseRenderer(), program_(program) { /* Create the mesh (quad) used for rendering */ create_mesh(); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain.cpp glmark2-2021.02/src/scene-terrain.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-terrain.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-terrain.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -64,63 +64,67 @@ const vec2 bloom_res(256.0f, 256.0f); const vec2 grass_res(512.0f, 512.0f); - height_map_renderer = new SimplexNoiseRenderer(map_res); - height_map_renderer->setup(height_map_renderer->size(), false, false); + height_map_renderer = new SimplexNoiseRenderer(); + height_map_renderer->setup_offscreen(map_res, false); - normal_map_renderer = new NormalFromHeightRenderer(map_res); - normal_map_renderer->setup(normal_map_renderer->size(), false, false); + normal_map_renderer = new NormalFromHeightRenderer(); + normal_map_renderer->setup_offscreen(map_res, false); - specular_map_renderer = new LuminanceRenderer(grass_res); + specular_map_renderer = new LuminanceRenderer(); + specular_map_renderer->setup_offscreen(grass_res, false); specular_map_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_REPEAT, GL_REPEAT); - specular_map_renderer->setup(specular_map_renderer->size(), false, false); - terrain_renderer = new TerrainRenderer(screen_res, repeat_overlay); - terrain_renderer->setup(terrain_renderer->size(), - !use_bloom && !use_tilt_shift, - true); + terrain_renderer = new TerrainRenderer(repeat_overlay); + if (!use_bloom && !use_tilt_shift) + terrain_renderer->setup_onscreen(canvas); + else + terrain_renderer->setup_offscreen(screen_res, true); terrain_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); /* Bloom */ if (use_bloom) { - bloom_h_renderer = new BlurRenderer(bloom_res, 2, 4.0, + bloom_h_renderer = new BlurRenderer(2, 4.0, BlurRenderer::BlurDirectionHorizontal, vec2(1.0, 1.0) / screen_res, 0.0); + bloom_h_renderer->setup_offscreen(bloom_res, false); bloom_h_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - bloom_h_renderer->setup(bloom_h_renderer->size(), false, false); - bloom_v_renderer = new BlurRenderer(bloom_res, 2, 4.0, + bloom_v_renderer = new BlurRenderer(2, 4.0, BlurRenderer::BlurDirectionVertical, vec2(1.0, 1.0) / bloom_res, 0.0); + bloom_v_renderer->setup_offscreen(bloom_res, false); bloom_v_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - bloom_v_renderer->setup(bloom_v_renderer->size(), false, false); overlay_renderer = new OverlayRenderer(*terrain_renderer, 0.6); } /* Tilt-shift */ if (use_tilt_shift) { - tilt_h_renderer = new BlurRenderer(screen_res, 4, 2.7, + tilt_h_renderer = new BlurRenderer(4, 2.7, BlurRenderer::BlurDirectionHorizontal, vec2(1.0, 1.0) / screen_res, 0.5); + tilt_h_renderer->setup_offscreen(screen_res, false); tilt_h_renderer->setup_texture(GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE); - tilt_h_renderer->setup(tilt_h_renderer->size(), false, false); - tilt_v_renderer = new BlurRenderer(screen_res, 4, 2.7, + tilt_v_renderer = new BlurRenderer(4, 2.7, BlurRenderer::BlurDirectionVertical, vec2(1.0, 1.0) / screen_res, 0.5); + tilt_v_renderer->setup_onscreen(canvas); } /* Copy renderer */ - if (use_bloom && !use_tilt_shift) - copy_renderer = new CopyRenderer(screen_res); + if (use_bloom && !use_tilt_shift) { + copy_renderer = new CopyRenderer(); + copy_renderer->setup_onscreen(canvas); + } /* Height normal chain */ height_normal_chain = new RendererChain(); @@ -310,7 +314,7 @@ /* Create the specular map */ priv_->specular_map_renderer->render(); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, canvas_.fbo()); glViewport(0, 0, canvas_.width(), canvas_.height()); currentFrame_ = 0; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/scene-texture.cpp glmark2-2021.02/src/scene-texture.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/scene-texture.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/scene-texture.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -24,6 +24,7 @@ */ #include "scene.h" #include "mat.h" +#include "options.h" #include "stack.h" #include "vec.h" #include "log.h" @@ -109,10 +110,10 @@ if (!Scene::setup()) return false; - static const std::string vtx_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic.vert"); - static const std::string vtx_shader_texgen_filename(GLMARK_DATA_PATH"/shaders/light-basic-texgen.vert"); - static const std::string frg_shader_filename(GLMARK_DATA_PATH"/shaders/light-basic-tex.frag"); - static const std::string frg_shader_bilinear_filename(GLMARK_DATA_PATH"/shaders/light-basic-tex-bilinear.frag"); + static const std::string vtx_shader_filename(Options::data_path + "/shaders/light-basic.vert"); + static const std::string vtx_shader_texgen_filename(Options::data_path + "/shaders/light-basic-texgen.vert"); + static const std::string frg_shader_filename(Options::data_path + "/shaders/light-basic-tex.frag"); + static const std::string frg_shader_bilinear_filename(Options::data_path + "/shaders/light-basic-tex-bilinear.frag"); static const LibMatrix::vec4 lightPosition(20.0f, 20.0f, 10.0f, 1.0f); static const LibMatrix::vec4 materialDiffuse(1.0f, 1.0f, 1.0f, 1.0f); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/shared-library.cpp glmark2-2021.02/src/shared-library.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/shared-library.cpp 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/shared-library.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,90 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#include "shared-library.h" + +#include + +#if defined(WIN32) +#include +#else +#include +#endif + +/****************** + * Public methods * + ******************/ +SharedLibrary::SharedLibrary() : handle_(nullptr) {} + +SharedLibrary::~SharedLibrary() +{ + close(); +} + +bool SharedLibrary::open(const char *libName) +{ +#if defined(WIN32) + handle_ = LoadLibraryA(libName); +#else + handle_ = dlopen(libName, RTLD_NOW | RTLD_NODELETE); +#endif + return (handle_ != nullptr); +} + +bool SharedLibrary::open_from_alternatives(std::initializer_list alt_names) +{ + for (const char *name : alt_names) { + if (open(name)) + return true; + } + + return false; +} + +void SharedLibrary::close() +{ + if (handle_) + { +#if defined(WIN32) + FreeLibrary(reinterpret_cast(handle_)); +#else + dlclose(handle_); +#endif + } +} + +void *SharedLibrary::handle() const +{ + return handle_; +} + +void *SharedLibrary::load(const char *symbol) const +{ +#if defined(WIN32) + return reinterpret_cast(GetProcAddress(reinterpret_cast(handle_), symbol)); +#else + return dlsym(handle_, symbol); +#endif +} + +/******************* + * Private methods * + *******************/ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/shared-library.h glmark2-2021.02/src/shared-library.h --- glmark2-2014.03+git20150611.fa71af2d/src/shared-library.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/shared-library.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,44 @@ +/* + * Copyright © 2019 Jamie Madill + * + * This file is part of the glmark2 OpenGL (ES) 2.0 benchmark. + * + * glmark2 is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * glmark2 is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * glmark2. If not, see . + * + * Authors: + * Jamie Madill + */ +#ifndef GLMARK2_SHARED_LIBRARY_H_ +#define GLMARK2_SHARED_LIBRARY_H_ + +#include + +class SharedLibrary +{ +public: + SharedLibrary(); + ~SharedLibrary(); + + bool open(const char *name); + bool open_from_alternatives(std::initializer_list alt_names); + void close(); + + void *handle() const; + void *load(const char *symbol) const; + +private: + void *handle_; +}; + +#endif /* GLMARK2_SHARED_LIBRARY_H_ */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/text-renderer.cpp glmark2-2021.02/src/text-renderer.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/text-renderer.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/text-renderer.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -21,6 +21,7 @@ */ #include "text-renderer.h" #include "gl-headers.h" +#include "options.h" #include "scene.h" #include "shader-source.h" #include "vec.h" @@ -49,8 +50,8 @@ size(0.03); glGenBuffers(2, vbo_); - ShaderSource vtx_source(GLMARK_DATA_PATH"/shaders/text-renderer.vert"); - ShaderSource frg_source(GLMARK_DATA_PATH"/shaders/text-renderer.frag"); + ShaderSource vtx_source(Options::data_path + "/shaders/text-renderer.vert"); + ShaderSource frg_source(Options::data_path + "/shaders/text-renderer.frag"); if (!Scene::load_shaders_from_strings(program_, vtx_source.str(), frg_source.str())) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/texture.cpp glmark2-2021.02/src/texture.cpp --- glmark2-2014.03+git20150611.fa71af2d/src/texture.cpp 2015-06-11 22:39:35.000000000 +0800 +++ glmark2-2021.02/src/texture.cpp 2021-04-25 01:47:22.000000000 +0800 @@ -23,9 +23,11 @@ */ #include "texture.h" #include "log.h" +#include "options.h" #include "util.h" #include "image-reader.h" +#include #include #include @@ -149,7 +151,7 @@ return TexturePrivate::textureMap; } vector pathVec; - string dataDir(GLMARK_DATA_PATH"/textures"); + string dataDir(Options::data_path + "/textures"); Util::list_files(dataDir, pathVec); // Now that we have a list of all of the image files available to us, // let's go through and pull out the names and what format they're in diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/wscript_build glmark2-2021.02/src/wscript_build --- glmark2-2014.03+git20150611.fa71af2d/src/wscript_build 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/src/wscript_build 2021-04-25 01:47:22.000000000 +0800 @@ -1,70 +1,251 @@ +# Detect a Windows build. +is_win = 'WIN32' in ' '.join(bld.env.keys()) + +# Detect MSVC +is_msvc = bld.env.CXX_NAME == 'msvc' + all_sources = bld.path.ant_glob('*.cpp scene-ideas/*.cc scene-terrain/*.cpp') common_sources = [f for f in all_sources if f.name.find('canvas-') == -1 and f.name.find('android') == -1 and f.name.find('native-state-') == -1 and - f.name.find('gl-state-') == -1] -common_uselibs = ['libpng'] + f.name.find('gl-state-') == -1 and + f.name.find('main.cpp') == -1] + common_defines = ['USE_EXCEPTIONS'] libmatrix_sources = [f for f in bld.path.ant_glob('libmatrix/*.cc') if not f.name.endswith('test.cc')] + +common_flavor_sources = ['main.cpp', 'canvas-generic.cpp'] + +libpng_local_sources = [f for f in bld.path.ant_glob('libpng/*.c')] +zlib_local_sources = [f for f in bld.path.ant_glob('zlib/*.c')] + +# Some c sources in libjpeg-turbo are included like headers. So we cannot use globs. +libjpeg_local_sources = [ + 'jcapimin', 'jcapistd', 'jccoefct', 'jccolor', 'jcdctmgr', 'jchuff', 'jcinit', + 'jcmainct', 'jcmarker', 'jcmaster', 'jcomapi', 'jcparam', 'jcphuff', 'jcprepct', + 'jcsample', 'jdapimin', 'jdapistd', 'jdatadst', 'jdatasrc', 'jdcoefct', 'jdcolor', + 'jddctmgr', 'jdhuff', 'jdinput', 'jdmainct', 'jdmarker', 'jdmaster', 'jdmerge', + 'jdphuff', 'jdpostct', 'jdsample', 'jerror', 'jfdctflt', 'jfdctfst', 'jfdctint', + 'jidctflt', 'jidctfst', 'jidctint', 'jidctred', 'jmemmgr', 'jmemnobs', 'jquant1', + 'jquant2', 'jsimd_none', 'jutils', +] + +libjpeg_turbo_local_sources = ['libjpeg-turbo/%s.c' % f for f in libjpeg_local_sources] + +platform_includes = [] +platform_libs = [] +platform_uselibs = [] + +if is_win: + platform_uselibs += ['libpng-local', 'zlib-local', 'libjpeg-turbo-local'] + platform_libs = ['user32'] + + # On windows include some overrides for posix headers. + if is_msvc: + platform_includes += ['include'] +else: + platform_uselibs += ['libpng'] + platform_libs = ['m', 'jpeg', 'dl'] + +if 'WAYLAND_SCANNER_wayland_scanner' in bld.env.keys(): + def wayland_scanner_cmd(arg, src): + return '%s %s < %s > ${TGT}' % (bld.env['WAYLAND_SCANNER_wayland_scanner'], arg, src) + + def wayland_proto_src_path(proto, ver): + wp_dir = bld.env['WAYLAND_PROTOCOLS_pkgdatadir'] + if ver == 'stable': + return '%s/stable/%s/%s.xml' % (wp_dir, proto, proto) + else: + return '%s/unstable/%s/%s-unstable-%s.xml' % (wp_dir, proto, proto, ver) + + def wayland_client_protocol(proto, ver, dst_base): + src_path = wayland_proto_src_path(proto, ver) + out = '%s-client-protocol.h' % dst_base + return bld(rule = wayland_scanner_cmd('client-header', src_path), + target = out, + name = out, + ext_out = ['.h']) + + def wayland_protocol_code(proto, ver, dst_base): + src_path = wayland_proto_src_path(proto, ver) + out = '%s-protocol.c' % dst_base + return bld(rule = wayland_scanner_cmd('private-code', src_path), + target = out, + name = out) + + wayland_client_protocol('xdg-shell', 'stable', 'xdg-shell') + wayland_protocol_code('xdg-shell', 'stable', 'xdg-shell') + flavor_sources = { - 'x11-gl' : ['canvas-generic.cpp', 'native-state-x11.cpp', 'gl-state-glx.cpp'], - 'x11-glesv2' : ['canvas-generic.cpp', 'native-state-x11.cpp', 'gl-state-egl.cpp'], - 'drm-gl' : ['canvas-generic.cpp', 'native-state-drm.cpp', 'gl-state-egl.cpp'], - 'drm-glesv2' : ['canvas-generic.cpp', 'native-state-drm.cpp', 'gl-state-egl.cpp'], - 'mir-gl' : ['canvas-generic.cpp', 'native-state-mir.cpp', 'gl-state-egl.cpp'], - 'mir-glesv2' : ['canvas-generic.cpp', 'native-state-mir.cpp', 'gl-state-egl.cpp'], - 'wayland-gl' : ['canvas-generic.cpp', 'native-state-wayland.cpp', 'gl-state-egl.cpp'], - 'wayland-glesv2' : ['canvas-generic.cpp', 'native-state-wayland.cpp', 'gl-state-egl.cpp'] + 'dispmanx-glesv2' : common_flavor_sources + ['native-state-dispmanx.cpp', 'gl-state-egl.cpp'], + 'drm-gl' : common_flavor_sources + ['native-state-drm.cpp', 'gl-state-egl.cpp'], + 'drm-glesv2' : common_flavor_sources + ['native-state-drm.cpp', 'gl-state-egl.cpp'], + 'mir-gl' : common_flavor_sources + ['native-state-mir.cpp', 'gl-state-egl.cpp'], + 'mir-glesv2' : common_flavor_sources + ['native-state-mir.cpp', 'gl-state-egl.cpp'], + 'wayland-gl' : common_flavor_sources + ['native-state-wayland.cpp', 'gl-state-egl.cpp'], + 'wayland-glesv2' : common_flavor_sources + ['native-state-wayland.cpp', 'gl-state-egl.cpp'], + 'win32-gl': common_flavor_sources + ['native-state-win32.cpp', 'gl-state-wgl.cpp'], + 'win32-glesv2': common_flavor_sources + ['native-state-win32.cpp', 'gl-state-egl.cpp'], + 'x11-gl' : common_flavor_sources + ['native-state-x11.cpp', 'gl-state-glx.cpp'], + 'x11-glesv2' : common_flavor_sources + ['native-state-x11.cpp', 'gl-state-egl.cpp'], } flavor_uselibs = { - 'x11-gl' : ['x11', 'gl', 'matrix-gl'], - 'x11-glesv2' : ['x11', 'egl', 'glesv2', 'matrix-glesv2'], - 'drm-gl' : ['drm', 'gbm', 'egl', 'gl', 'matrix-gl'], - 'drm-glesv2' : ['drm', 'gbm', 'egl', 'glesv2', 'matrix-glesv2'], - 'mir-gl' : ['mirclient', 'egl', 'gl', 'matrix-gl'], - 'mir-glesv2' : ['mirclient', 'egl', 'glesv2', 'matrix-glesv2'], - 'wayland-gl' : ['wayland-client', 'wayland-egl', 'egl', 'gl', 'matrix-gl'], - 'wayland-glesv2' : ['wayland-client', 'wayland-egl', 'egl', 'glesv2', 'matrix-glesv2'] + 'dispmanx-glesv2' : ['glad-egl-dispmanx', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2', 'dispmanx'], + 'drm-gl' : ['drm', 'gbm', 'udev', 'glad-egl-drm', 'glad-gl', 'matrix-gl', 'common-gl'], + 'drm-glesv2' : ['drm', 'gbm', 'udev', 'glad-egl-drm', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'mir-gl' : ['mirclient', 'glad-egl-mir', 'glad-gl', 'matrix-gl', 'common-gl'], + 'mir-glesv2' : ['mirclient', 'glad-egl-mir', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'wayland-gl' : ['wayland-client', 'wayland-egl', 'wayland-cursor', 'glad-egl-wayland', 'glad-gl', 'matrix-gl', 'common-gl'], + 'wayland-glesv2' : ['wayland-client', 'wayland-egl', 'wayland-cursor', 'glad-egl-wayland', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'win32-gl': ['glad-gl', 'glad-wgl', 'matrix-gl', 'common-gl'], + 'win32-glesv2': ['glad-egl-win32', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], + 'x11-gl' : ['x11', 'glad-gl', 'glad-glx', 'matrix-gl', 'common-gl'], + 'x11-glesv2' : ['x11', 'glad-egl-x11', 'glad-glesv2', 'matrix-glesv2', 'common-glesv2'], } + flavor_defines = { - 'x11-gl' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GL', 'GLMARK2_USE_GLX'], - 'x11-glesv2' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], - 'drm-gl' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL', '__GBM__'], - 'drm-glesv2' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL', '__GBM__'], + 'dispmanx-glesv2' : ['GLMARK2_USE_DISPMANX', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'drm-gl' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], + 'drm-glesv2' : ['GLMARK2_USE_DRM', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], 'mir-gl' : ['GLMARK2_USE_MIR', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], 'mir-glesv2' : ['GLMARK2_USE_MIR', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], 'wayland-gl' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GL', 'GLMARK2_USE_EGL'], - 'wayland-glesv2' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'] + 'wayland-glesv2' : ['GLMARK2_USE_WAYLAND', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'win32-gl': ['GLMARK2_USE_WIN32', 'GLMARK2_USE_WGL', 'GLMARK2_USE_GL'], + 'win32-glesv2': ['GLMARK2_USE_WIN32', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], + 'x11-gl' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GL', 'GLMARK2_USE_GLX'], + 'x11-glesv2' : ['GLMARK2_USE_X11', 'GLMARK2_USE_GLESv2', 'GLMARK2_USE_EGL'], +} +flavor_libs = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : [], + 'wayland-glesv2' : [], + 'win32-gl': ['opengl32', 'gdi32'], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +flavor_depends_on = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : ['xdg-shell-client-protocol.h', 'xdg-shell-protocol.c'], + 'wayland-glesv2' : ['xdg-shell-client-protocol.h', 'xdg-shell-protocol.c'], + 'win32-gl': [], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +flavor_sources_gen = { + 'dispmanx-glesv2' : [], + 'drm-gl' : [], + 'drm-glesv2' : [], + 'mir-gl' : [], + 'mir-glesv2' : [], + 'wayland-gl' : [bld.path.find_or_declare('xdg-shell-protocol.c')], + 'wayland-glesv2' : [bld.path.find_or_declare('xdg-shell-protocol.c')], + 'win32-gl': [], + 'win32-glesv2' : [], + 'x11-gl' : [], + 'x11-glesv2' : [], +} +egl_platform_defines = { + 'dispmanx' : ['MESA_EGL_NO_X11_HEADERS'], + 'drm' : ['__GBM__'], + 'mir' : ['MESA_EGL_NO_X11_HEADERS'], + 'wayland' : ['WL_EGL_PLATFORM'], + 'win32' : [], + 'x11' : [], } -includes = ['.', 'scene-ideas', 'scene-terrain'] +includes = ['.', 'scene-ideas', 'scene-terrain'] + platform_includes all_uselibs = set() for name in bld.env.keys(): if name.startswith('FLAVOR_') and bld.env[name]: flavor = name.replace('FLAVOR_', '').lower().replace('_', '-') + egl_platform = flavor.split('-')[0] target = bld.env[name] - bld( + node = bld( features = ['cxx', 'cprogram'], - source = common_sources + flavor_sources[flavor], + source = flavor_sources[flavor], target = target, - use = common_uselibs + flavor_uselibs[flavor], - lib = ['m', 'jpeg', 'dl'], - includes = includes, - defines = common_defines + flavor_defines[flavor] + use = platform_uselibs + flavor_uselibs[flavor], + lib = platform_libs + flavor_libs[flavor], + includes = ['.'] + platform_includes, + defines = common_defines + flavor_defines[flavor] + + egl_platform_defines[egl_platform], + depends_on = flavor_depends_on[flavor] ) - all_uselibs |= set(flavor_uselibs[flavor]) + if flavor_sources_gen[flavor]: + node.source.extend(flavor_sources_gen[flavor]) + all_uselibs |= set(flavor_uselibs[flavor] + platform_uselibs) + +# Build glad-egl for all used EGL platforms +for egl_target in (v for v in all_uselibs if v.startswith('glad-egl')): + egl_platform = egl_target.split('-')[2] + bld( + features = ['c'], + source = ['glad/src/egl.c'], + target = egl_target, + includes = ['glad/include'], + export_includes = 'glad/include', + defines = egl_platform_defines[egl_platform] + ) + +if 'glad-glx' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/glx.c'], + target = 'glad-glx', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-gl' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/gl.c'], + target = 'glad-gl', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-wgl' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/wgl.c'], + target = 'glad-wgl', + includes = ['glad/include'], + export_includes = 'glad/include' + ) + +if 'glad-glesv2' in all_uselibs: + bld( + features = ['c'], + source = ['glad/src/gles2.c'], + target = 'glad-glesv2', + includes = ['glad/include'], + export_includes = 'glad/include' + ) if 'matrix-gl' in all_uselibs: bld( features = ['cxx', 'cxxstlib'], source = libmatrix_sources, target = 'matrix-gl', + use = ['glad-gl'], lib = ['m'], - includes = ['.'], + includes = ['.'] + platform_includes, export_includes = 'libmatrix', defines = ['GLMARK2_USE_GL', 'USE_EXCEPTIONS'] ) @@ -74,8 +255,56 @@ features = ['cxx', 'cxxstlib'], source = libmatrix_sources, target = 'matrix-glesv2', + use = ['glad-glesv2'], lib = ['m'], - includes = ['.'], + includes = ['.'] + platform_includes, export_includes = 'libmatrix', defines = ['GLMARK2_USE_GLESv2', 'USE_EXCEPTIONS'] ) + +if 'libpng-local' in all_uselibs: + bld( + features = ['c'], + source = libpng_local_sources, + target = 'libpng-local', + use = ['zlib-local'], + export_includes = 'libpng' + ) + +if 'zlib-local' in all_uselibs: + bld( + features = ['c'], + source = zlib_local_sources, + target = 'zlib-local', + export_includes = 'zlib' + ) + +if 'libjpeg-turbo-local' in all_uselibs: + bld( + features = ['c'], + source = libjpeg_turbo_local_sources, + target = 'libjpeg-turbo-local', + export_includes = 'libjpeg-turbo' + ) + +if 'common-gl' in all_uselibs: + bld( + features = ['cxx', 'cxxstlib'], + source = common_sources, + target = 'common-gl', + use = platform_uselibs + ['glad-gl', 'matrix-gl'], + lib = ['m', 'jpeg', 'dl'], + includes = includes, + defines = ['GLMARK2_USE_GL', 'USE_EXCEPTIONS'] + ) + +if 'common-glesv2' in all_uselibs: + bld( + features = ['cxx', 'cxxstlib'], + source = common_sources, + target = 'common-glesv2', + use = platform_uselibs + ['glad-glesv2', 'matrix-glesv2'], + lib = ['m', 'jpeg', 'dl'], + includes = includes, + defines = ['GLMARK2_USE_GLESv2', 'USE_EXCEPTIONS'] + ) diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/adler32.c glmark2-2021.02/src/zlib/adler32.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/adler32.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/adler32.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,186 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/compress.c glmark2-2021.02/src/zlib/compress.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/compress.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/compress.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,86 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong left; + + left = *destLen; + *destLen = 0; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/crc32.c glmark2-2021.02/src/zlib/crc32.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/crc32.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/crc32.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,442 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + */ + +#ifdef MAKECRCH +# include +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +/* Definitions for doing the crc four data bytes at a time. */ +#if !defined(NOBYFOUR) && defined(Z_U4) +# define BYFOUR +#endif +#ifdef BYFOUR + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, z_size_t)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, z_size_t)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); + + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local z_crc_t FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const z_crc_t FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + z_crc_t c; + int n, k; + z_crc_t poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (z_crc_t)n; + for (k = 0; k < 8; k++) + c = c & 1 ? poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = ZSWAP32(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = ZSWAP32(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const z_crc_t FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const z_crc_t FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + z_crc_t endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + return crc32_z(crc, buf, len); +} + +#ifdef BYFOUR + +/* + This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit + integer pointer type. This violates the strict aliasing rule, where a + compiler can assume, for optimization purposes, that two pointers to + fundamentally different types won't ever point to the same memory. This can + manifest as a problem only if one of the pointers is written to. This code + only reads from those pointers. So long as this code remains isolated in + this compilation unit, there won't be a problem. For this reason, this code + should not be copied and pasted into a compilation unit in which other code + writes to the buffer that is passed to these routines. + */ + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = (z_crc_t)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *buf4++; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = ZSWAP32((z_crc_t)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(ZSWAP32(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +local uLong crc32_combine_(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/crc32.h glmark2-2021.02/src/zlib/crc32.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/crc32.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/crc32.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/deflate.c glmark2-2021.02/src/zlib/deflate.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/deflate.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/deflate.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,2163 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local int deflateStateCheck OF((z_streamp strm)); +local void slide_hash OF((deflate_state *s)); +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV +# pragma message("Assembler code may have bugs -- use at your own risk") + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef ZLIB_DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +local void slide_hash(s) + deflate_state *s; +{ + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = (uInt)memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +local int deflateStateCheck (strm) + z_streamp strm; +{ + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + z_streamp strm; + Bytef *dictionary; + uInt *dictLength; +{ + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->high_water) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_out == 0) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + old_flush = s->last_flush; + s->last_flush = flush; + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Write the header */ + if (s->status == INIT_STATE) { + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + left -= copy; + } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + } + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#endif + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + + status = strm->state->status; + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (deflateStateCheck(source) || dest == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local unsigned read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = (int)s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#endif /* FASTEST */ + +#ifdef ZLIB_DEBUG + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* ZLIB_DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + unsigned n; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunites to have a single copy from next_in to next_out. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. + */ + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); + + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. + */ + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. + */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ + + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; + + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); + +#ifdef ZLIB_DEBUG + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; +#endif + + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; + } + + /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. + */ + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; + } + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. + */ + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. */ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + } + s->block_start = s->strstart; + s->insert += MIN(used, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart - 1; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. + */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/deflate.h glmark2-2021.02/src/zlib/deflate.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/deflate.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/deflate.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,349 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef ZLIB_DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; +#else + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzclose.c glmark2-2021.02/src/zlib/gzclose.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzclose.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/gzclose.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,25 @@ +/* gzclose.c -- zlib gzclose() function + * Copyright (C) 2004, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* gzclose() is in a separate file so that it is linked in only if it is used. + That way the other gzclose functions can be used instead to avoid linking in + unneeded compression or decompression routines. */ +int ZEXPORT gzclose(file) + gzFile file; +{ +#ifndef NO_GZCOMPRESS + gz_statep state; + + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); +#else + return gzclose_r(file); +#endif +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzguts.h glmark2-2021.02/src/zlib/gzguts.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzguts.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/gzguts.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,218 @@ +/* gzguts.h -- zlib internal header definitions for gz* operations + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef _LARGEFILE64_SOURCE +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# endif +# ifdef _FILE_OFFSET_BITS +# undef _FILE_OFFSET_BITS +# endif +#endif + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include +#include "zlib.h" +#ifdef STDC +# include +# include +# include +#endif + +#ifndef _POSIX_SOURCE +# define _POSIX_SOURCE +#endif +#include + +#ifdef _WIN32 +# include +#endif + +#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) +# include +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# define WIDECHAR +#endif + +#ifdef WINAPI_FAMILY +# define open _open +# define read _read +# define write _write +# define close _close +#endif + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS +/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 +/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) +# define vsnprintf _vsnprintf +# endif +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +# ifdef VMS +# define NO_vsnprintf +# endif +# ifdef __OS400__ +# define NO_vsnprintf +# endif +# ifdef __MVS__ +# define NO_vsnprintf +# endif +#endif + +/* unlike snprintf (which is required in C99), _snprintf does not guarantee + null termination of the result -- however this is only used in gzlib.c where + the result is assured to fit in the space provided */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +/* gz* functions always use library allocation functions */ +#ifndef STDC + extern voidp malloc OF((uInt size)); + extern void free OF((voidpf ptr)); +#endif + +/* get errno and strerror definition */ +#if defined UNDER_CE +# include +# define zstrerror() gz_strwinerror((DWORD)GetLastError()) +#else +# ifndef NO_STRERROR +# include +# define zstrerror() strerror(errno) +# else +# define zstrerror() "stdio error (consult errno)" +# endif +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); +#endif + +/* default memLevel */ +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer (double-sized when writing) */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzlib.c glmark2-2021.02/src/zlib/gzlib.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzlib.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/gzlib.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,637 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +/* Local functions */ +local void gz_reset OF((gz_statep)); +local gzFile gz_open OF((const void *, int, const char *)); + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. */ +char ZLIB_INTERNAL *gz_strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +#endif /* UNDER_CE */ + +/* Reset gzip file state */ +local void gz_reset(state) + gz_statep state; +{ + state->x.have = 0; /* no output data available */ + if (state->mode == GZ_READ) { /* for reading ... */ + state->eof = 0; /* not at end of file */ + state->past = 0; /* have not read past end yet */ + state->how = LOOK; /* look for gzip header */ + } + state->seek = 0; /* no seek request pending */ + gz_error(state, Z_OK, NULL); /* clear error */ + state->x.pos = 0; /* no uncompressed data yet */ + state->strm.avail_in = 0; /* no input data yet */ +} + +/* Open a gzip file either by name or file descriptor. */ +local gzFile gz_open(path, fd, mode) + const void *path; + int fd; + const char *mode; +{ + gz_statep state; + z_size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef WIDECHAR + if (fd == -2) { + len = wcstombs(NULL, path, 0); + if (len == (z_size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef WIDECHAR + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef WIDECHAR + fd == -2 ? _wopen(path, oflag, 0666) : +#endif + open((const char *)path, oflag, 0666)); + if (state->fd == -1) { + free(state->path); + free(state); + return NULL; + } + if (state->mode == GZ_APPEND) { + LSEEK(state->fd, 0, SEEK_END); /* so gzoffset() is correct */ + state->mode = GZ_WRITE; /* simplify later checks */ + } + + /* save the current position for rewinding (only if reading) */ + if (state->mode == GZ_READ) { + state->start = LSEEK(state->fd, 0, SEEK_CUR); + if (state->start == -1) state->start = 0; + } + + /* initialize stream */ + gz_reset(state); + + /* return stream */ + return (gzFile)state; +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen64(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzdopen(fd, mode) + int fd; + const char *mode; +{ + char *path; /* identifier for error messages */ + gzFile gz; + + if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL) + return NULL; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(path, 7 + 3 * sizeof(int), "", fd); +#else + sprintf(path, "", fd); /* for debugging */ +#endif + gz = gz_open(path, fd, mode); + free(path); + return gz; +} + +/* -- see zlib.h -- */ +#ifdef WIDECHAR +gzFile ZEXPORT gzopen_w(path, mode) + const wchar_t *path; + const char *mode; +{ + return gz_open(path, -2, mode); +} +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzbuffer(file, size) + gzFile file; + unsigned size; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* make sure we haven't already allocated memory */ + if (state->size != 0) + return -1; + + /* check and set requested size */ + if ((size << 1) < size) + return -1; /* need to be able to double it */ + if (size < 2) + size = 2; /* need two bytes to check magic header */ + state->want = size; + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzrewind(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* back up and start over */ + if (LSEEK(state->fd, state->start, SEEK_SET) == -1) + return -1; + gz_reset(state); + return 0; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzseek64(file, offset, whence) + gzFile file; + z_off64_t offset; + int whence; +{ + unsigned n; + z_off64_t ret; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* check that there's no error */ + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* can only seek from start or relative to current position */ + if (whence != SEEK_SET && whence != SEEK_CUR) + return -1; + + /* normalize offset to a SEEK_CUR specification */ + if (whence == SEEK_SET) + offset -= state->x.pos; + else if (state->seek) + offset += state->skip; + state->seek = 0; + + /* if within raw area while reading, just go there */ + if (state->mode == GZ_READ && state->how == COPY && + state->x.pos + offset >= 0) { + ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR); + if (ret == -1) + return -1; + state->x.have = 0; + state->eof = 0; + state->past = 0; + state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(file) + gzFile file; +{ + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(file) + gzFile file; +{ + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(file) + gzFile file; +{ + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(file, errnum) + gzFile file; + int *errnum; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? "" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. */ +void ZLIB_INTERNAL gz_error(state, err, msg) + gz_statep state; + int err; + const char *msg; +{ + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif +} + +#ifndef INT_MAX +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax() +{ + unsigned p, q; + + p = 1; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +} +#endif diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzread.c glmark2-2021.02/src/zlib/gzread.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzread.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/gzread.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,654 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); +local int gz_avail OF((gz_statep)); +local int gz_look OF((gz_statep)); +local int gz_decomp OF((gz_statep)); +local int gz_fetch OF((gz_statep)); +local int gz_skip OF((gz_statep, z_off64_t)); +local z_size_t gz_read OF((gz_statep, voidp, z_size_t)); + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. */ +local int gz_load(state, buf, len, have) + gz_statep state; + unsigned char *buf; + unsigned len; + unsigned *have; +{ + int ret; + unsigned get, max = ((unsigned)-1 >> 2) + 1; + + *have = 0; + do { + get = len - *have; + if (get > max) + get = max; + ret = read(state->fd, buf + *have, get); + if (ret <= 0) + break; + *have += (unsigned)ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. */ +local int gz_avail(state) + gz_statep state; +{ + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. */ +local int gz_look(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. */ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + if (strm->avail_in) { + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + } + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. */ +local int gz_decomp(state) + gz_statep state; +{ + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? "compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(state, len) + gz_statep state; + z_off64_t len; +{ + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? + (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* Read len bytes into buf from file, or less than len up to the end of the + input. Return the number of bytes read. If zero is returned, either the + end of file was reached, or there was an error. state->err must be + consulted in that case to determine which. */ +local z_size_t gz_read(state, buf, len) + gz_statep state; + voidp buf; + z_size_t len; +{ + z_size_t got; + unsigned n; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return 0; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* set n to the maximum amount of len that fits in an unsigned int */ + n = -1; + if (n > len) + n = len; + + /* first just try copying data from the output buffer */ + if (state->x.have) { + if (state->x.have < n) + n = state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || n < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return 0; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, n, &n) == -1) + return 0; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + state->strm.avail_out = n; + state->strm.next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return 0; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer */ + return got; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); + return -1; + } + + /* read len or fewer bytes to buf */ + len = gz_read(state, buf, len); + + /* check for an error */ + if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* return the number of bytes read (this is assured to fit in an int) */ + return (int)len; +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfread(buf, size, nitems, file) + voidp buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* read len or fewer bytes to buf, return the number of full items read */ + return len ? gz_read(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(file) + gzFile file; +{ + int ret; + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gz_read() */ + ret = gz_read(state, buf, 1); + return ret < 1 ? -1 : buf[0]; +} + +int ZEXPORT gzgetc_(file) +gzFile file; +{ + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in current output buffer */ + n = state->x.have > left ? left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(file) + gzFile file; +{ + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if (state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzwrite.c glmark2-2021.02/src/zlib/gzwrite.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/gzwrite.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/gzwrite.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,665 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_init OF((gz_statep)); +local int gz_comp OF((gz_statep, int)); +local int gz_zero OF((gz_statep, z_off64_t)); +local z_size_t gz_write OF((gz_statep, voidpc, z_size_t)); + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on a memory allocation failure, or 0 on + success. */ +local int gz_init(state) + gz_statep state; +{ + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer (double size for gzprintf) */ + state->in = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + strm->next_in = NULL; + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file or if gz_init() + fails to allocate memory, otherwise 0. flush is assumed to be a valid + deflate() flush value. If flush is Z_FINISH, then the deflate() state is + reset to start a new gzip stream. If gz->direct is true, then simply write + to the output file without compressing, and ignore flush. */ +local int gz_comp(state, flush) + gz_statep state; + int flush; +{ + int ret, writ; + unsigned have, put, max = ((unsigned)-1 >> 2) + 1; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + while (strm->avail_in) { + put = strm->avail_in > max ? max : strm->avail_in; + writ = write(state->fd, strm->next_in, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in -= (unsigned)writ; + strm->next_in += writ; + } + return 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + while (strm->next_out > state->x.next) { + put = strm->next_out - state->x.next > (int)max ? max : + (unsigned)(strm->next_out - state->x.next); + writ = write(state->fd, state->x.next, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + state->x.next += writ; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = state->out; + } + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + deflateReset(strm); + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on a write error or memory + allocation failure by gz_comp(), or 0 on success. */ +local int gz_zero(state, len) + gz_statep state; + z_off64_t len; +{ + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* Write len bytes from buf to file. Return the number of bytes written. If + the returned value is less than len, then there was an error. */ +local z_size_t gz_write(state, buf, len) + gz_statep state; + voidpc buf; + z_size_t len; +{ + z_size_t put = len; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (state->strm.avail_in == 0) + state->strm.next_in = state->in; + have = (unsigned)((state->strm.next_in + state->strm.avail_in) - + state->in); + copy = state->size - have; + if (copy > len) + copy = len; + memcpy(state->in + have, buf, copy); + state->strm.avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* directly compress user buffer to file */ + state->strm.next_in = (z_const Bytef *)buf; + do { + unsigned n = (unsigned)-1; + if (n > len) + n = len; + state->strm.avail_in = n; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + len -= n; + } while (len); + } + + /* input was all buffered or compressed */ + return put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return 0; + } + + /* write len bytes from buf (the return value will fit in an int) */ + return (int)gz_write(state, buf, len); +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfwrite(buf, size, nitems, file) + voidpc buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* write len bytes to buf, return the number of full items written */ + return len ? gz_write(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned have; + unsigned char buf[1]; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* try writing to input buffer for speed (state->size == 0 if buffer not + initialized) */ + if (state->size) { + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + if (have < state->size) { + state->in[have] = (unsigned char)c; + strm->avail_in++; + state->x.pos++; + return c & 0xff; + } + } + + /* no room in buffer or not initialized, use gz_write() */ + buf[0] = (unsigned char)c; + if (gz_write(state, buf, 1) != 1) + return -1; + return c & 0xff; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputs(file, str) + gzFile file; + const char *str; +{ + int ret; + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* write string */ + len = strlen(str); + ret = gz_write(state, str, len); + return ret == 0 && len != 0 ? -1 : ret; +} + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +#include + +/* -- see zlib.h -- */ +int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) +{ + int len; + unsigned left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->err; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(next, format, va); + for (len = 0; len < state->size; len++) + if (next[len] == 0) break; +# else + len = vsprintf(next, format, va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(next, state->size, format, va); + len = strlen(next); +# else + len = vsnprintf(next, state->size, format, va); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += (unsigned)len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return len; +} + +int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) +{ + va_list va; + int ret; + + va_start(va, format); + ret = gzvprintf(file, format, va); + va_end(va); + return ret; +} + +#else /* !STDC && !Z_HAVE_STDARG_H */ + +/* -- see zlib.h -- */ +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + unsigned len, left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that can really pass pointer in ints */ + if (sizeof(int) != sizeof(void *)) + return Z_STREAM_ERROR; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->error; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->error; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(strm->next_in + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, + a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < size; len++) + if (next[len] == 0) + break; +# else + len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, + a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, + a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(next); +# else + len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return (int)len; +} + +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzflush(file, flush) + gzFile file; + int flush; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* check flush parameter */ + if (flush < 0 || flush > Z_FINISH) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* compress remaining data with requested flush */ + (void)gz_comp(state, flush); + return state->err; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzsetparams(file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(file) + gzFile file; +{ + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/infback.c glmark2-2021.02/src/zlib/infback.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/infback.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/infback.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,640 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. + */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = (uInt)windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). */ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, than inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused in the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. + */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffast.c glmark2-2021.02/src/zlib/inffast.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffast.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inffast.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,323 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + *out++ = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffast.h glmark2-2021.02/src/zlib/inffast.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffast.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inffast.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffixed.h glmark2-2021.02/src/zlib/inffixed.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inffixed.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inffixed.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inflate.c glmark2-2021.02/src/zlib/inflate.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inflate.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inflate.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1561 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. + * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local int inflateStateCheck OF((z_streamp strm)); +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, + unsigned len)); + +local int inflateStateCheck(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} + +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. + */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(strm, check) +z_streamp strm; +int check; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inflate.h glmark2-2021.02/src/zlib/inflate.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inflate.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inflate.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,125 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inftrees.c glmark2-2021.02/src/zlib/inftrees.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inftrees.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inftrees.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,304 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/inftrees.h glmark2-2021.02/src/zlib/inftrees.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/inftrees.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/inftrees.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/README glmark2-2021.02/src/zlib/README --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/README 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/README 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,115 @@ +ZLIB DATA COMPRESSION LIBRARY + +zlib 1.2.11 is a general purpose data compression library. All the code is +thread safe. The data format used by the zlib library is described by RFCs +(Request for Comments) 1950 to 1952 in the files +http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and +rfc1952 (gzip format). + +All functions of the compression library are documented in the file zlib.h +(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example +of the library is given in the file test/example.c which also tests that +the library is working correctly. Another example is given in the file +test/minigzip.c. The compression library itself is composed of all source +files in the root directory. + +To compile all files and run the test program, follow the instructions given at +the top of Makefile.in. In short "./configure; make test", and if that goes +well, "make install" should work for most flavors of Unix. For Windows, use +one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use +make_vms.com. + +Questions about zlib should be sent to , or to Gilles Vollant + for the Windows DLL version. The zlib home page is +http://zlib.net/ . Before reporting a problem, please check this site to +verify that you have the latest version of zlib; otherwise get the latest +version and check whether the problem still exists or not. + +PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help. + +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available at +http://marknelson.us/1997/01/01/zlib-engine/ . + +The changes made in version 1.2.11 are documented in the file ChangeLog. + +Unsupported third party contributions are provided in directory contrib/ . + +zlib is available in Java using the java.util.zip package, documented at +http://java.sun.com/developer/technicalArticles/Programming/compression/ . + +A Perl interface to zlib written by Paul Marquess is available +at CPAN (Comprehensive Perl Archive Network) sites, including +http://search.cpan.org/~pmqs/IO-Compress-Zlib/ . + +A Python interface to zlib written by A.M. Kuchling is +available in Python 1.5 and later versions, see +http://docs.python.org/library/zlib.html . + +zlib is built into tcl: http://wiki.tcl.tk/4610 . + +An experimental package to read and write files in .zip format, written on top +of zlib by Gilles Vollant , is available in the +contrib/minizip directory of zlib. + + +Notes for some targets: + +- For Windows DLL versions, please see win32/DLL_FAQ.txt + +- For 64-bit Irix, deflate.c must be compiled without any optimization. With + -O, one libpng test fails. The test works in 32 bit mode (with the -n32 + compiler flag). The compiler bug has been reported to SGI. + +- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works + when compiled with cc. + +- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is + necessary to get gzprintf working correctly. This is done by configure. + +- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with + other compilers. Use "make test" to check your compiler. + +- gzdopen is not supported on RISCOS or BEOS. + +- For PalmOs, see http://palmzlib.sourceforge.net/ + + +Acknowledgments: + + The deflate format used by zlib was defined by Phil Katz. The deflate and + zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; they + are too numerous to cite here. + +Copyright notice: + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* receiving +lengthy legal documents to sign. The sources are provided for free but without +warranty of any kind. The library has been entirely written by Jean-loup +Gailly and Mark Adler; it does not include third-party code. + +If you redistribute modified sources, we would appreciate that you include in +the file ChangeLog history information documenting your changes. Please read +the FAQ for more information on the distribution of modified source versions. diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/trees.c glmark2-2021.02/src/zlib/trees.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/trees.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/trees.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1203 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2017 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef ZLIB_DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local const static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local const static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. + */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef ZLIB_DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* !ZLIB_DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +#ifdef ZLIB_DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. + */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); + } + if (overflow == 0) return; + + Tracev((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; +#endif +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. + */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef ZLIB_DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(s) + deflate_state *s; +{ + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/trees.h glmark2-2021.02/src/zlib/trees.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/trees.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/trees.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/uncompr.c glmark2-2021.02/src/zlib/uncompr.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/uncompr.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/uncompr.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,93 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. +*/ +int ZEXPORT uncompress2 (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong *sourceLen; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} + +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return uncompress2(dest, destLen, source, &sourceLen); +} diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/zconf.h glmark2-2021.02/src/zlib/zconf.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/zconf.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/zconf.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,534 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO + typedef unsigned long z_size_t; +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/zlib.h glmark2-2021.02/src/zlib/zlib.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/zlib.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/zlib.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,1912 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.11" +#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more ouput + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if any input has been consumed in a previous + deflate() call, then the input available so far is compressed with the old + level and strategy using deflate(strm, Z_BLOCK). There are three approaches + for the compression levels 0, 1..3, and 4..9 respectively. The new level + and strategy will take effect at the next call of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will not automatically decode concatenated gzip streams. + inflate() will return Z_STREAM_END at the end of the gzip stream. The state + would need to be reset to continue decoding a subsequent gzip stream. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() is passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen)); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Three times that size in buffer space is allocated. A larger buffer + size of, for example, 64K or 128K bytes will noticeably increase the speed + of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. Previously provided + data is flushed before the parameter change. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, + gzFile file)); +/* + Read up to nitems items of size size from file to buf, otherwise operating + as gzread() does. This duplicates the interface of stdio's fread(), with + size_t request and return types. If the library defines size_t, then + z_size_t is identical to size_t. If not, then z_size_t is an unsigned + integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevetheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, reseting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, + z_size_t nitems, gzFile file)); +/* + gzfwrite() writes nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/zutil.c glmark2-2021.02/src/zlib/zutil.c --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/zutil.c 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/zutil.c 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,325 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2017 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif + +z_const char * const z_errmsg[10] = { + (z_const char *)"need dictionary", /* Z_NEED_DICT 2 */ + (z_const char *)"stream end", /* Z_STREAM_END 1 */ + (z_const char *)"", /* Z_OK 0 */ + (z_const char *)"file error", /* Z_ERRNO (-1) */ + (z_const char *)"stream error", /* Z_STREAM_ERROR (-2) */ + (z_const char *)"data error", /* Z_DATA_ERROR (-3) */ + (z_const char *)"insufficient memory", /* Z_MEM_ERROR (-4) */ + (z_const char *)"buffer error", /* Z_BUF_ERROR (-5) */ + (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */ + (z_const char *)"" +}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch ((int)(sizeof(uInt))) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch ((int)(sizeof(uLong))) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch ((int)(sizeof(voidpf))) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch ((int)(sizeof(z_off_t))) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef ZLIB_DEBUG + flags += 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef ZLIB_DEBUG +#include +# ifndef verbose +# define verbose 0 +# endif +int ZLIB_INTERNAL z_verbose = verbose; + +void ZLIB_INTERNAL z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void ZLIB_INTERNAL zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf; + ulg bsize = (ulg)items*size; + + (void)opaque; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. + */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + int n; + + (void)opaque; + + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) +{ + (void)opaque; + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + (void)opaque; + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + (void)opaque; + return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void ZLIB_INTERNAL zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + (void)opaque; + free(ptr); +} + +#endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/src/zlib/zutil.h glmark2-2021.02/src/zlib/zutil.h --- glmark2-2014.03+git20150611.fa71af2d/src/zlib/zutil.h 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/src/zlib/zutil.h 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,271 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include "zlib.h" + +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) +# include +# endif +# include +# include +#endif + +#ifdef Z_SOLO + typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 1 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 2 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef __370__ +# if __TARGET_LIB__ < 0x20000000 +# define OS_CODE 4 +# elif __TARGET_LIB__ < 0x40000000 +# define OS_CODE 11 +# else +# define OS_CODE 8 +# endif +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 5 +#endif + +#ifdef OS2 +# define OS_CODE 6 +# if defined(M_I86) && !defined(Z_SOLO) +# include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 7 +# ifndef Z_SOLO +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +# endif +#endif + +#ifdef __acorn +# define OS_CODE 13 +#endif + +#if defined(WIN32) && !defined(__CYGWIN__) +# define OS_CODE 10 +#endif + +#ifdef _BEOS_ +# define OS_CODE 16 +#endif + +#ifdef __TOS_OS400__ +# define OS_CODE 18 +#endif + +#ifdef __APPLE__ +# define OS_CODE 19 +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 3 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(pyr) || defined(Z_SOLO) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef ZLIB_DEBUG +# include + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, + unsigned size)); + void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); +#endif + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +#endif /* ZUTIL_H */ diff -Nru glmark2-2014.03+git20150611.fa71af2d/waf glmark2-2021.02/waf --- glmark2-2014.03+git20150611.fa71af2d/waf 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waf 2021-04-25 01:47:22.000000000 +0800 @@ -1,7 +1,6 @@ -#!/usr/bin/env python -# encoding: ISO8859-1 -# Thomas Nagy, 2005-2011 - +#!/usr/bin/env python3 +# Thomas Nagy, 2005-2016 +# """ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -30,13 +29,15 @@ POSSIBILITY OF SUCH DAMAGE. """ -import os, sys +import os, sys, inspect -VERSION="1.6.11" -REVISION="30618c54883417962c38f5d395f83584" +VERSION="1.9.3" +REVISION="826456a3e8d1d27787c240c6ba69d280" +GIT="b022313c197614245fae732c0dd1d8167b9ee5dd" INSTALL='' -C1='#*' +C1='#)' C2='#%' +C3='#$' cwd = os.getcwd() join = os.path.join @@ -53,8 +54,8 @@ print(('\033[91mError: %s\033[0m' % m)) sys.exit(1) -def unpack_wafdir(dir): - f = open(sys.argv[0],'rb') +def unpack_wafdir(dir, src): + f = open(src,'rb') c = 'corrupt archive (%d)' while 1: line = f.readline() @@ -65,22 +66,22 @@ if f.readline() != b('#<==\n'): err(c % 2) break if not txt: err(c % 3) - txt = txt[1:-1].replace(b(C1), b('\n')).replace(b(C2), b('\r')) + txt = txt[1:-1].replace(b(C1), b('\n')).replace(b(C2), b('\r')).replace(b(C3), b('\x00')) import shutil, tarfile try: shutil.rmtree(dir) except OSError: pass try: - for x in ['Tools', 'extras']: + for x in ('Tools', 'extras'): os.makedirs(join(dir, 'waflib', x)) except OSError: - err("Cannot unpack waf lib into %s\nMove waf into a writeable directory" % dir) + err("Cannot unpack waf lib into %s\nMove waf in a writable directory" % dir) os.chdir(dir) tmp = 't.bz2' t = open(tmp,'wb') - t.write(txt) - t.close() + try: t.write(txt) + finally: t.close() try: t = tarfile.open(tmp) @@ -95,10 +96,12 @@ except OSError: pass err("Waf cannot be unpacked, check that bzip2 support is present") - for x in t: t.extract(x) - t.close() + try: + for x in t: t.extract(x) + finally: + t.close() - for x in ['Tools', 'extras']: + for x in ('Tools', 'extras'): os.chmod(join('waflib',x), 493) if sys.hexversion<0x300000f: @@ -106,7 +109,7 @@ import fixpy2 fixpy2.fixdir(dir) - os.unlink(tmp) + os.remove(tmp) os.chdir(cwd) try: dir = unicode(dir, 'mbcs') @@ -125,8 +128,8 @@ pass def find_lib(): - name = sys.argv[0] - base = os.path.dirname(os.path.abspath(name)) + src = os.path.abspath(inspect.getfile(inspect.getmodule(err))) + base, name = os.path.split(src) #devs use $WAFDIR w=test(os.environ.get('WAFDIR', '')) @@ -139,7 +142,7 @@ err('waf-light requires waflib -> export WAFDIR=/folder') dirname = '%s-%s-%s' % (WAF, VERSION, REVISION) - for i in [INSTALL,'/usr','/usr/local','/opt']: + for i in (INSTALL,'/usr','/usr/local','/opt'): w = test(i + '/lib/' + dirname) if w: return w @@ -149,14 +152,14 @@ if w: return w #unpack - unpack_wafdir(dir) + unpack_wafdir(dir, src) return dir wafdir = find_lib() sys.path.insert(0, wafdir) if __name__ == '__main__': - import waflib.extras.compat15 + from waflib import Scripting Scripting.waf_entry_point(cwd, VERSION, wafdir) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/ansiterm.py glmark2-2021.02/waflib/ansiterm.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/ansiterm.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/ansiterm.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,52 +1,84 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys,os +import os,re,sys +from waflib import Utils +wlock=Utils.threading.Lock() try: - if not(sys.stderr.isatty()and sys.stdout.isatty()): - raise ValueError('not a tty') - from ctypes import* + from ctypes import Structure,windll,c_short,c_ushort,c_ulong,c_int,byref,c_wchar,POINTER,c_long +except ImportError: + class AnsiTerm(object): + def __init__(self,stream): + self.stream=stream + try: + self.errors=self.stream.errors + except AttributeError: + pass + self.encoding=self.stream.encoding + def write(self,txt): + try: + wlock.acquire() + self.stream.write(txt) + self.stream.flush() + finally: + wlock.release() + def fileno(self): + return self.stream.fileno() + def flush(self): + self.stream.flush() + def isatty(self): + return self.stream.isatty() +else: class COORD(Structure): _fields_=[("X",c_short),("Y",c_short)] class SMALL_RECT(Structure): _fields_=[("Left",c_short),("Top",c_short),("Right",c_short),("Bottom",c_short)] class CONSOLE_SCREEN_BUFFER_INFO(Structure): - _fields_=[("Size",COORD),("CursorPosition",COORD),("Attributes",c_short),("Window",SMALL_RECT),("MaximumWindowSize",COORD)] + _fields_=[("Size",COORD),("CursorPosition",COORD),("Attributes",c_ushort),("Window",SMALL_RECT),("MaximumWindowSize",COORD)] class CONSOLE_CURSOR_INFO(Structure): _fields_=[('dwSize',c_ulong),('bVisible',c_int)] - sbinfo=CONSOLE_SCREEN_BUFFER_INFO() - csinfo=CONSOLE_CURSOR_INFO() - hconsole=windll.kernel32.GetStdHandle(-11) - windll.kernel32.GetConsoleScreenBufferInfo(hconsole,byref(sbinfo)) - if sbinfo.Size.X<9 or sbinfo.Size.Y<9:raise ValueError('small console') - windll.kernel32.GetConsoleCursorInfo(hconsole,byref(csinfo)) -except Exception: - pass -else: - import re,threading - is_vista=getattr(sys,"getwindowsversion",None)and sys.getwindowsversion()[0]>=6 try: _type=unicode - except: + except NameError: _type=str to_int=lambda number,default:number and int(number)or default - wlock=threading.Lock() STD_OUTPUT_HANDLE=-11 STD_ERROR_HANDLE=-12 + windll.kernel32.GetStdHandle.argtypes=[c_ulong] + windll.kernel32.GetStdHandle.restype=c_ulong + windll.kernel32.GetConsoleScreenBufferInfo.argtypes=[c_ulong,POINTER(CONSOLE_SCREEN_BUFFER_INFO)] + windll.kernel32.GetConsoleScreenBufferInfo.restype=c_long + windll.kernel32.SetConsoleTextAttribute.argtypes=[c_ulong,c_ushort] + windll.kernel32.SetConsoleTextAttribute.restype=c_long + windll.kernel32.FillConsoleOutputCharacterW.argtypes=[c_ulong,c_wchar,c_ulong,POINTER(COORD),POINTER(c_ulong)] + windll.kernel32.FillConsoleOutputCharacterW.restype=c_long + windll.kernel32.FillConsoleOutputAttribute.argtypes=[c_ulong,c_ushort,c_ulong,POINTER(COORD),POINTER(c_ulong)] + windll.kernel32.FillConsoleOutputAttribute.restype=c_long + windll.kernel32.SetConsoleCursorPosition.argtypes=[c_ulong,POINTER(COORD)] + windll.kernel32.SetConsoleCursorPosition.restype=c_long + windll.kernel32.SetConsoleCursorInfo.argtypes=[c_ulong,POINTER(CONSOLE_CURSOR_INFO)] + windll.kernel32.SetConsoleCursorInfo.restype=c_long class AnsiTerm(object): - def __init__(self): - self.encoding=sys.stdout.encoding - self.hconsole=windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE) + def __init__(self,s): + self.stream=s + try: + self.errors=s.errors + except AttributeError: + pass + self.encoding=s.encoding self.cursor_history=[] - self.orig_sbinfo=CONSOLE_SCREEN_BUFFER_INFO() - self.orig_csinfo=CONSOLE_CURSOR_INFO() - windll.kernel32.GetConsoleScreenBufferInfo(self.hconsole,byref(self.orig_sbinfo)) - windll.kernel32.GetConsoleCursorInfo(hconsole,byref(self.orig_csinfo)) + handle=(s.fileno()==2)and STD_ERROR_HANDLE or STD_OUTPUT_HANDLE + self.hconsole=windll.kernel32.GetStdHandle(handle) + self._sbinfo=CONSOLE_SCREEN_BUFFER_INFO() + self._csinfo=CONSOLE_CURSOR_INFO() + windll.kernel32.GetConsoleCursorInfo(self.hconsole,byref(self._csinfo)) + self._orig_sbinfo=CONSOLE_SCREEN_BUFFER_INFO() + r=windll.kernel32.GetConsoleScreenBufferInfo(self.hconsole,byref(self._orig_sbinfo)) + self._isatty=r==1 def screen_buffer_info(self): - sbinfo=CONSOLE_SCREEN_BUFFER_INFO() - windll.kernel32.GetConsoleScreenBufferInfo(self.hconsole,byref(sbinfo)) - return sbinfo + windll.kernel32.GetConsoleScreenBufferInfo(self.hconsole,byref(self._sbinfo)) + return self._sbinfo def clear_line(self,param): mode=param and int(param)or 0 sbinfo=self.screen_buffer_info() @@ -59,8 +91,8 @@ else: line_start=sbinfo.CursorPosition line_length=sbinfo.Size.X-sbinfo.CursorPosition.X - chars_written=c_int() - windll.kernel32.FillConsoleOutputCharacterA(self.hconsole,c_wchar(' '),line_length,line_start,byref(chars_written)) + chars_written=c_ulong() + windll.kernel32.FillConsoleOutputCharacterW(self.hconsole,c_wchar(' '),line_length,line_start,byref(chars_written)) windll.kernel32.FillConsoleOutputAttribute(self.hconsole,sbinfo.Attributes,line_length,line_start,byref(chars_written)) def clear_screen(self,param): mode=to_int(param,0) @@ -75,8 +107,8 @@ else: clear_start=sbinfo.CursorPosition clear_length=((sbinfo.Size.X-sbinfo.CursorPosition.X)+sbinfo.Size.X*(sbinfo.Size.Y-sbinfo.CursorPosition.Y)) - chars_written=c_int() - windll.kernel32.FillConsoleOutputCharacterA(self.hconsole,c_wchar(' '),clear_length,clear_start,byref(chars_written)) + chars_written=c_ulong() + windll.kernel32.FillConsoleOutputCharacterW(self.hconsole,c_wchar(' '),clear_length,clear_start,byref(chars_written)) windll.kernel32.FillConsoleOutputAttribute(self.hconsole,sbinfo.Attributes,clear_length,clear_start,byref(chars_written)) def push_cursor(self,param): sbinfo=self.screen_buffer_info() @@ -119,20 +151,16 @@ return((c&1)<<2)|(c&2)|((c&4)>>2) def set_color(self,param): cols=param.split(';') - sbinfo=CONSOLE_SCREEN_BUFFER_INFO() - windll.kernel32.GetConsoleScreenBufferInfo(self.hconsole,byref(sbinfo)) + sbinfo=self.screen_buffer_info() attr=sbinfo.Attributes for c in cols: - if is_vista: - c=int(c) - else: - c=to_int(c,0) - if c in range(30,38): + c=to_int(c,0) + if 29>4)|((attr&0x07)<<4) windll.kernel32.SetConsoleTextAttribute(self.hconsole,attr) def show_cursor(self,param): - csinfo.bVisible=1 - windll.kernel32.SetConsoleCursorInfo(self.hconsole,byref(csinfo)) + self._csinfo.bVisible=1 + windll.kernel32.SetConsoleCursorInfo(self.hconsole,byref(self._csinfo)) def hide_cursor(self,param): - csinfo.bVisible=0 - windll.kernel32.SetConsoleCursorInfo(self.hconsole,byref(csinfo)) + self._csinfo.bVisible=0 + windll.kernel32.SetConsoleCursorInfo(self.hconsole,byref(self._csinfo)) ansi_command_table={'A':move_up,'B':move_down,'C':move_right,'D':move_left,'E':next_line,'F':prev_line,'G':set_column,'H':set_cursor,'f':set_cursor,'J':clear_screen,'K':clear_line,'h':show_cursor,'l':hide_cursor,'m':set_color,'s':push_cursor,'u':pop_cursor,} ansi_tokens=re.compile('(?:\x1b\[([0-9?;]*)([a-zA-Z])|([^\x1b]+))') def write(self,text): try: wlock.acquire() - for param,cmd,txt in self.ansi_tokens.findall(text): - if cmd: - cmd_func=self.ansi_command_table.get(cmd) - if cmd_func: - cmd_func(self,param) - else: - self.writeconsole(txt) + if self._isatty: + for param,cmd,txt in self.ansi_tokens.findall(text): + if cmd: + cmd_func=self.ansi_command_table.get(cmd) + if cmd_func: + cmd_func(self,param) + else: + self.writeconsole(txt) + else: + self.stream.write(text) finally: wlock.release() def writeconsole(self,txt): - chars_written=c_int() + chars_written=c_ulong() writeconsole=windll.kernel32.WriteConsoleA if isinstance(txt,_type): writeconsole=windll.kernel32.WriteConsoleW - TINY_STEP=3000 - for x in range(0,len(txt),TINY_STEP): - tiny=txt[x:x+TINY_STEP] - writeconsole(self.hconsole,tiny,len(tiny),byref(chars_written),None) + done=0 + todo=len(txt) + chunk=32<<10 + while todo!=0: + doing=min(chunk,todo) + buf=txt[done:done+doing] + r=writeconsole(self.hconsole,buf,doing,byref(chars_written),None) + if r==0: + chunk>>=1 + continue + done+=doing + todo-=doing + def fileno(self): + return self.stream.fileno() def flush(self): pass def isatty(self): - return True - sys.stderr=sys.stdout=AnsiTerm() - os.environ['TERM']='vt100' + return self._isatty + if sys.stdout.isatty()or sys.stderr.isatty(): + handle=sys.stdout.isatty()and STD_OUTPUT_HANDLE or STD_ERROR_HANDLE + console=windll.kernel32.GetStdHandle(handle) + sbinfo=CONSOLE_SCREEN_BUFFER_INFO() + def get_term_cols(): + windll.kernel32.GetConsoleScreenBufferInfo(console,byref(sbinfo)) + return sbinfo.Size.X-1 +try: + import struct,fcntl,termios +except ImportError: + pass +else: + if(sys.stdout.isatty()or sys.stderr.isatty())and os.environ.get('TERM','')not in('dumb','emacs'): + FD=sys.stdout.isatty()and sys.stdout.fileno()or sys.stderr.fileno() + def fun(): + return struct.unpack("HHHH",fcntl.ioctl(FD,termios.TIOCGWINSZ,struct.pack("HHHH",0,0,0,0)))[1] + try: + fun() + except Exception as e: + pass + else: + get_term_cols=fun diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Build.py glmark2-2021.02/waflib/Build.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Build.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Build.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,21 +1,24 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,errno,re,shutil -try:import cPickle -except:import pickle as cPickle -from waflib import Runner,TaskGen,Utils,ConfigSet,Task,Logs,Options,Context,Errors -import waflib.Node +import os,sys,errno,re,shutil,stat +try: + import cPickle +except ImportError: + import pickle as cPickle +from waflib import Node,Runner,TaskGen,Utils,ConfigSet,Task,Logs,Options,Context,Errors CACHE_DIR='c4che' CACHE_SUFFIX='_cache.py' INSTALL=1337 UNINSTALL=-1337 -SAVED_ATTRS='root node_deps raw_deps task_sigs'.split() +SAVED_ATTRS='root node_sigs task_sigs imp_sigs raw_deps node_deps'.split() CFG_FILES='cfg_files' POST_AT_ONCE=0 POST_LAZY=1 -POST_BOTH=2 +PROTOCOL=-1 +if sys.platform=='cli': + PROTOCOL=0 class BuildContext(Context.Context): '''executes the build''' cmd='build' @@ -24,29 +27,31 @@ super(BuildContext,self).__init__(**kw) self.is_install=0 self.top_dir=kw.get('top_dir',Context.top_dir) - self.run_dir=kw.get('run_dir',Context.run_dir) - self.post_mode=POST_AT_ONCE self.out_dir=kw.get('out_dir',Context.out_dir) - self.cache_dir=kw.get('cache_dir',None) + self.run_dir=kw.get('run_dir',Context.run_dir) + self.launch_dir=Context.launch_dir + self.post_mode=POST_LAZY + self.cache_dir=kw.get('cache_dir') if not self.cache_dir: - self.cache_dir=self.out_dir+os.sep+CACHE_DIR + self.cache_dir=os.path.join(self.out_dir,CACHE_DIR) self.all_envs={} + self.node_sigs={} self.task_sigs={} + self.imp_sigs={} self.node_deps={} self.raw_deps={} - self.cache_dir_contents={} self.task_gen_cache_names={} - self.launch_dir=Context.launch_dir self.jobs=Options.options.jobs self.targets=Options.options.targets self.keep=Options.options.keep - self.cache_global=Options.cache_global - self.nocache=Options.options.nocache self.progress_bar=Options.options.progress_bar self.deps_man=Utils.defaultdict(list) self.current_group=0 self.groups=[] self.group_names={} + for v in SAVED_ATTRS: + if not hasattr(self,v): + setattr(self,v,{}) def get_variant_dir(self): if not self.variant: return self.out_dir @@ -56,16 +61,16 @@ kw['bld']=self ret=TaskGen.task_gen(*k,**kw) self.task_gen_cache_names={} - self.add_to_group(ret,group=kw.get('group',None)) + self.add_to_group(ret,group=kw.get('group')) return ret + def rule(self,*k,**kw): + def f(rule): + ret=self(*k,**kw) + ret.rule=rule + return ret + return f def __copy__(self): - raise Errors.WafError('build contexts are not supposed to be copied') - def install_files(self,*k,**kw): - pass - def install_as(self,*k,**kw): - pass - def symlink_as(self,*k,**kw): - pass + raise Errors.WafError('build contexts cannot be copied') def load_envs(self): node=self.root.find_node(self.cache_dir) if not node: @@ -79,12 +84,8 @@ self.all_envs[name]=env for f in env[CFG_FILES]: newnode=self.root.find_resource(f) - try: - h=Utils.h_file(newnode.abspath()) - except(IOError,AttributeError): - Logs.error('cannot find %r'%f) - h=Utils.SIG_NIL - newnode.sig=h + if not newnode or not newnode.exists(): + raise Errors.WafError('Missing configuration file %r, reconfigure the project!'%f) def init_dirs(self): if not(os.path.isabs(self.top_dir)and os.path.isabs(self.out_dir)): raise Errors.WafError('The project was not configured: run "waf configure" first!') @@ -97,56 +98,52 @@ self.load_envs() self.execute_build() def execute_build(self): - Logs.info("Waf: Entering directory `%s'"%self.variant_dir) + Logs.info("Waf: Entering directory `%s'",self.variant_dir) self.recurse([self.run_dir]) self.pre_build() self.timer=Utils.Timer() - if self.progress_bar: - sys.stderr.write(Logs.colors.cursor_off) try: self.compile() finally: - if self.progress_bar==1: - c=len(self.returned_tasks)or 1 - self.to_log(self.progress_line(c,c,Logs.colors.BLUE,Logs.colors.NORMAL)) - print('') - sys.stdout.flush() - sys.stderr.write(Logs.colors.cursor_on) - Logs.info("Waf: Leaving directory `%s'"%self.variant_dir) + if self.progress_bar==1 and sys.stderr.isatty(): + c=self.producer.processed or 1 + m=self.progress_line(c,c,Logs.colors.BLUE,Logs.colors.NORMAL) + Logs.info(m,extra={'stream':sys.stderr,'c1':Logs.colors.cursor_off,'c2':Logs.colors.cursor_on}) + Logs.info("Waf: Leaving directory `%s'",self.variant_dir) + try: + self.producer.bld=None + del self.producer + except AttributeError: + pass self.post_build() def restore(self): try: env=ConfigSet.ConfigSet(os.path.join(self.cache_dir,'build.config.py')) - except(IOError,OSError): + except EnvironmentError: pass else: - if env['version']').ljust(cols) - msg=Utils.indicator%(left,bar,right) + msg=Logs.indicator%(left,bar,right) return msg def declare_chain(self,*k,**kw): return TaskGen.declare_chain(*k,**kw) @@ -303,14 +301,14 @@ return'' def get_group_idx(self,tg): se=id(tg) - for i in range(len(self.groups)): - for t in self.groups[i]: + for i,tmp in enumerate(self.groups): + for t in tmp: if id(t)==se: return i return None def add_group(self,name=None,move=True): if name and name in self.group_names: - Logs.error('add_group: name %s already present'%name) + raise Errors.WafError('add_group: name %s already present',name) g=[] self.group_names[name]=g self.groups.append(g) @@ -319,9 +317,10 @@ def set_group(self,idx): if isinstance(idx,str): g=self.group_names[idx] - for i in range(len(self.groups)): - if id(g)==id(self.groups[i]): + for i,tmp in enumerate(self.groups): + if id(g)==id(tmp): self.current_group=i + break else: self.current_group=idx def total(self): @@ -338,8 +337,6 @@ min_grp=0 for name in self.targets.split(','): tg=self.get_tgen_by_name(name) - if not tg: - raise Errors.WafError('target %r does not exist'%name) m=self.get_group_idx(tg) if m>min_grp: min_grp=m @@ -347,6 +344,11 @@ elif m==min_grp: to_post.append(tg) return(min_grp,to_post) + def get_all_task_gen(self): + lst=[] + for g in self.groups: + lst.extend(g) + return lst def post_group(self): if self.targets=='*': for tg in self.groups[self.cur]: @@ -370,6 +372,12 @@ tg.post() else: ln=self.launch_node() + if ln.is_child_of(self.bldnode): + Logs.warn('Building from the build directory, forcing --targets=*') + ln=self.srcnode + elif not ln.is_child_of(self.srcnode): + Logs.warn('CWD %s is not under %s, forcing --targets=* (run distclean?)',ln.abspath(),self.srcnode.abspath()) + ln=self.srcnode for tg in self.groups[self.cur]: try: f=tg.post @@ -381,10 +389,10 @@ def get_tasks_group(self,idx): tasks=[] for tg in self.groups[idx]: - if isinstance(tg,Task.TaskBase): - tasks.append(tg) - else: + try: tasks.extend(tg.tasks) + except AttributeError: + tasks.append(tg) return tasks def get_build_iterator(self): self.cur=0 @@ -409,74 +417,149 @@ yield tasks while 1: yield[] + def install_files(self,dest,files,**kw): + assert(dest) + tg=self(features='install_task',install_to=dest,install_from=files,**kw) + tg.dest=tg.install_to + tg.type='install_files' + if not kw.get('postpone',True): + tg.post() + return tg + def install_as(self,dest,srcfile,**kw): + assert(dest) + tg=self(features='install_task',install_to=dest,install_from=srcfile,**kw) + tg.dest=tg.install_to + tg.type='install_as' + if not kw.get('postpone',True): + tg.post() + return tg + def symlink_as(self,dest,src,**kw): + assert(dest) + tg=self(features='install_task',install_to=dest,install_from=src,**kw) + tg.dest=tg.install_to + tg.type='symlink_as' + tg.link=src + if not kw.get('postpone',True): + tg.post() + return tg +@TaskGen.feature('install_task') +@TaskGen.before_method('process_rule','process_source') +def process_install_task(self): + self.add_install_task(**self.__dict__) +@TaskGen.taskgen_method +def add_install_task(self,**kw): + if not self.bld.is_install: + return + if not kw['install_to']: + return + if kw['type']=='symlink_as'and Utils.is_win32: + if kw.get('win32_install'): + kw['type']='install_as' + else: + return + tsk=self.install_task=self.create_task('inst') + tsk.chmod=kw.get('chmod',Utils.O644) + tsk.link=kw.get('link','')or kw.get('install_from','') + tsk.relative_trick=kw.get('relative_trick',False) + tsk.type=kw['type'] + tsk.install_to=tsk.dest=kw['install_to'] + tsk.install_from=kw['install_from'] + tsk.relative_base=kw.get('cwd')or kw.get('relative_base',self.path) + tsk.init_files() + if not kw.get('postpone',True): + tsk.run_now() + return tsk +@TaskGen.taskgen_method +def add_install_files(self,**kw): + kw['type']='install_files' + return self.add_install_task(**kw) +@TaskGen.taskgen_method +def add_install_as(self,**kw): + kw['type']='install_as' + return self.add_install_task(**kw) +@TaskGen.taskgen_method +def add_symlink_as(self,**kw): + kw['type']='symlink_as' + return self.add_install_task(**kw) class inst(Task.Task): - color='CYAN' - def post(self): - buf=[] - for x in self.source: - if isinstance(x,waflib.Node.Node): - y=x - else: - y=self.path.find_resource(x) - if not y: - if Logs.verbose: - Logs.warn('Could not find %s immediately (may cause broken builds)'%x) - idx=self.generator.bld.get_group_idx(self) - for tg in self.generator.bld.groups[idx]: - if not isinstance(tg,inst)and id(tg)!=id(self): - tg.post() - y=self.path.find_resource(x) - if y: - break - else: - raise Errors.WafError('could not find %r in %r'%(x,self.path)) - buf.append(y) - self.inputs=buf + def __str__(self): + return'' + def uid(self): + lst=self.inputs+self.outputs+[self.link,self.generator.path.abspath()] + return Utils.h_list(lst) + def init_files(self): + if self.type=='symlink_as': + inputs=[] + else: + inputs=self.generator.to_nodes(self.install_from) + if self.type=='install_as': + assert len(inputs)==1 + self.set_inputs(inputs) + dest=self.get_install_path() + outputs=[] + if self.type=='symlink_as': + if self.relative_trick: + self.link=os.path.relpath(self.link,os.path.dirname(dest)) + outputs.append(self.generator.bld.root.make_node(dest)) + elif self.type=='install_as': + outputs.append(self.generator.bld.root.make_node(dest)) + else: + for y in inputs: + if self.relative_trick: + destfile=os.path.join(dest,y.path_from(self.relative_base)) + else: + destfile=os.path.join(dest,y.name) + outputs.append(self.generator.bld.root.make_node(destfile)) + self.set_outputs(outputs) def runnable_status(self): ret=super(inst,self).runnable_status() - if ret==Task.SKIP_ME: + if ret==Task.SKIP_ME and self.generator.bld.is_install: return Task.RUN_ME return ret - def __str__(self): - return'' - def run(self): - return self.generator.exec_task() + def post_run(self): + pass def get_install_path(self,destdir=True): - dest=Utils.subst_vars(self.dest,self.env) - dest=dest.replace('/',os.sep) + if isinstance(self.install_to,Node.Node): + dest=self.install_to.abspath() + else: + dest=Utils.subst_vars(self.install_to,self.env) if destdir and Options.options.destdir: dest=os.path.join(Options.options.destdir,os.path.splitdrive(dest)[1].lstrip(os.sep)) return dest - def exec_install_files(self): - destpath=self.get_install_path() - if not destpath: - raise Errors.WafError('unknown installation path %r'%self.generator) - for x,y in zip(self.source,self.inputs): - if self.relative_trick: - destfile=os.path.join(destpath,y.path_from(self.path)) - Utils.check_dir(os.path.dirname(destfile)) - else: - destfile=os.path.join(destpath,y.name) - self.generator.bld.do_install(y.abspath(),destfile,self.chmod) - def exec_install_as(self): - destfile=self.get_install_path() - self.generator.bld.do_install(self.inputs[0].abspath(),destfile,self.chmod) - def exec_symlink_as(self): - destfile=self.get_install_path() - self.generator.bld.do_link(self.link,destfile) -class InstallContext(BuildContext): - '''installs the targets on the system''' - cmd='install' - def __init__(self,**kw): - super(InstallContext,self).__init__(**kw) - self.uninstall=[] - self.is_install=INSTALL - def do_install(self,src,tgt,chmod=Utils.O644): - d,_=os.path.split(tgt) - if not d: - raise Errors.WafError('Invalid installation given %r->%r'%(src,tgt)) - Utils.check_dir(d) - srclbl=src.replace(self.srcnode.abspath()+os.sep,'') + def copy_fun(self,src,tgt): + if Utils.is_win32 and len(tgt)>259 and not tgt.startswith('\\\\?\\'): + tgt='\\\\?\\'+tgt + shutil.copy2(src,tgt) + os.chmod(tgt,self.chmod) + def rm_empty_dirs(self,tgt): + while tgt: + tgt=os.path.dirname(tgt) + try: + os.rmdir(tgt) + except OSError: + break + def run(self): + is_install=self.generator.bld.is_install + if not is_install: + return + for x in self.outputs: + if is_install==INSTALL: + x.parent.mkdir() + if self.type=='symlink_as': + fun=is_install==INSTALL and self.do_link or self.do_unlink + fun(self.link,self.outputs[0].abspath()) + else: + fun=is_install==INSTALL and self.do_install or self.do_uninstall + launch_node=self.generator.bld.launch_node() + for x,y in zip(self.inputs,self.outputs): + fun(x.abspath(),y.abspath(),x.path_from(launch_node)) + def run_now(self): + status=self.runnable_status() + if status not in(Task.RUN_ME,Task.SKIP_ME): + raise Errors.TaskNotReady('Could not process %r: status %r'%(self,status)) + self.run() + self.hasrun=Task.SUCCESS + def do_install(self,src,tgt,lbl,**kw): if not Options.options.force: try: st1=os.stat(tgt) @@ -485,124 +568,72 @@ pass else: if st1.st_mtime+2>=st2.st_mtime and st1.st_size==st2.st_size: - if not self.progress_bar: - Logs.info('- install %s (from %s)'%(tgt,srclbl)) + if not self.generator.bld.progress_bar: + Logs.info('- install %s (from %s)',tgt,lbl) return False - if not self.progress_bar: - Logs.info('+ install %s (from %s)'%(tgt,srclbl)) + if not self.generator.bld.progress_bar: + Logs.info('+ install %s (from %s)',tgt,lbl) + try: + os.chmod(tgt,Utils.O644|stat.S_IMODE(os.stat(tgt).st_mode)) + except EnvironmentError: + pass try: os.remove(tgt) except OSError: pass try: - shutil.copy2(src,tgt) - os.chmod(tgt,chmod) - except IOError: + self.copy_fun(src,tgt) + except EnvironmentError as e: + if not os.path.exists(src): + Logs.error('File %r does not exist',src) + elif not os.path.isfile(src): + Logs.error('Input %r is not a file',src) + raise Errors.WafError('Could not install the file %r'%tgt,e) + def do_link(self,src,tgt,**kw): + if os.path.islink(tgt)and os.readlink(tgt)==src: + if not self.generator.bld.progress_bar: + Logs.info('- symlink %s (to %s)',tgt,src) + else: try: - os.stat(src) - except(OSError,IOError): - Logs.error('File %r does not exist'%src) - raise Errors.WafError('Could not install the file %r'%tgt) - def do_link(self,src,tgt): - d,_=os.path.split(tgt) - Utils.check_dir(d) - link=False - if not os.path.islink(tgt): - link=True - elif os.readlink(tgt)!=src: - link=True - if link: - try:os.remove(tgt) - except OSError:pass - if not self.progress_bar: - Logs.info('+ symlink %s (to %s)'%(tgt,src)) + os.remove(tgt) + except OSError: + pass + if not self.generator.bld.progress_bar: + Logs.info('+ symlink %s (to %s)',tgt,src) os.symlink(src,tgt) - else: - if not self.progress_bar: - Logs.info('- symlink %s (to %s)'%(tgt,src)) - def run_task_now(self,tsk,postpone): - tsk.post() - if not postpone: - if tsk.runnable_status()==Task.ASK_LATER: - raise self.WafError('cannot post the task %r'%tsk) - tsk.run() - def install_files(self,dest,files,env=None,chmod=Utils.O644,relative_trick=False,cwd=None,add=True,postpone=True): - tsk=inst(env=env or self.env) - tsk.bld=self - tsk.path=cwd or self.path - tsk.chmod=chmod - if isinstance(files,waflib.Node.Node): - tsk.source=[files] - else: - tsk.source=Utils.to_list(files) - tsk.dest=dest - tsk.exec_task=tsk.exec_install_files - tsk.relative_trick=relative_trick - if add:self.add_to_group(tsk) - self.run_task_now(tsk,postpone) - return tsk - def install_as(self,dest,srcfile,env=None,chmod=Utils.O644,cwd=None,add=True,postpone=True): - tsk=inst(env=env or self.env) - tsk.bld=self - tsk.path=cwd or self.path - tsk.chmod=chmod - tsk.source=[srcfile] - tsk.dest=dest - tsk.exec_task=tsk.exec_install_as - if add:self.add_to_group(tsk) - self.run_task_now(tsk,postpone) - return tsk - def symlink_as(self,dest,src,env=None,cwd=None,add=True,postpone=True): - if Utils.is_win32: - return - tsk=inst(env=env or self.env) - tsk.bld=self - tsk.dest=dest - tsk.path=cwd or self.path - tsk.source=[] - tsk.link=src - tsk.exec_task=tsk.exec_symlink_as - if add:self.add_to_group(tsk) - self.run_task_now(tsk,postpone) - return tsk -class UninstallContext(InstallContext): - '''removes the targets installed''' - cmd='uninstall' - def __init__(self,**kw): - super(UninstallContext,self).__init__(**kw) - self.is_install=UNINSTALL - def do_install(self,src,tgt,chmod=Utils.O644): - if not self.progress_bar: - Logs.info('- remove %s'%tgt) - self.uninstall.append(tgt) + def do_uninstall(self,src,tgt,lbl,**kw): + if not self.generator.bld.progress_bar: + Logs.info('- remove %s',tgt) try: os.remove(tgt) - except OSError ,e: + except OSError as e: if e.errno!=errno.ENOENT: if not getattr(self,'uninstall_error',None): self.uninstall_error=True Logs.warn('build: some files could not be uninstalled (retry with -vv to list them)') if Logs.verbose>1: - Logs.warn('could not remove %s (error code %r)'%(e.filename,e.errno)) - while tgt: - tgt=os.path.dirname(tgt) - try: - os.rmdir(tgt) - except OSError: - break - def do_link(self,src,tgt): + Logs.warn('Could not remove %s (error code %r)',e.filename,e.errno) + self.rm_empty_dirs(tgt) + def do_unlink(self,src,tgt,**kw): try: - if not self.progress_bar: - Logs.info('- unlink %s'%tgt) + if not self.generator.bld.progress_bar: + Logs.info('- remove %s',tgt) os.remove(tgt) except OSError: pass - while tgt: - tgt=os.path.dirname(tgt) - try: - os.rmdir(tgt) - except OSError: - break + self.rm_empty_dirs(tgt) +class InstallContext(BuildContext): + '''installs the targets on the system''' + cmd='install' + def __init__(self,**kw): + super(InstallContext,self).__init__(**kw) + self.is_install=INSTALL +class UninstallContext(InstallContext): + '''removes the targets installed''' + cmd='uninstall' + def __init__(self,**kw): + super(UninstallContext,self).__init__(**kw) + self.is_install=UNINSTALL def execute(self): try: def runnable_status(self): @@ -627,13 +658,17 @@ def clean(self): Logs.debug('build: clean called') if self.bldnode!=self.srcnode: - lst=[self.root.find_or_declare(f)for f in self.env[CFG_FILES]] - for n in self.bldnode.ant_glob('**/*',excl='lock* *conf_check_*/** config.log c4che/*',quiet=True): + lst=[] + for env in self.all_envs.values(): + lst.extend(self.root.find_or_declare(f)for f in env[CFG_FILES]) + for n in self.bldnode.ant_glob('**/*',excl='.lock* *conf_check_*/** config.log c4che/*',quiet=True): if n in lst: continue n.delete() self.root.children={} - for v in'node_deps task_sigs raw_deps'.split(): + for v in SAVED_ATTRS: + if v=='root': + continue setattr(self,v,{}) class ListContext(BuildContext): '''lists the targets to execute''' @@ -655,11 +690,9 @@ f() try: self.get_tgen_by_name('') - except: + except Errors.WafError: pass - lst=list(self.task_gen_cache_names.keys()) - lst.sort() - for k in lst: + for k in sorted(self.task_gen_cache_names.keys()): Logs.pprint('GREEN',k) class StepContext(BuildContext): '''executes tasks in a step-by-step fashion, for debugging''' @@ -672,8 +705,13 @@ Logs.warn('Add a pattern for the debug build, for example "waf step --files=main.c,app"') BuildContext.compile(self) return + targets=[] + if self.targets and self.targets!='*': + targets=self.targets.split(',') for g in self.groups: for tg in g: + if targets and tg.name not in targets: + continue try: f=tg.post except AttributeError: @@ -699,7 +737,7 @@ break if do_exec: ret=tsk.run() - Logs.info('%s -> exit %r'%(str(tsk),ret)) + Logs.info('%s -> exit %r',tsk,ret) def get_matcher(self,pat): inn=True out=True @@ -727,5 +765,10 @@ else: return pattern.match(node.abspath()) return match -BuildContext.store=Utils.nogc(BuildContext.store) -BuildContext.restore=Utils.nogc(BuildContext.restore) +class EnvContext(BuildContext): + fun=cmd=None + def execute(self): + self.restore() + if not self.all_envs: + self.load_envs() + self.recurse([self.run_dir]) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/ConfigSet.py glmark2-2021.02/waflib/ConfigSet.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/ConfigSet.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/ConfigSet.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,7 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set import copy,re,os from waflib import Logs,Utils re_imp=re.compile('^(#)*?([^#=]*?)\ =\ (.*?)$',re.M) @@ -26,12 +24,14 @@ keys=list(keys) keys.sort() return keys + def __iter__(self): + return iter(self.keys()) def __str__(self): return"\n".join(["%r %r"%(x,self.__getitem__(x))for x in self.keys()]) def __getitem__(self,key): try: while 1: - x=self.table.get(key,None) + x=self.table.get(key) if not x is None: return x self=self.parent @@ -71,6 +71,7 @@ for x in keys: tbl[x]=copy.deepcopy(tbl[x]) self.table=tbl + return self def get_flat(self,key): s=self[key] if isinstance(s,str):return s @@ -79,21 +80,24 @@ try: value=self.table[key] except KeyError: - try:value=self.parent[key] - except AttributeError:value=[] - if isinstance(value,list): - value=value[:] + try: + value=self.parent[key] + except AttributeError: + value=[] else: - value=[value] + if isinstance(value,list): + value=value[:] + else: + value=[value] + self.table[key]=value else: if not isinstance(value,list): - value=[value] - self.table[key]=value + self.table[key]=value=[value] return value def append_value(self,var,val): - current_value=self._get_list_value_for_modification(var) if isinstance(val,str): val=[val] + current_value=self._get_list_value_for_modification(var) current_value.extend(val) def prepend_value(self,var,val): if isinstance(val,str): @@ -122,30 +126,34 @@ os.makedirs(os.path.split(filename)[0]) except OSError: pass - f=None + buf=[] + merged_table=self.get_merged_dict() + keys=list(merged_table.keys()) + keys.sort() try: - f=open(filename,'w') - merged_table=self.get_merged_dict() - keys=list(merged_table.keys()) - keys.sort() - for k in keys: - if k!='undo_stack': - f.write('%s = %r\n'%(k,merged_table[k])) - finally: - if f: - f.close() + fun=ascii + except NameError: + fun=repr + for k in keys: + if k!='undo_stack': + buf.append('%s = %s\n'%(k,fun(merged_table[k]))) + Utils.writef(filename,''.join(buf)) def load(self,filename): tbl=self.table - code=Utils.readf(filename) + code=Utils.readf(filename,m='rU') for m in re_imp.finditer(code): g=m.group tbl[g(2)]=eval(g(3)) - Logs.debug('env: %s'%str(self.table)) + Logs.debug('env: %s',self.table) def update(self,d): - for k,v in d.items(): - self[k]=v + self.table.update(d) def stash(self): - self.undo_stack=self.undo_stack+[self.table] - self.table=self.table.copy() + orig=self.table + tbl=self.table=self.table.copy() + for x in tbl.keys(): + tbl[x]=copy.deepcopy(tbl[x]) + self.undo_stack=self.undo_stack+[orig] + def commit(self): + self.undo_stack.pop(-1) def revert(self): self.table=self.undo_stack.pop(-1) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Configure.py glmark2-2021.02/waflib/Configure.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Configure.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Configure.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,54 +1,15 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,shlex,sys,time +import os,shlex,sys,time,re,shutil from waflib import ConfigSet,Utils,Options,Logs,Context,Build,Errors -try: - from urllib import request -except: - from urllib import urlopen -else: - urlopen=request.urlopen -BREAK='break' -CONTINUE='continue' WAF_CONFIG_LOG='config.log' autoconfig=False conf_template='''# project %(app)s configured on %(now)s by # waf %(wafver)s (abi %(abi)s, python %(pyver)x on %(systype)s) # using %(args)s #''' -def download_check(node): - pass -def download_tool(tool,force=False,ctx=None): - for x in Utils.to_list(Context.remote_repo): - for sub in Utils.to_list(Context.remote_locs): - url='/'.join((x,sub,tool+'.py')) - try: - web=urlopen(url) - try: - if web.getcode()!=200: - continue - except AttributeError: - pass - except Exception: - continue - else: - tmp=ctx.root.make_node(os.sep.join((Context.waf_dir,'waflib','extras',tool+'.py'))) - tmp.write(web.read()) - Logs.warn('Downloaded %s from %s'%(tool,url)) - download_check(tmp) - try: - module=Context.load_tool(tool) - except: - Logs.warn('The tool %s from %s is unusable'%(tool,url)) - try: - tmp.delete() - except: - pass - continue - return module - raise Errors.WafError('Could not load the Waf tool') class ConfigurationContext(Context.Context): '''configures the project''' cmd='configure' @@ -96,10 +57,11 @@ out=getattr(Context.g_module,Context.OUT,None) if not out: out=Options.lockfile.replace('.lock-waf_%s_'%sys.platform,'').replace('.lock-waf','') + out=os.path.realpath(out) self.bldnode=(os.path.isabs(out)and self.root or self.path).make_node(out) self.bldnode.mkdir() if not os.path.isdir(self.bldnode.abspath()): - conf.fatal('could not create the build directory %s'%self.bldnode.abspath()) + conf.fatal('Could not create the build directory %s'%self.bldnode.abspath()) def execute(self): self.init_dirs() self.cachedir=self.bldnode.make_node(Build.CACHE_DIR) @@ -111,17 +73,12 @@ ver=getattr(Context.g_module,'VERSION','') if ver: app="%s (%s)"%(app,ver) - now=time.ctime() - pyver=sys.hexversion - systype=sys.platform - args=" ".join(sys.argv) - wafver=Context.WAFVERSION - abi=Context.ABI - self.to_log(conf_template%vars()) + params={'now':time.ctime(),'pyver':sys.hexversion,'systype':sys.platform,'args':" ".join(sys.argv),'wafver':Context.WAFVERSION,'abi':Context.ABI,'app':app} + self.to_log(conf_template%params) self.msg('Setting top to',self.srcnode.abspath()) self.msg('Setting out to',self.bldnode.abspath()) if id(self.srcnode)==id(self.bldnode): - Logs.warn('Setting top == out (remember to use "update_outputs")') + Logs.warn('Setting top == out') elif id(self.path)!=id(self.srcnode): if self.srcnode.is_child_of(self.path): Logs.warn('Are you certain that you do not want to set top="." ?') @@ -130,27 +87,37 @@ Context.top_dir=self.srcnode.abspath() Context.out_dir=self.bldnode.abspath() env=ConfigSet.ConfigSet() - env['argv']=sys.argv - env['options']=Options.options.__dict__ + env.argv=sys.argv + env.options=Options.options.__dict__ + env.config_cmd=self.cmd env.run_dir=Context.run_dir env.top_dir=Context.top_dir env.out_dir=Context.out_dir - env['hash']=self.hash - env['files']=self.files - env['environ']=dict(self.environ) - if not self.env.NO_LOCK_IN_RUN: - env.store(Context.run_dir+os.sep+Options.lockfile) - if not self.env.NO_LOCK_IN_TOP: - env.store(Context.top_dir+os.sep+Options.lockfile) - if not self.env.NO_LOCK_IN_OUT: - env.store(Context.out_dir+os.sep+Options.lockfile) + env.hash=self.hash + env.files=self.files + env.environ=dict(self.environ) + if not self.env.NO_LOCK_IN_RUN and not getattr(Options.options,'no_lock_in_run'): + env.store(os.path.join(Context.run_dir,Options.lockfile)) + if not self.env.NO_LOCK_IN_TOP and not getattr(Options.options,'no_lock_in_top'): + env.store(os.path.join(Context.top_dir,Options.lockfile)) + if not self.env.NO_LOCK_IN_OUT and not getattr(Options.options,'no_lock_in_out'): + env.store(os.path.join(Context.out_dir,Options.lockfile)) def prepare_env(self,env): if not env.PREFIX: - env.PREFIX=os.path.abspath(os.path.expanduser(Options.options.prefix)) + if Options.options.prefix or Utils.is_win32: + env.PREFIX=Utils.sane_path(Options.options.prefix) + else: + env.PREFIX='' if not env.BINDIR: - env.BINDIR=Utils.subst_vars('${PREFIX}/bin',env) + if Options.options.bindir: + env.BINDIR=Utils.sane_path(Options.options.bindir) + else: + env.BINDIR=Utils.subst_vars('${PREFIX}/bin',env) if not env.LIBDIR: - env.LIBDIR=Utils.subst_vars('${PREFIX}/lib',env) + if Options.options.libdir: + env.LIBDIR=Utils.sane_path(Options.options.libdir) + else: + env.LIBDIR=Utils.subst_vars('${PREFIX}/lib%s'%Utils.lib64(),env) def store(self): n=self.cachedir.make_node('build.config.py') n.write('version = 0x%x\ntools = %r\n'%(Context.HEXVERSION,self.tools)) @@ -159,26 +126,22 @@ for key in self.all_envs: tmpenv=self.all_envs[key] tmpenv.store(os.path.join(self.cachedir.abspath(),key+Build.CACHE_SUFFIX)) - def load(self,input,tooldir=None,funs=None,download=True): + def load(self,input,tooldir=None,funs=None,with_sys_path=True,cache=False): tools=Utils.to_list(input) if tooldir:tooldir=Utils.to_list(tooldir) for tool in tools: - mag=(tool,id(self.env),funs) - if mag in self.tool_cache: - self.to_log('(tool %s is already loaded, skipping)'%tool) - continue - self.tool_cache.append(mag) + if cache: + mag=(tool,id(self.env),tooldir,funs) + if mag in self.tool_cache: + self.to_log('(tool %s is already loaded, skipping)'%tool) + continue + self.tool_cache.append(mag) module=None try: - module=Context.load_tool(tool,tooldir) - except ImportError ,e: - if Options.options.download: - module=download_tool(tool,ctx=self) - if not module: - self.fatal('Could not load the Waf tool %r or download a suitable replacement from the repository (sys.path %r)\n%s'%(tool,sys.path,e)) - else: - self.fatal('Could not load the Waf tool %r from %r (try the --download option?):\n%s'%(tool,sys.path,e)) - except Exception ,e: + module=Context.load_tool(tool,tooldir,ctx=self,with_sys_path=with_sys_path) + except ImportError as e: + self.fatal('Could not load the Waf tool %r from %r\n%s'%(tool,sys.path,e)) + except Exception as e: self.to_log('imp %r (%r & %r)'%(tool,tooldir,funs)) self.to_log(Utils.ex_stack()) raise @@ -192,25 +155,15 @@ self.tools.append({'tool':tool,'tooldir':tooldir,'funs':funs}) def post_recurse(self,node): super(ConfigurationContext,self).post_recurse(node) - self.hash=hash((self.hash,node.read('rb'))) + self.hash=Utils.h_list((self.hash,node.read('rb'))) self.files.append(node.abspath()) def eval_rules(self,rules): self.rules=Utils.to_list(rules) for x in self.rules: f=getattr(self,x) - if not f:self.fatal("No such method '%s'."%x) - try: - f() - except Exception ,e: - ret=self.err_handler(x,e) - if ret==BREAK: - break - elif ret==CONTINUE: - continue - else: - raise - def err_handler(self,fun,error): - pass + if not f: + self.fatal('No such configuration function %r'%x) + f() def conf(f): def fun(*k,**kw): mandatory=True @@ -219,97 +172,196 @@ del kw['mandatory'] try: return f(*k,**kw) - except Errors.ConfigurationError ,e: + except Errors.ConfigurationError: if mandatory: - raise e + raise + fun.__name__=f.__name__ setattr(ConfigurationContext,f.__name__,fun) setattr(Build.BuildContext,f.__name__,fun) return f -def add_os_flags(self,var,dest=None): - try:self.env.append_value(dest or var,shlex.split(self.environ[var])) - except KeyError:pass +@conf +def add_os_flags(self,var,dest=None,dup=False): + try: + flags=shlex.split(self.environ[var]) + except KeyError: + return + if dup or''.join(flags)not in''.join(Utils.to_list(self.env[dest or var])): + self.env.append_value(dest or var,flags) +@conf def cmd_to_list(self,cmd): - if isinstance(cmd,str)and cmd.find(' '): - try: - os.stat(cmd) - except OSError: + if isinstance(cmd,str): + if os.path.isfile(cmd): + return[cmd] + if os.sep=='/': return shlex.split(cmd) else: - return[cmd] + try: + return shlex.split(cmd,posix=False) + except TypeError: + return shlex.split(cmd) return cmd -def check_waf_version(self,mini='1.6.0',maxi='1.7.0'): - self.start_msg('Checking for waf version in %s-%s'%(str(mini),str(maxi))) +@conf +def check_waf_version(self,mini='1.8.99',maxi='2.0.0',**kw): + self.start_msg('Checking for waf version in %s-%s'%(str(mini),str(maxi)),**kw) ver=Context.HEXVERSION if Utils.num2ver(mini)>ver: self.fatal('waf version should be at least %r (%r found)'%(Utils.num2ver(mini),ver)) if Utils.num2ver(maxi) %r'%(filename,path_list,var,ret)) + retmsg=ret + else: + retmsg=False + self.msg('Checking for program %r'%msg,retmsg,**kw) + if not kw.get('quiet'): + self.to_log('find program=%r paths=%r var=%r -> %r'%(filename,path_list,var,ret)) if not ret: - self.fatal(kw.get('errmsg','')or'Could not find the program %s'%','.join(filename)) - if var: + self.fatal(kw.get('errmsg','')or'Could not find the program %r'%filename) + interpreter=kw.get('interpreter') + if interpreter is None: + if not Utils.check_exe(ret[0],env=environ): + self.fatal('Program %r is not executable'%ret) self.env[var]=ret + else: + self.env[var]=self.env[interpreter]+ret return ret -def find_perl_program(self,filename,path_list=[],var=None,environ=None,exts=''): +@conf +def find_binary(self,filenames,exts,paths): + for f in filenames: + for ext in exts: + exe_name=f+ext + if os.path.isabs(exe_name): + if os.path.isfile(exe_name): + return exe_name + else: + for path in paths: + x=os.path.expanduser(os.path.join(path,exe_name)) + if os.path.isfile(x): + return x + return None +@conf +def run_build(self,*k,**kw): + lst=[str(v)for(p,v)in kw.items()if p!='env'] + h=Utils.h_list(lst) + dir=self.bldnode.abspath()+os.sep+(not Utils.is_win32 and'.'or'')+'conf_check_'+Utils.to_hex(h) + try: + os.makedirs(dir) + except OSError: + pass + try: + os.stat(dir) + except OSError: + self.fatal('cannot use the configuration test folder %r'%dir) + cachemode=getattr(Options.options,'confcache',None) + if cachemode==1: + try: + proj=ConfigSet.ConfigSet(os.path.join(dir,'cache_run_build')) + except EnvironmentError: + pass + else: + ret=proj['cache_run_build'] + if isinstance(ret,str)and ret.startswith('Test does not build'): + self.fatal(ret) + return ret + bdir=os.path.join(dir,'testbuild') + if not os.path.exists(bdir): + os.makedirs(bdir) + self.test_bld=bld=Context.create_context('build',top_dir=dir,out_dir=bdir) + bld.init_dirs() + bld.progress_bar=0 + bld.targets='*' + bld.logger=self.logger + bld.all_envs.update(self.all_envs) + bld.env=kw['env'] + bld.kw=kw + bld.conf=self + kw['build_fun'](bld) + ret=-1 try: - app=self.find_program(filename,path_list=path_list,var=var,environ=environ,exts=exts) - except: - self.find_program('perl',var='PERL') - app=self.find_file(filename,os.environ['PATH'].split(os.pathsep)) - if not app: + try: + bld.compile() + except Errors.WafError: + ret='Test does not build: %s'%Utils.ex_stack() + self.fatal(ret) + else: + ret=getattr(bld,'retval',0) + finally: + if cachemode==1: + proj=ConfigSet.ConfigSet() + proj['cache_run_build']=ret + proj.store(os.path.join(dir,'cache_run_build')) + else: + shutil.rmtree(dir) + return ret +@conf +def ret_msg(self,msg,args): + if isinstance(msg,str): + return msg + return msg(args) +@conf +def test(self,*k,**kw): + if not'env'in kw: + kw['env']=self.env.derive() + if kw.get('validate'): + kw['validate'](kw) + self.start_msg(kw['msg'],**kw) + ret=None + try: + ret=self.run_build(*k,**kw) + except self.errors.ConfigurationError: + self.end_msg(kw['errmsg'],'YELLOW',**kw) + if Logs.verbose>1: raise - if var: - self.env[var]=Utils.to_list(self.env['PERL'])+[app] - self.msg('Checking for %r'%filename,app) - -conf(add_os_flags) -conf(cmd_to_list) -conf(check_waf_version) -conf(find_file) -conf(find_program) -conf(find_perl_program) \ No newline at end of file + else: + self.fatal('The configuration failed') + else: + kw['success']=ret + if kw.get('post_check'): + ret=kw['post_check'](kw) + if ret: + self.end_msg(kw['errmsg'],'YELLOW',**kw) + self.fatal('The configuration failed %r'%ret) + else: + self.end_msg(self.ret_msg(kw['okmsg'],kw),**kw) + return ret diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Context.py glmark2-2021.02/waflib/Context.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Context.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Context.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,15 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,imp,sys +import os,re,imp,sys from waflib import Utils,Errors,Logs import waflib.Node -HEXVERSION=0x1060b00 -WAFVERSION="1.6.11" -WAFREVISION="a7e69d6b81b04729804754c4d5214da063779a65" -ABI=98 -DBFILE='.wafpickle-%d'%ABI +HEXVERSION=0x1090300 +WAFVERSION="1.9.3" +WAFREVISION="d31398c4a3e8b8f055821d3db6cf6d2ef1b09a40" +ABI=99 +DBFILE='.wafpickle-%s-%d-%d'%(sys.platform,sys.hexversion,ABI) APPNAME='APPNAME' VERSION='VERSION' TOP='top' @@ -20,9 +20,6 @@ top_dir='' out_dir='' waf_dir='' -local_repo='' -remote_repo='http://waf.googlecode.com/git/' -remote_locs=['waflib/extras','waflib/Tools'] g_module=None STDOUT=1 STDERR=-1 @@ -40,7 +37,7 @@ def __init__(cls,name,bases,dict): super(store_context,cls).__init__(name,bases,dict) name=cls.__name__ - if name=='ctx'or name=='Context': + if name in('ctx','Context'): return try: cls.cmd @@ -60,11 +57,8 @@ except KeyError: global run_dir rd=run_dir - class node_class(waflib.Node.Node): - pass - self.node_class=node_class - self.node_class.__module__="waflib.Node" - self.node_class.__name__="Nod3" + self.node_class=type('Nod3',(waflib.Node.Node,),{}) + self.node_class.__module__='waflib.Node' self.node_class.ctx=self self.root=self.node_class('',None) self.cur_script=None @@ -72,13 +66,20 @@ self.stack_path=[] self.exec_dict={'ctx':self,'conf':self,'bld':self,'opt':self} self.logger=None - def __hash__(self): - return id(self) + def finalize(self): + try: + logger=self.logger + except AttributeError: + pass + else: + Logs.free_logger(logger) + delattr(self,'logger') def load(self,tool_list,*k,**kw): tools=Utils.to_list(tool_list) path=Utils.to_list(kw.get('tooldir','')) + with_sys_path=kw.get('with_sys_path',True) for t in tools: - module=load_tool(t,path) + module=load_tool(t,path,with_sys_path=with_sys_path) fun=getattr(module,kw.get('name',self.fun),None) if fun: fun(self) @@ -93,10 +94,10 @@ self.cur_script=self.stack_path.pop() if self.cur_script: self.path=self.cur_script.parent - def recurse(self,dirs,name=None,mandatory=True,once=True): + def recurse(self,dirs,name=None,mandatory=True,once=True,encoding=None): try: cache=self.recurse_cache - except: + except AttributeError: cache=self.recurse_cache={} for d in Utils.to_list(dirs): if not os.path.isabs(d): @@ -108,7 +109,7 @@ cache[node]=True self.pre_recurse(node) try: - function_code=node.read('rU') + function_code=node.read('rU',encoding) exec(compile(function_code,node.abspath(),'exec'),self.exec_dict) finally: self.post_recurse(node) @@ -119,7 +120,7 @@ cache[tup]=True self.pre_recurse(node) try: - wscript_module=load_module(node.abspath()) + wscript_module=load_module(node.abspath(),encoding=encoding) user_function=getattr(wscript_module,(name or self.fun),None) if not user_function: if not mandatory: @@ -131,32 +132,60 @@ elif not node: if not mandatory: continue + try: + os.listdir(d) + except OSError: + raise Errors.WafError('Cannot read the folder %r'%d) raise Errors.WafError('No wscript file in directory %s'%d) def exec_command(self,cmd,**kw): subprocess=Utils.subprocess kw['shell']=isinstance(cmd,str) - Logs.debug('runner: %r'%cmd) - Logs.debug('runner_env: kw=%s'%kw) + Logs.debug('runner: %r',cmd) + Logs.debug('runner_env: kw=%s',kw) + if self.logger: + self.logger.info(cmd) + if'stdout'not in kw: + kw['stdout']=subprocess.PIPE + if'stderr'not in kw: + kw['stderr']=subprocess.PIPE + if Logs.verbose and not kw['shell']and not Utils.check_exe(cmd[0]): + raise Errors.WafError('Program %s not found!'%cmd[0]) + wargs={} + if'timeout'in kw: + if kw['timeout']is not None: + wargs['timeout']=kw['timeout'] + del kw['timeout'] + if'input'in kw: + if kw['input']: + wargs['input']=kw['input'] + kw['stdin']=subprocess.PIPE + del kw['input'] + if'cwd'in kw: + if not isinstance(kw['cwd'],str): + kw['cwd']=kw['cwd'].abspath() try: + ret,out,err=Utils.run_process(cmd,kw,wargs) + except Exception as e: + raise Errors.WafError('Execution failure: %s'%str(e),ex=e) + if out: + if not isinstance(out,str): + out=out.decode(sys.stdout.encoding or'iso8859-1') if self.logger: - self.logger.info(cmd) - kw['stdout']=kw['stderr']=subprocess.PIPE - p=subprocess.Popen(cmd,**kw) - (out,err)=p.communicate() - if out: - self.logger.debug('out: %s'%out.decode(sys.stdout.encoding or'iso8859-1')) - if err: - self.logger.error('err: %s'%err.decode(sys.stdout.encoding or'iso8859-1')) - return p.returncode + self.logger.debug('out: %s',out) else: - p=subprocess.Popen(cmd,**kw) - return p.wait() - except OSError: - return-1 + Logs.info(out,extra={'stream':sys.stdout,'c1':''}) + if err: + if not isinstance(err,str): + err=err.decode(sys.stdout.encoding or'iso8859-1') + if self.logger: + self.logger.error('err: %s'%err) + else: + Logs.info(err,extra={'stream':sys.stderr,'c1':''}) + return ret def cmd_and_log(self,cmd,**kw): subprocess=Utils.subprocess kw['shell']=isinstance(cmd,str) - Logs.debug('runner: %r'%cmd) + Logs.debug('runner: %r',cmd) if'quiet'in kw: quiet=kw['quiet'] del kw['quiet'] @@ -167,13 +196,27 @@ del kw['output'] else: to_ret=STDOUT + if Logs.verbose and not kw['shell']and not Utils.check_exe(cmd[0]): + raise Errors.WafError('Program %r not found!'%cmd[0]) kw['stdout']=kw['stderr']=subprocess.PIPE if quiet is None: self.to_log(cmd) + wargs={} + if'timeout'in kw: + if kw['timeout']is not None: + wargs['timeout']=kw['timeout'] + del kw['timeout'] + if'input'in kw: + if kw['input']: + wargs['input']=kw['input'] + kw['stdin']=subprocess.PIPE + del kw['input'] + if'cwd'in kw: + if not isinstance(kw['cwd'],str): + kw['cwd']=kw['cwd'].abspath() try: - p=subprocess.Popen(cmd,**kw) - (out,err)=p.communicate() - except Exception ,e: + ret,out,err=Utils.run_process(cmd,kw,wargs) + except Exception as e: raise Errors.WafError('Execution failure: %s'%str(e),ex=e) if not isinstance(out,str): out=out.decode(sys.stdout.encoding or'iso8859-1') @@ -183,9 +226,9 @@ self.to_log('out: %s'%out) if err and quiet!=STDERR and quiet!=BOTH: self.to_log('err: %s'%err) - if p.returncode: - e=Errors.WafError('Command %r returned %r'%(cmd,p.returncode)) - e.returncode=p.returncode + if ret: + e=Errors.WafError('Command %r returned %r'%(cmd,ret)) + e.returncode=ret e.stderr=err e.stdout=out raise e @@ -199,7 +242,7 @@ self.logger.info('from %s: %s'%(self.path.abspath(),msg)) try: msg='%s\n(complete log in %s)'%(msg,self.logger.handlers[0].baseFilename) - except: + except AttributeError: pass raise self.errors.ConfigurationError(msg,ex=ex) def to_log(self,msg): @@ -210,17 +253,29 @@ else: sys.stderr.write(str(msg)) sys.stderr.flush() - def msg(self,msg,result,color=None): - self.start_msg(msg) + def msg(self,*k,**kw): + try: + msg=kw['msg'] + except KeyError: + msg=k[0] + self.start_msg(msg,**kw) + try: + result=kw['result'] + except KeyError: + result=k[1] + color=kw.get('color') if not isinstance(color,str): color=result and'GREEN'or'YELLOW' - self.end_msg(result,color) - def start_msg(self,msg): + self.end_msg(result,color,**kw) + def start_msg(self,*k,**kw): + if kw.get('quiet'): + return + msg=kw.get('msg')or k[0] try: if self.in_msg: self.in_msg+=1 return - except: + except AttributeError: self.in_msg=0 self.in_msg+=1 try: @@ -230,10 +285,13 @@ for x in(self.line_just*'-',msg): self.to_log(x) Logs.pprint('NORMAL',"%s :"%msg.ljust(self.line_just),sep='') - def end_msg(self,result,color=None): + def end_msg(self,*k,**kw): + if kw.get('quiet'): + return self.in_msg-=1 if self.in_msg: return + result=kw.get('result')or k[0] defcolor='GREEN' if result==True: msg='ok' @@ -243,57 +301,92 @@ else: msg=str(result) self.to_log(msg) - Logs.pprint(color or defcolor,msg) + try: + color=kw['color'] + except KeyError: + if len(k)>1 and k[1]in Logs.colors_lst: + color=k[1] + else: + color=defcolor + Logs.pprint(color,msg) def load_special_tools(self,var,ban=[]): global waf_dir - lst=self.root.find_node(waf_dir).find_node('waflib/extras').ant_glob(var) - for x in lst: - if not x.name in ban: - load_tool(x.name.replace('.py','')) + if os.path.isdir(waf_dir): + lst=self.root.find_node(waf_dir).find_node('waflib/extras').ant_glob(var) + for x in lst: + if not x.name in ban: + load_tool(x.name.replace('.py','')) + else: + from zipfile import PyZipFile + waflibs=PyZipFile(waf_dir) + lst=waflibs.namelist() + for x in lst: + if not re.match('waflib/extras/%s'%var.replace('*','.*'),var): + continue + f=os.path.basename(x) + doban=False + for b in ban: + r=b.replace('*','.*') + if re.match(r,f): + doban=True + if not doban: + f=f.replace('.py','') + load_tool(f) cache_modules={} -def load_module(path): +def load_module(path,encoding=None): try: return cache_modules[path] except KeyError: pass module=imp.new_module(WSCRIPT_FILE) try: - code=Utils.readf(path,m='rU') - except(IOError,OSError): + code=Utils.readf(path,m='rU',encoding=encoding) + except EnvironmentError: raise Errors.WafError('Could not read the file %r'%path) module_dir=os.path.dirname(path) sys.path.insert(0,module_dir) - exec(compile(code,path,'exec'),module.__dict__) - sys.path.remove(module_dir) + try: + exec(compile(code,path,'exec'),module.__dict__) + finally: + sys.path.remove(module_dir) cache_modules[path]=module return module -def load_tool(tool,tooldir=None): - tool=tool.replace('++','xx') - tool=tool.replace('java','javaw') - tool=tool.replace('compiler_cc','compiler_c') - if tooldir: - assert isinstance(tooldir,list) - sys.path=tooldir+sys.path - try: - __import__(tool) +def load_tool(tool,tooldir=None,ctx=None,with_sys_path=True): + if tool=='java': + tool='javaw' + else: + tool=tool.replace('++','xx') + if not with_sys_path: + back_path=sys.path + sys.path=[] + try: + if tooldir: + assert isinstance(tooldir,list) + sys.path=tooldir+sys.path + try: + __import__(tool) + finally: + for d in tooldir: + sys.path.remove(d) ret=sys.modules[tool] Context.tools[tool]=ret return ret - finally: - for d in tooldir: - sys.path.remove(d) - else: - global waf_dir - try: - os.stat(os.path.join(waf_dir,'waflib','extras',tool+'.py')) - d='waflib.extras.%s'%tool - except: + else: + if not with_sys_path:sys.path.insert(0,waf_dir) try: - os.stat(os.path.join(waf_dir,'waflib','Tools',tool+'.py')) - d='waflib.Tools.%s'%tool - except: - d=tool - __import__(d) - ret=sys.modules[d] - Context.tools[tool]=ret - return ret + for x in('waflib.Tools.%s','waflib.extras.%s','waflib.%s','%s'): + try: + __import__(x%tool) + break + except ImportError: + x=None + else: + __import__(tool) + finally: + if not with_sys_path:sys.path.remove(waf_dir) + ret=sys.modules[x%tool] + Context.tools[tool]=ret + return ret + finally: + if not with_sys_path: + sys.path+=back_path diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Errors.py glmark2-2021.02/waflib/Errors.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Errors.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Errors.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import traceback,sys class WafError(Exception): diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/extras/compat15.py glmark2-2021.02/waflib/extras/compat15.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/extras/compat15.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/extras/compat15.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,10 +1,8 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import sys from waflib import ConfigSet,Logs,Options,Scripting,Task,Build,Configure,Node,Runner,TaskGen,Utils,Errors,Context sys.modules['Environment']=ConfigSet ConfigSet.Environment=ConfigSet.ConfigSet @@ -18,18 +16,50 @@ sys.modules['Runner']=Runner sys.modules['TaskGen']=TaskGen sys.modules['Utils']=Utils +sys.modules['Constants']=Context +Context.SRCDIR='' +Context.BLDDIR='' from waflib.Tools import c_preproc sys.modules['preproc']=c_preproc from waflib.Tools import c_config sys.modules['config_c']=c_config ConfigSet.ConfigSet.copy=ConfigSet.ConfigSet.derive ConfigSet.ConfigSet.set_variant=Utils.nada +Utils.pproc=Utils.subprocess Build.BuildContext.add_subdirs=Build.BuildContext.recurse Build.BuildContext.new_task_gen=Build.BuildContext.__call__ Build.BuildContext.is_install=0 Node.Node.relpath_gen=Node.Node.path_from +Utils.pproc=Utils.subprocess +Utils.get_term_cols=Logs.get_term_cols +def cmd_output(cmd,**kw): + silent=False + if'silent'in kw: + silent=kw['silent'] + del(kw['silent']) + if'e'in kw: + tmp=kw['e'] + del(kw['e']) + kw['env']=tmp + kw['shell']=isinstance(cmd,str) + kw['stdout']=Utils.subprocess.PIPE + if silent: + kw['stderr']=Utils.subprocess.PIPE + try: + p=Utils.subprocess.Popen(cmd,**kw) + output=p.communicate()[0] + except OSError as e: + raise ValueError(str(e)) + if p.returncode: + if not silent: + msg="command execution failed: %s -> %r"%(cmd,str(output)) + raise ValueError(msg) + output='' + return output +Utils.cmd_output=cmd_output def name_to_obj(self,s,env=None): - Logs.warn('compat: change "name_to_obj(name, env)" by "get_tgen_by_name(name)"') + if Logs.verbose: + Logs.warn('compat: change "name_to_obj(name, env)" by "get_tgen_by_name(name)"') return self.get_tgen_by_name(s) Build.BuildContext.name_to_obj=name_to_obj def env_of_name(self,name): @@ -51,13 +81,15 @@ self.prepare_env(env) self.all_envs[name]=env else: - if fromenv:Logs.warn("The environment %s may have been configured already"%name) + if fromenv: + Logs.warn('The environment %s may have been configured already',name) return env Configure.ConfigurationContext.retrieve=retrieve Configure.ConfigurationContext.sub_config=Configure.ConfigurationContext.recurse Configure.ConfigurationContext.check_tool=Configure.ConfigurationContext.load Configure.conftest=Configure.conf Configure.ConfigurationError=Errors.ConfigurationError +Utils.WafError=Errors.WafError Options.OptionsContext.sub_options=Options.OptionsContext.recurse Options.OptionsContext.tool_options=Context.Context.load Options.Handler=Options.OptionsContext @@ -68,6 +100,8 @@ key=key[1:] self.table[key]=value ConfigSet.ConfigSet.__setitem__=setitem +@TaskGen.feature('d') +@TaskGen.before('apply_incpaths') def old_importpaths(self): if getattr(self,'importpaths',[]): self.includes=self.importpaths @@ -76,40 +110,61 @@ def load_tool(*k,**kw): ret=eld(*k,**kw) if'set_options'in ret.__dict__: - Logs.warn('compat: rename "set_options" to options') + if Logs.verbose: + Logs.warn('compat: rename "set_options" to options') ret.options=ret.set_options if'detect'in ret.__dict__: - Logs.warn('compat: rename "detect" to "configure"') + if Logs.verbose: + Logs.warn('compat: rename "detect" to "configure"') ret.configure=ret.detect return ret Context.load_tool=load_tool +def get_curdir(self): + return self.path.abspath() +Context.Context.curdir=property(get_curdir,Utils.nada) +def get_srcdir(self): + return self.srcnode.abspath() +Configure.ConfigurationContext.srcdir=property(get_srcdir,Utils.nada) +def get_blddir(self): + return self.bldnode.abspath() +Configure.ConfigurationContext.blddir=property(get_blddir,Utils.nada) +Configure.ConfigurationContext.check_message_1=Configure.ConfigurationContext.start_msg +Configure.ConfigurationContext.check_message_2=Configure.ConfigurationContext.end_msg rev=Context.load_module -def load_module(path): - ret=rev(path) +def load_module(path,encoding=None): + ret=rev(path,encoding) if'set_options'in ret.__dict__: - Logs.warn('compat: rename "set_options" to "options" (%r)'%path) + if Logs.verbose: + Logs.warn('compat: rename "set_options" to "options" (%r)',path) ret.options=ret.set_options if'srcdir'in ret.__dict__: - Logs.warn('compat: rename "srcdir" to "top" (%r)'%path) + if Logs.verbose: + Logs.warn('compat: rename "srcdir" to "top" (%r)',path) ret.top=ret.srcdir if'blddir'in ret.__dict__: - Logs.warn('compat: rename "blddir" to "out" (%r)'%path) + if Logs.verbose: + Logs.warn('compat: rename "blddir" to "out" (%r)',path) ret.out=ret.blddir + Utils.g_module=Context.g_module + Options.launch_dir=Context.launch_dir return ret Context.load_module=load_module old_post=TaskGen.task_gen.post def post(self): self.features=self.to_list(self.features) if'cc'in self.features: - Logs.warn('compat: the feature cc does not exist anymore (use "c")') + if Logs.verbose: + Logs.warn('compat: the feature cc does not exist anymore (use "c")') self.features.remove('cc') self.features.append('c') if'cstaticlib'in self.features: - Logs.warn('compat: the feature cstaticlib does not exist anymore (use "cstlib" or "cxxstlib")') + if Logs.verbose: + Logs.warn('compat: the feature cstaticlib does not exist anymore (use "cstlib" or "cxxstlib")') self.features.remove('cstaticlib') self.features.append(('cxx'in self.features)and'cxxstlib'or'cstlib') if getattr(self,'ccflags',None): - Logs.warn('compat: "ccflags" was renamed to "cflags"') + if Logs.verbose: + Logs.warn('compat: "ccflags" was renamed to "cflags"') self.cflags=self.ccflags return old_post(self) TaskGen.task_gen.post=post @@ -117,6 +172,9 @@ Logs.warn('wrong version (waf_version was removed in waf 1.6)') Utils.waf_version=waf_version import os +@TaskGen.feature('c','cxx','d') +@TaskGen.before('apply_incpaths','propagate_uselib_vars') +@TaskGen.after('apply_link','process_source') def apply_uselib_local(self): env=self.env from waflib.Tools.ccroot import stlink_task @@ -125,9 +183,11 @@ names=self.to_list(getattr(self,'uselib_local',[])) get=self.bld.get_tgen_by_name seen=set([]) + seen_uselib=set([]) tmp=Utils.deque(names) if tmp: - Logs.warn('compat: "uselib_local" is deprecated, replace by "use"') + if Logs.verbose: + Logs.warn('compat: "uselib_local" is deprecated, replace by "use"') while tmp: lib_name=tmp.popleft() if lib_name in seen: @@ -154,11 +214,15 @@ if not tmp_path in env['LIBPATH']: env.prepend_value('LIBPATH',[tmp_path]) for v in self.to_list(getattr(y,'uselib',[])): - if not env['STLIB_'+v]: - if not v in self.uselib: - self.uselib.insert(0,v) + if v not in seen_uselib: + seen_uselib.add(v) + if not env['STLIB_'+v]: + if not v in self.uselib: + self.uselib.insert(0,v) if getattr(y,'export_includes',None): self.includes.extend(y.to_incnodes(y.export_includes)) +@TaskGen.feature('cprogram','cxxprogram','cstlib','cxxstlib','cshlib','cxxshlib','dprogram','dstlib','dshlib') +@TaskGen.after('apply_link') def apply_objdeps(self): names=getattr(self,'add_objects',[]) if not names: @@ -185,39 +249,53 @@ seen.append(x) for t in getattr(y,'compiled_tasks',[]): self.link_task.inputs.extend(t.outputs) +@TaskGen.after('apply_link') def process_obj_files(self): if not hasattr(self,'obj_files'): return for x in self.obj_files: node=self.path.find_resource(x) self.link_task.inputs.append(node) +@TaskGen.taskgen_method def add_obj_file(self,file): if not hasattr(self,'obj_files'):self.obj_files=[] if not'process_obj_files'in self.meths:self.meths.append('process_obj_files') self.obj_files.append(file) old_define=Configure.ConfigurationContext.__dict__['define'] -def define(self,key,val,quote=True): - old_define(self,key,val,quote) +@Configure.conf +def define(self,key,val,quote=True,comment=''): + old_define(self,key,val,quote,comment) if key.startswith('HAVE_'): self.env[key]=1 old_undefine=Configure.ConfigurationContext.__dict__['undefine'] -def undefine(self,key): - old_undefine(self,key) +@Configure.conf +def undefine(self,key,comment=''): + old_undefine(self,key,comment) if key.startswith('HAVE_'): self.env[key]=0 def set_incdirs(self,val): Logs.warn('compat: change "export_incdirs" by "export_includes"') self.export_includes=val TaskGen.task_gen.export_incdirs=property(None,set_incdirs) - -TaskGen.feature('d')(old_importpaths) -TaskGen.before('apply_incpaths')(old_importpaths) -TaskGen.feature('c','cxx','d')(apply_uselib_local) -TaskGen.before('apply_incpaths','propagate_uselib_vars')(apply_uselib_local) -TaskGen.after('apply_link','process_source')(apply_uselib_local) -TaskGen.feature('cprogram','cxxprogram','cstlib','cxxstlib','cshlib','cxxshlib','dprogram','dstlib','dshlib')(apply_objdeps) -TaskGen.after('apply_link')(apply_objdeps) -TaskGen.after('apply_link')(process_obj_files) -TaskGen.taskgen_method(add_obj_file) -Configure.conf(define) -Configure.conf(undefine) \ No newline at end of file +def install_dir(self,path): + if not path: + return[] + destpath=Utils.subst_vars(path,self.env) + if self.is_install>0: + Logs.info('* creating %s',destpath) + Utils.check_dir(destpath) + elif self.is_install<0: + Logs.info('* removing %s',destpath) + try: + os.remove(destpath) + except OSError: + pass +Build.BuildContext.install_dir=install_dir +repl={'apply_core':'process_source','apply_lib_vars':'process_source','apply_obj_vars':'propagate_uselib_vars','exec_rule':'process_rule'} +def after(*k): + k=[repl.get(key,key)for key in k] + return TaskGen.after_method(*k) +def before(*k): + k=[repl.get(key,key)for key in k] + return TaskGen.before_method(*k) +TaskGen.before=before diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/extras/__init__.py glmark2-2021.02/waflib/extras/__init__.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/extras/__init__.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/extras/__init__.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,4 +1,4 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/fixpy2.py glmark2-2021.02/waflib/fixpy2.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/fixpy2.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/fixpy2.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os all_modifs={} @@ -21,12 +21,16 @@ return filename=os.path.join(dir,name) f=open(filename,'r') - txt=f.read() - f.close() + try: + txt=f.read() + finally: + f.close() txt=fun(txt) f=open(filename,'w') - f.write(txt) - f.close() + try: + f.write(txt) + finally: + f.close() def subst(*k): def do_subst(fun): global all_modifs @@ -37,14 +41,14 @@ all_modifs[x]=[fun] return fun return do_subst +@subst('*') def r1(code): - code=code.replace(',e:',',e:') - code=code.replace("",'') - code=code.replace('','') - return code + code=code.replace('as e:',',e:') + code=code.replace(".decode(sys.stdout.encoding or 'iso8859-1')",'') + return code.replace('.encode()','') +@subst('Runner.py') def r4(code): - code=code.replace('next(self.biter)','self.biter.next()') - return code - -subst('*')(r1) -subst('Runner.py')(r4) \ No newline at end of file + return code.replace('next(self.biter)','self.biter.next()') +@subst('Context.py') +def r5(code): + return code.replace("('Execution failure: %s'%str(e),ex=e)","('Execution failure: %s'%str(e),ex=e),None,sys.exc_info()[2]") diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/__init__.py glmark2-2021.02/waflib/__init__.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/__init__.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/__init__.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,4 +1,4 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Logs.py glmark2-2021.02/waflib/Logs.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Logs.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Logs.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,54 +1,53 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os,re,traceback,sys -_nocolor=os.environ.get('NOCOLOR','no')not in('no','0','false') -try: - if not _nocolor: - import waflib.ansiterm -except: - pass +from waflib import Utils,ansiterm +if not os.environ.get('NOSYNC',False): + if sys.stdout.isatty()and id(sys.stdout)==id(sys.__stdout__): + sys.stdout=ansiterm.AnsiTerm(sys.stdout) + if sys.stderr.isatty()and id(sys.stderr)==id(sys.__stderr__): + sys.stderr=ansiterm.AnsiTerm(sys.stderr) import logging -LOG_FORMAT="%(asctime)s %(c1)s%(zone)s%(c2)s %(message)s" -HOUR_FORMAT="%H:%M:%S" -zones='' +LOG_FORMAT=os.environ.get('WAF_LOG_FORMAT','%(asctime)s %(c1)s%(zone)s%(c2)s %(message)s') +HOUR_FORMAT=os.environ.get('WAF_HOUR_FORMAT','%H:%M:%S') +zones=[] verbose=0 -colors_lst={'USE':True,'BOLD':'\x1b[01;1m','RED':'\x1b[01;31m','GREEN':'\x1b[32m','YELLOW':'\x1b[33m','PINK':'\x1b[35m','BLUE':'\x1b[01;34m','CYAN':'\x1b[36m','NORMAL':'\x1b[0m','cursor_on':'\x1b[?25h','cursor_off':'\x1b[?25l',} -got_tty=not os.environ.get('TERM','dumb')in['dumb','emacs'] -if got_tty: - try: - got_tty=sys.stderr.isatty() - except AttributeError: - got_tty=False -if(not got_tty and os.environ.get('TERM','dumb')!='msys')or _nocolor: - colors_lst['USE']=False -def get_term_cols(): - return 80 +colors_lst={'USE':True,'BOLD':'\x1b[01;1m','RED':'\x1b[01;31m','GREEN':'\x1b[32m','YELLOW':'\x1b[33m','PINK':'\x1b[35m','BLUE':'\x1b[01;34m','CYAN':'\x1b[36m','GREY':'\x1b[37m','NORMAL':'\x1b[0m','cursor_on':'\x1b[?25h','cursor_off':'\x1b[?25l',} +indicator='\r\x1b[K%s%s%s' try: - import struct,fcntl,termios -except ImportError: - pass -else: - if got_tty: - def get_term_cols_real(): - dummy_lines,cols=struct.unpack("HHHH",fcntl.ioctl(sys.stderr.fileno(),termios.TIOCGWINSZ,struct.pack("HHHH",0,0,0,0)))[:2] - return cols - try: - get_term_cols_real() - except: - pass + unicode +except NameError: + unicode=None +def enable_colors(use): + if use==1: + if not(sys.stderr.isatty()or sys.stdout.isatty()): + use=0 + if Utils.is_win32 and os.name!='java': + term=os.environ.get('TERM','') else: - get_term_cols=get_term_cols_real + term=os.environ.get('TERM','dumb') + if term in('dumb','emacs'): + use=0 + if use>=1: + os.environ['TERM']='vt100' + colors_lst['USE']=use +try: + get_term_cols=ansiterm.get_term_cols +except AttributeError: + def get_term_cols(): + return 80 get_term_cols.__doc__=""" - Get the console width in characters. + Returns the console width in characters. :return: the number of characters per line :rtype: int """ def get_color(cl): - if not colors_lst['USE']:return'' - return colors_lst.get(cl,'') + if colors_lst['USE']: + return colors_lst.get(cl,'') + return'' class color_dict(object): def __getattr__(self,a): return get_color(a) @@ -57,19 +56,12 @@ colors=color_dict() re_log=re.compile(r'(\w+): (.*)',re.M) class log_filter(logging.Filter): - def __init__(self,name=None): - pass + def __init__(self,name=''): + logging.Filter.__init__(self,name) def filter(self,rec): - rec.c1=colors.PINK - rec.c2=colors.NORMAL + global verbose rec.zone=rec.module if rec.levelno>=logging.INFO: - if rec.levelno>=logging.ERROR: - rec.c1=colors.RED - elif rec.levelno>=logging.WARNING: - rec.c1=colors.YELLOW - else: - rec.c1=colors.GREEN return True m=re_log.match(rec.msg) if m: @@ -80,26 +72,82 @@ elif not verbose>2: return False return True +class log_handler(logging.StreamHandler): + def emit(self,record): + try: + try: + self.stream=record.stream + except AttributeError: + if record.levelno>=logging.WARNING: + record.stream=self.stream=sys.stderr + else: + record.stream=self.stream=sys.stdout + self.emit_override(record) + self.flush() + except(KeyboardInterrupt,SystemExit): + raise + except: + self.handleError(record) + def emit_override(self,record,**kw): + self.terminator=getattr(record,'terminator','\n') + stream=self.stream + if unicode: + msg=self.formatter.format(record) + fs='%s'+self.terminator + try: + if(isinstance(msg,unicode)and getattr(stream,'encoding',None)): + fs=fs.decode(stream.encoding) + try: + stream.write(fs%msg) + except UnicodeEncodeError: + stream.write((fs%msg).encode(stream.encoding)) + else: + stream.write(fs%msg) + except UnicodeError: + stream.write((fs%msg).encode('utf-8')) + else: + logging.StreamHandler.emit(self,record) class formatter(logging.Formatter): def __init__(self): logging.Formatter.__init__(self,LOG_FORMAT,HOUR_FORMAT) def format(self,rec): - if rec.levelno>=logging.WARNING or rec.levelno==logging.INFO: - try: - msg=rec.msg.decode('utf-8') - except: - msg=rec.msg - return'%s%s%s'%(rec.c1,msg,rec.c2) + try: + msg=rec.msg.decode('utf-8') + except Exception: + msg=rec.msg + use=colors_lst['USE'] + if(use==1 and rec.stream.isatty())or use==2: + c1=getattr(rec,'c1',None) + if c1 is None: + c1='' + if rec.levelno>=logging.ERROR: + c1=colors.RED + elif rec.levelno>=logging.WARNING: + c1=colors.YELLOW + elif rec.levelno>=logging.INFO: + c1=colors.GREEN + c2=getattr(rec,'c2',colors.NORMAL) + msg='%s%s%s'%(c1,msg,c2) + else: + msg=re.sub(r'\r(?!\n)|\x1B\[(K|.*?(m|h|l))','',msg) + if rec.levelno>=logging.INFO: + if rec.args: + return msg%rec.args + return msg + rec.msg=msg + rec.c1=colors.PINK + rec.c2=colors.NORMAL return logging.Formatter.format(self,rec) log=None def debug(*k,**kw): + global verbose if verbose: k=list(k) k[0]=k[0].replace('\n',' ') global log log.debug(*k,**kw) def error(*k,**kw): - global log + global log,verbose log.error(*k,**kw) if verbose>2: st=traceback.extract_stack() @@ -107,10 +155,10 @@ st=st[:-1] buf=[] for filename,lineno,name,line in st: - buf.append(' File "%s", line %d, in %s'%(filename,lineno,name)) + buf.append(' File %r, line %d, in %s'%(filename,lineno,name)) if line: buf.append(' %s'%line.strip()) - if buf:log.error("\n".join(buf)) + if buf:log.error('\n'.join(buf)) def warn(*k,**kw): global log log.warn(*k,**kw) @@ -122,7 +170,7 @@ log=logging.getLogger('waflib') log.handlers=[] log.filters=[] - hdlr=logging.StreamHandler() + hdlr=log_handler() hdlr.setFormatter(formatter()) log.addHandler(hdlr) log.addFilter(log_filter()) @@ -135,7 +183,7 @@ logger.addHandler(hdlr) logger.setLevel(logging.DEBUG) return logger -def make_mem_logger(name,to_log,size=10000): +def make_mem_logger(name,to_log,size=8192): from logging.handlers import MemoryHandler logger=logging.getLogger(name) hdlr=MemoryHandler(size,target=to_log) @@ -145,5 +193,13 @@ logger.memhandler=hdlr logger.setLevel(logging.DEBUG) return logger -def pprint(col,str,label='',sep='\n'): - sys.stderr.write("%s%s%s %s%s"%(colors(col),str,colors.NORMAL,label,sep)) +def free_logger(logger): + try: + for x in logger.handlers: + x.close() + logger.removeHandler(x) + except Exception: + pass +def pprint(col,msg,label='',sep='\n'): + global info + info('%s%s%s %s',colors(col),msg,colors.NORMAL,label,extra={'terminator':sep}) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Node.py glmark2-2021.02/waflib/Node.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Node.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Node.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,7 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set import os,re,sys,shutil from waflib import Utils,Errors exclude_regs=''' @@ -35,28 +33,11 @@ **/{arch} **/_darcs **/_darcs/** +**/.intlcache **/.DS_Store''' -def split_path(path): - return path.split('/') -def split_path_cygwin(path): - if path.startswith('//'): - ret=path.split('/')[2:] - ret[0]='/'+ret[0] - return ret - return path.split('/') -re_sp=re.compile('[/\\\\]') -def split_path_win32(path): - if path.startswith('\\\\'): - ret=re.split(re_sp,path)[2:] - ret[0]='\\'+ret[0] - return ret - return re.split(re_sp,path) -if sys.platform=='cygwin': - split_path=split_path_cygwin -elif Utils.is_win32: - split_path=split_path_win32 class Node(object): - __slots__=('name','sig','children','parent','cache_abspath','cache_isdir') + dict_class=dict + __slots__=('name','parent','children','cache_abspath','cache_isdir') def __init__(self,name,parent): self.name=name self.parent=parent @@ -68,45 +49,71 @@ self.name=data[0] self.parent=data[1] if data[2]is not None: - self.children=data[2] - if data[3]is not None: - self.sig=data[3] + self.children=self.dict_class(data[2]) def __getstate__(self): - return(self.name,self.parent,getattr(self,'children',None),getattr(self,'sig',None)) + return(self.name,self.parent,getattr(self,'children',None)) def __str__(self): - return self.name + return self.abspath() def __repr__(self): return self.abspath() - def __hash__(self): - return id(self) - def __eq__(self,node): - return id(self)==id(node) def __copy__(self): raise Errors.WafError('nodes are not supposed to be copied') - def read(self,flags='r'): - return Utils.readf(self.abspath(),flags) - def write(self,data,flags='w'): - f=None - try: - f=open(self.abspath(),flags) - f.write(data) - finally: - if f: - f.close() + def read(self,flags='r',encoding='ISO8859-1'): + return Utils.readf(self.abspath(),flags,encoding) + def write(self,data,flags='w',encoding='ISO8859-1'): + Utils.writef(self.abspath(),data,flags,encoding) + def read_json(self,convert=True,encoding='utf-8'): + import json + object_pairs_hook=None + if convert and sys.hexversion<0x3000000: + try: + _type=unicode + except NameError: + _type=str + def convert(value): + if isinstance(value,list): + return[convert(element)for element in value] + elif isinstance(value,_type): + return str(value) + else: + return value + def object_pairs(pairs): + return dict((str(pair[0]),convert(pair[1]))for pair in pairs) + object_pairs_hook=object_pairs + return json.loads(self.read(encoding=encoding),object_pairs_hook=object_pairs_hook) + def write_json(self,data,pretty=True): + import json + indent=2 + separators=(',',': ') + sort_keys=pretty + newline=os.linesep + if not pretty: + indent=None + separators=(',',':') + newline='' + output=json.dumps(data,indent=indent,separators=separators,sort_keys=sort_keys)+newline + self.write(output,encoding='utf-8') + def exists(self): + return os.path.exists(self.abspath()) + def isdir(self): + return os.path.isdir(self.abspath()) def chmod(self,val): os.chmod(self.abspath(),val) - def delete(self): - try: - if getattr(self,'children',None): - shutil.rmtree(self.abspath()) - else: - os.unlink(self.abspath()) - except: - pass + def delete(self,evict=True): try: - delattr(self,'children') - except: - pass + try: + if os.path.isdir(self.abspath()): + shutil.rmtree(self.abspath()) + else: + os.remove(self.abspath()) + except OSError as e: + if os.path.exists(self.abspath()): + raise e + finally: + if evict: + self.evict() + def evict(self): + del self.parent.children[self.name] def suffix(self): k=max(0,self.name.rfind('.')) return self.name[k:] @@ -122,86 +129,80 @@ lst.sort() return lst def mkdir(self): - if getattr(self,'cache_isdir',None): + if self.isdir(): return try: self.parent.mkdir() - except: + except OSError: pass if self.name: try: os.makedirs(self.abspath()) except OSError: pass - if not os.path.isdir(self.abspath()): - raise Errors.WafError('Could not create the directory %s'%self.abspath()) + if not self.isdir(): + raise Errors.WafError('Could not create the directory %r'%self) try: self.children - except: - self.children={} - self.cache_isdir=True + except AttributeError: + self.children=self.dict_class() def find_node(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] + lst=[x for x in Utils.split_path(lst)if x and x!='.'] cur=self for x in lst: if x=='..': cur=cur.parent or cur continue try: - if x in cur.children: - cur=cur.children[x] + ch=cur.children + except AttributeError: + cur.children=self.dict_class() + else: + try: + cur=ch[x] continue - except: - cur.children={} + except KeyError: + pass cur=self.__class__(x,cur) - try: - os.stat(cur.abspath()) - except: - del cur.parent.children[x] + if not cur.exists(): + cur.evict() return None - ret=cur - try: - os.stat(ret.abspath()) - except: - del ret.parent.children[ret.name] + if not cur.exists(): + cur.evict() return None - try: - while not getattr(cur.parent,'cache_isdir',None): - cur=cur.parent - cur.cache_isdir=True - except AttributeError: - pass - return ret + return cur def make_node(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] + lst=[x for x in Utils.split_path(lst)if x and x!='.'] cur=self for x in lst: if x=='..': cur=cur.parent or cur continue - if getattr(cur,'children',{}): - if x in cur.children: - cur=cur.children[x] - continue + try: + cur=cur.children[x] + except AttributeError: + cur.children=self.dict_class() + except KeyError: + pass else: - cur.children={} + continue cur=self.__class__(x,cur) return cur - def search(self,lst): + def search_node(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] + lst=[x for x in Utils.split_path(lst)if x and x!='.'] cur=self - try: - for x in lst: - if x=='..': - cur=cur.parent or cur - else: + for x in lst: + if x=='..': + cur=cur.parent or cur + else: + try: cur=cur.children[x] - return cur - except: - pass + except(AttributeError,KeyError): + return None + return cur def path_from(self,node): c1=self c2=node @@ -217,59 +218,70 @@ up+=1 c2=c2.parent c2h-=1 - while id(c1)!=id(c2): + while not c1 is c2: lst.append(c1.name) up+=1 c1=c1.parent c2=c2.parent - for i in range(up): - lst.append('..') + if c1.parent: + for i in range(up): + lst.append('..') + else: + if lst and not Utils.is_win32: + lst.append('') lst.reverse() return os.sep.join(lst)or'.' def abspath(self): try: return self.cache_abspath - except: + except AttributeError: pass - if os.sep=='/': - if not self.parent: - val=os.sep - elif not self.parent.name: - val=os.sep+self.name - else: - val=self.parent.abspath()+os.sep+self.name + if not self.parent: + val=os.sep + elif not self.parent.name: + val=os.sep+self.name else: + val=self.parent.abspath()+os.sep+self.name + self.cache_abspath=val + return val + if Utils.is_win32: + def abspath(self): + try: + return self.cache_abspath + except AttributeError: + pass if not self.parent: val='' elif not self.parent.name: val=self.name+os.sep else: val=self.parent.abspath().rstrip(os.sep)+os.sep+self.name - self.cache_abspath=val - return val + self.cache_abspath=val + return val def is_child_of(self,node): p=self diff=self.height()-node.height() while diff>0: diff-=1 p=p.parent - return id(p)==id(node) + return p is node def ant_iter(self,accept=None,maxdepth=25,pats=[],dir=False,src=True,remove=True): dircont=self.listdir() dircont.sort() try: lst=set(self.children.keys()) + except AttributeError: + self.children=self.dict_class() + else: if remove: for x in lst-set(dircont): - del self.children[x] - except: - self.children={} + self.children[x].evict() for name in dircont: npats=accept(name,pats) if npats and npats[0]: accepted=[]in npats[0] node=self.make_node([name]) - isdir=os.path.isdir(node.abspath()) + isdir=node.isdir() if accepted: if isdir: if dir: @@ -277,17 +289,17 @@ else: if src: yield node - if getattr(node,'cache_isdir',None)or isdir: + if isdir: node.cache_isdir=True if maxdepth: for k in node.ant_iter(accept=accept,maxdepth=maxdepth-1,pats=npats,dir=dir,src=src,remove=remove): yield k - raise StopIteration def ant_glob(self,*k,**kw): src=kw.get('src',True) dir=kw.get('dir',False) excl=kw.get('excl',exclude_regs) incl=k and k[0]or kw.get('incl','**') + reflags=kw.get('ignorecase',0)and re.I def to_pat(s): lst=Utils.to_list(s) ret=[] @@ -304,9 +316,9 @@ k=k.replace('.','[.]').replace('*','.*').replace('?','.').replace('+','\\+') k='^%s$'%k try: - accu.append(re.compile(k)) - except Exception ,e: - raise Errors.WafError("Invalid pattern: %s"%k,e) + accu.append(re.compile(k,flags=reflags)) + except Exception as e: + raise Errors.WafError('Invalid pattern: %s'%k,e) ret.append(accu) return ret def filtre(name,nn): @@ -330,77 +342,52 @@ if[]in nrej: nacc=[] return[nacc,nrej] - ret=[x for x in self.ant_iter(accept=accept,pats=[to_pat(incl),to_pat(excl)],maxdepth=25,dir=dir,src=src,remove=kw.get('remove',True))] + ret=[x for x in self.ant_iter(accept=accept,pats=[to_pat(incl),to_pat(excl)],maxdepth=kw.get('maxdepth',25),dir=dir,src=src,remove=kw.get('remove',True))] if kw.get('flat',False): return' '.join([x.path_from(self)for x in ret]) return ret - def find_nodes(self,find_dirs=True,find_files=True,match_fun=lambda x:True): - x=""" - Recursively finds nodes:: - - def configure(cnf): - cnf.find_nodes() - - :param find_dirs: whether to return directories - :param find_files: whether to return files - :param match_fun: matching function, taking a node as parameter - :rtype generator - :return: a generator that iterates over all the requested files - """ - files=self.listdir() - for f in files: - node=self.make_node([f]) - if os.path.isdir(node.abspath()): - if find_dirs and match_fun(node): - yield node - gen=node.find_nodes(find_dirs,find_files,match_fun) - for g in gen: - yield g - else: - if find_files and match_fun(node): - yield node def is_src(self): cur=self - x=id(self.ctx.srcnode) - y=id(self.ctx.bldnode) + x=self.ctx.srcnode + y=self.ctx.bldnode while cur.parent: - if id(cur)==y: + if cur is y: return False - if id(cur)==x: + if cur is x: return True cur=cur.parent return False def is_bld(self): cur=self - y=id(self.ctx.bldnode) + y=self.ctx.bldnode while cur.parent: - if id(cur)==y: + if cur is y: return True cur=cur.parent return False def get_src(self): cur=self - x=id(self.ctx.srcnode) - y=id(self.ctx.bldnode) + x=self.ctx.srcnode + y=self.ctx.bldnode lst=[] while cur.parent: - if id(cur)==y: + if cur is y: lst.reverse() - return self.ctx.srcnode.make_node(lst) - if id(cur)==x: + return x.make_node(lst) + if cur is x: return self lst.append(cur.name) cur=cur.parent return self def get_bld(self): cur=self - x=id(self.ctx.srcnode) - y=id(self.ctx.bldnode) + x=self.ctx.srcnode + y=self.ctx.bldnode lst=[] while cur.parent: - if id(cur)==y: + if cur is y: return self - if id(cur)==x: + if cur is x: lst.reverse() return self.ctx.bldnode.make_node(lst) lst.append(cur.name) @@ -411,51 +398,33 @@ return self.ctx.bldnode.make_node(['__root__']+lst) def find_resource(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] - node=self.get_bld().search(lst) + lst=[x for x in Utils.split_path(lst)if x and x!='.'] + node=self.get_bld().search_node(lst) if not node: - self=self.get_src() - node=self.find_node(lst) - try: - pat=node.abspath() - if os.path.isdir(pat): - return None - except: - pass + node=self.get_src().find_node(lst) + if node and node.isdir(): + return None return node def find_or_declare(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] - node=self.get_bld().search(lst) + lst=[x for x in Utils.split_path(lst)if x and x!='.'] + node=self.get_bld().search_node(lst) if node: if not os.path.isfile(node.abspath()): - node.sig=None - try: - node.parent.mkdir() - except: - pass + node.parent.mkdir() return node self=self.get_src() node=self.find_node(lst) if node: - if not os.path.isfile(node.abspath()): - node.sig=None - try: - node.parent.mkdir() - except: - pass return node node=self.get_bld().make_node(lst) node.parent.mkdir() return node def find_dir(self,lst): if isinstance(lst,str): - lst=[x for x in split_path(lst)if x and x!='.'] + lst=[x for x in Utils.split_path(lst)if x and x!='.'] node=self.find_node(lst) - try: - if not os.path.isdir(node.abspath()): - return None - except(OSError,AttributeError): + if node and not node.isdir(): return None return node def change_ext(self,ext,ext_in=None): @@ -469,38 +438,49 @@ else: name=name[:-len(ext_in)]+ext return self.parent.find_or_declare([name]) - def nice_path(self,env=None): - return self.path_from(self.ctx.launch_node()) def bldpath(self): return self.path_from(self.ctx.bldnode) def srcpath(self): return self.path_from(self.ctx.srcnode) def relpath(self): cur=self - x=id(self.ctx.bldnode) + x=self.ctx.bldnode while cur.parent: - if id(cur)==x: + if cur is x: return self.bldpath() cur=cur.parent return self.srcpath() def bld_dir(self): return self.parent.bldpath() - def bld_base(self): - s=os.path.splitext(self.name)[0] - return self.bld_dir()+os.sep+s + def h_file(self): + return Utils.h_file(self.abspath()) def get_bld_sig(self): try: - ret=self.ctx.hash_cache[id(self)] - except KeyError: - pass + cache=self.ctx.cache_sig except AttributeError: - self.ctx.hash_cache={} - else: - return ret - if not self.is_bld()or self.ctx.bldnode is self.ctx.srcnode: - self.sig=Utils.h_file(self.abspath()) - self.ctx.hash_cache[id(self)]=ret=self.sig + cache=self.ctx.cache_sig={} + try: + ret=cache[self] + except KeyError: + p=self.abspath() + try: + ret=cache[self]=self.h_file() + except EnvironmentError: + if self.isdir(): + st=os.stat(p) + ret=cache[self]=Utils.h_list([p,st.st_ino,st.st_mode]) + return ret + raise return ret + def get_sig(self): + return self.h_file() + def set_sig(self,val): + try: + del self.get_bld_sig.__cache__[(self,)] + except(AttributeError,KeyError): + pass + sig=property(get_sig,set_sig) + cache_sig=property(get_sig,set_sig) pickle_lock=Utils.threading.Lock() class Nod3(Node): pass diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Options.py glmark2-2021.02/waflib/Options.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Options.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Options.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,63 +1,30 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os,tempfile,optparse,sys,re -from waflib import Logs,Utils,Context -cmds='distclean configure build install clean uninstall check dist distcheck'.split() +from waflib import Logs,Utils,Context,Errors options={} commands=[] +envvars=[] lockfile=os.environ.get('WAFLOCK','.lock-waf_%s_build'%sys.platform) -try:cache_global=os.path.abspath(os.environ['WAFCACHE']) -except KeyError:cache_global='' -platform=Utils.unversioned_sys_platform() class opt_parser(optparse.OptionParser): def __init__(self,ctx): optparse.OptionParser.__init__(self,conflict_handler="resolve",version='waf %s (%s)'%(Context.WAFVERSION,Context.WAFREVISION)) self.formatter.width=Logs.get_term_cols() - p=self.add_option self.ctx=ctx - jobs=ctx.jobs() - p('-j','--jobs',dest='jobs',default=jobs,type='int',help='amount of parallel jobs (%r)'%jobs) - p('-k','--keep',dest='keep',default=0,action='count',help='keep running happily even if errors are found') - p('-v','--verbose',dest='verbose',default=0,action='count',help='verbosity level -v -vv or -vvv [default: 0]') - p('--nocache',dest='nocache',default=False,action='store_true',help='ignore the WAFCACHE (if set)') - p('--zones',dest='zones',default='',action='store',help='debugging zones (task_gen, deps, tasks, etc)') - gr=optparse.OptionGroup(self,'configure options') - self.add_option_group(gr) - gr.add_option('-o','--out',action='store',default='',help='build dir for the project',dest='out') - gr.add_option('-t','--top',action='store',default='',help='src dir for the project',dest='top') - default_prefix=os.environ.get('PREFIX') - if not default_prefix: - if platform=='win32': - d=tempfile.gettempdir() - default_prefix=d[0].upper()+d[1:] - else: - default_prefix='/usr/local/' - gr.add_option('--prefix',dest='prefix',default=default_prefix,help='installation prefix [default: %r]'%default_prefix) - gr.add_option('--download',dest='download',default=False,action='store_true',help='try to download the tools if missing') - gr=optparse.OptionGroup(self,'build and install options') - self.add_option_group(gr) - gr.add_option('-p','--progress',dest='progress_bar',default=0,action='count',help='-p: progress bar; -pp: ide output') - gr.add_option('--targets',dest='targets',default='',action='store',help='task generators, e.g. "target1,target2"') - gr=optparse.OptionGroup(self,'step options') - self.add_option_group(gr) - gr.add_option('--files',dest='files',default='',action='store',help='files to process, by regexp, e.g. "*/main.c,*/test/main.o"') - default_destdir=os.environ.get('DESTDIR','') - gr=optparse.OptionGroup(self,'install/uninstall options') - self.add_option_group(gr) - gr.add_option('--destdir',help='installation root [default: %r]'%default_destdir,default=default_destdir,dest='destdir') - gr.add_option('-f','--force',dest='force',default=False,action='store_true',help='force file installation') + def print_usage(self,file=None): + return self.print_help(file) def get_usage(self): cmds_str={} for cls in Context.classes: - if not cls.cmd or cls.cmd=='options': + if not cls.cmd or cls.cmd=='options'or cls.cmd.startswith('_'): continue s=cls.__doc__ or'' cmds_str[cls.cmd]=s if Context.g_module: for(k,v)in Context.g_module.__dict__.items(): - if k in['options','init','shutdown']: + if k in('options','init','shutdown'): continue if type(v)is type(Context.create_context): if v.__doc__ and not k.startswith('_'): @@ -80,6 +47,45 @@ super(OptionsContext,self).__init__(**kw) self.parser=opt_parser(self) self.option_groups={} + jobs=self.jobs() + p=self.add_option + color=os.environ.get('NOCOLOR','')and'no'or'auto' + p('-c','--color',dest='colors',default=color,action='store',help='whether to use colors (yes/no/auto) [default: auto]',choices=('yes','no','auto')) + p('-j','--jobs',dest='jobs',default=jobs,type='int',help='amount of parallel jobs (%r)'%jobs) + p('-k','--keep',dest='keep',default=0,action='count',help='continue despite errors (-kk to try harder)') + p('-v','--verbose',dest='verbose',default=0,action='count',help='verbosity level -v -vv or -vvv [default: 0]') + p('--zones',dest='zones',default='',action='store',help='debugging zones (task_gen, deps, tasks, etc)') + p('--profile',dest='profile',default='',action='store_true',help=optparse.SUPPRESS_HELP) + gr=self.add_option_group('Configuration options') + self.option_groups['configure options']=gr + gr.add_option('-o','--out',action='store',default='',help='build dir for the project',dest='out') + gr.add_option('-t','--top',action='store',default='',help='src dir for the project',dest='top') + gr.add_option('--no-lock-in-run',action='store_true',default='',help=optparse.SUPPRESS_HELP,dest='no_lock_in_run') + gr.add_option('--no-lock-in-out',action='store_true',default='',help=optparse.SUPPRESS_HELP,dest='no_lock_in_out') + gr.add_option('--no-lock-in-top',action='store_true',default='',help=optparse.SUPPRESS_HELP,dest='no_lock_in_top') + default_prefix=getattr(Context.g_module,'default_prefix',os.environ.get('PREFIX')) + if not default_prefix: + if Utils.unversioned_sys_platform()=='win32': + d=tempfile.gettempdir() + default_prefix=d[0].upper()+d[1:] + else: + default_prefix='/usr/local/' + gr.add_option('--prefix',dest='prefix',default=default_prefix,help='installation prefix [default: %r]'%default_prefix) + gr.add_option('--bindir',dest='bindir',help='bindir') + gr.add_option('--libdir',dest='libdir',help='libdir') + gr=self.add_option_group('Build and installation options') + self.option_groups['build and install options']=gr + gr.add_option('-p','--progress',dest='progress_bar',default=0,action='count',help='-p: progress bar; -pp: ide output') + gr.add_option('--targets',dest='targets',default='',action='store',help='task generators, e.g. "target1,target2"') + gr=self.add_option_group('Step options') + self.option_groups['step options']=gr + gr.add_option('--files',dest='files',default='',action='store',help='files to process, by regexp, e.g. "*/main.c,*/test/main.o"') + default_destdir=os.environ.get('DESTDIR','') + gr=self.add_option_group('Installation and uninstallation options') + self.option_groups['install/uninstall options']=gr + gr.add_option('--destdir',help='installation root [default: %r]'%default_destdir,default=default_destdir,dest='destdir') + gr.add_option('-f','--force',dest='force',default=False,action='store_true',help='force file installation') + gr.add_option('--distcheck-args',metavar='ARGS',help='arguments to pass to distcheck',default=None,action='store') def jobs(self): count=int(os.environ.get('JOBS',0)) if count<1: @@ -94,7 +100,7 @@ if not count and os.name not in('nt','java'): try: tmp=self.cmd_and_log(['sysctl','-n','hw.ncpu'],quiet=0) - except Exception: + except Errors.WafError: pass else: if re.match('^[0-9]+$',tmp): @@ -105,11 +111,11 @@ count=1024 return count def add_option(self,*k,**kw): - self.parser.add_option(*k,**kw) + return self.parser.add_option(*k,**kw) def add_option_group(self,*k,**kw): try: gr=self.option_groups[k[0]] - except: + except KeyError: gr=self.parser.add_option_group(*k,**kw) self.option_groups[k[0]]=gr return gr @@ -122,13 +128,20 @@ return group return None def parse_args(self,_args=None): - global options,commands + global options,commands,envvars (options,leftover_args)=self.parser.parse_args(args=_args) - commands=leftover_args + for arg in leftover_args: + if'='in arg: + envvars.append(arg) + else: + commands.append(arg) if options.destdir: - options.destdir=os.path.abspath(os.path.expanduser(options.destdir)) + options.destdir=Utils.sane_path(options.destdir) if options.verbose>=1: self.load('errcheck') + colors={'yes':2,'auto':1,'no':0}[options.colors] + Logs.enable_colors(colors) def execute(self): super(OptionsContext,self).execute() self.parse_args() + Utils.alloc_process_pool(options.jobs) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/processor.py glmark2-2021.02/waflib/processor.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/processor.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/processor.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,40 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +import sys,traceback,base64 +try: + import cPickle +except ImportError: + import pickle as cPickle +try: + import subprocess32 as subprocess +except ImportError: + import subprocess +def run(): + txt=sys.stdin.readline().strip() + if not txt: + sys.exit(1) + [cmd,kwargs,cargs]=cPickle.loads(base64.b64decode(txt)) + cargs=cargs or{} + ret=1 + out,err,ex,trace=(None,None,None,None) + try: + proc=subprocess.Popen(cmd,**kwargs) + out,err=proc.communicate(**cargs) + ret=proc.returncode + except(OSError,ValueError,Exception)as e: + exc_type,exc_value,tb=sys.exc_info() + exc_lines=traceback.format_exception(exc_type,exc_value,tb) + trace=str(cmd)+'\n'+''.join(exc_lines) + ex=e.__class__.__name__ + tmp=[ret,out,err,ex,trace] + obj=base64.b64encode(cPickle.dumps(tmp)) + sys.stdout.write(obj.decode()) + sys.stdout.write('\n') + sys.stdout.flush() +while 1: + try: + run() + except KeyboardInterrupt: + break diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Runner.py glmark2-2021.02/waflib/Runner.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Runner.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Runner.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,57 +1,57 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import random,atexit +import random try: from queue import Queue -except: +except ImportError: from Queue import Queue from waflib import Utils,Task,Errors,Logs -GAP=10 -class TaskConsumer(Utils.threading.Thread): - def __init__(self): +GAP=20 +class Consumer(Utils.threading.Thread): + __slots__=('task','spawner') + def __init__(self,spawner,task): Utils.threading.Thread.__init__(self) - self.ready=Queue() + self.task=task + self.spawner=spawner + self.setDaemon(1) + self.start() + def run(self): + try: + if not self.spawner.master.stop: + self.task.process() + finally: + self.spawner.sem.release() + self.spawner.master.out.put(self.task) + self.task=None + self.spawner=None +class Spawner(Utils.threading.Thread): + def __init__(self,master): + Utils.threading.Thread.__init__(self) + self.master=master + self.sem=Utils.threading.Semaphore(master.numjobs) self.setDaemon(1) self.start() def run(self): try: self.loop() - except: + except Exception: pass def loop(self): + master=self.master while 1: - tsk=self.ready.get() - if not isinstance(tsk,Task.TaskBase): - tsk(self) - else: - tsk.process() -pool=Queue() -def get_pool(): - try: - return pool.get(False) - except: - return TaskConsumer() -def put_pool(x): - pool.put(x) -def _free_resources(): - global pool - lst=[] - while pool.qsize(): - lst.append(pool.get()) - for x in lst: - x.ready.put(None) - for x in lst: - x.join() - pool=None -atexit.register(_free_resources) + task=master.ready.get() + self.sem.acquire() + task.log_display(task.generator.bld) + Consumer(self,task) class Parallel(object): def __init__(self,bld,j=2): self.numjobs=j self.bld=bld - self.outstanding=[] - self.frozen=[] + self.outstanding=Utils.deque() + self.frozen=Utils.deque() + self.ready=Queue(0) self.out=Queue(0) self.count=0 self.processed=1 @@ -59,13 +59,14 @@ self.error=[] self.biter=None self.dirty=False + self.spawner=Spawner(self) def get_next_task(self): if not self.outstanding: return None - return self.outstanding.pop(0) + return self.outstanding.popleft() def postpone(self,tsk): if random.randint(0,1): - self.frozen.insert(0,tsk) + self.frozen.appendleft(tsk) else: self.frozen.append(tsk) def refill_task_list(self): @@ -77,7 +78,7 @@ elif self.frozen: try: cond=self.deadlock==self.processed - except: + except AttributeError: pass else: if cond: @@ -92,15 +93,15 @@ raise Errors.WafError('Deadlock detected: %s%s'%(msg,''.join(lst))) self.deadlock=self.processed if self.frozen: - self.outstanding+=self.frozen - self.frozen=[] + self.outstanding.extend(self.frozen) + self.frozen.clear() elif not self.count: - self.outstanding.extend(self.biter.next()) + self.outstanding.extend(next(self.biter)) self.total=self.bld.total() break def add_more_tasks(self,tsk): if getattr(tsk,'more_tasks',None): - self.outstanding+=tsk.more_tasks + self.outstanding.extend(tsk.more_tasks) self.total+=len(tsk.more_tasks) def get_out(self): tsk=self.out.get() @@ -109,40 +110,38 @@ self.count-=1 self.dirty=True return tsk + def add_task(self,tsk): + self.ready.put(tsk) + def skip(self,tsk): + tsk.hasrun=Task.SKIPPED def error_handler(self,tsk): + if hasattr(tsk,'scan')and hasattr(tsk,'uid'): + try: + del self.bld.imp_sigs[tsk.uid()] + except KeyError: + pass if not self.bld.keep: self.stop=True self.error.append(tsk) - def add_task(self,tsk): + def task_status(self,tsk): try: - self.pool - except AttributeError: - self.init_task_pool() - self.ready.put(tsk) - def init_task_pool(self): - pool=self.pool=[get_pool()for i in range(self.numjobs)] - self.ready=Queue(0) - def setq(consumer): - consumer.ready=self.ready - for x in pool: - x.ready.put(setq) - return pool - def free_task_pool(self): - def setq(consumer): - consumer.ready=Queue(0) - self.out.put(self) - try: - pool=self.pool - except: - pass - else: - for x in pool: - self.ready.put(setq) - for x in pool: - self.get_out() - for x in pool: - put_pool(x) - self.pool=[] + return tsk.runnable_status() + except Exception: + self.processed+=1 + tsk.err_msg=Utils.ex_stack() + if not self.stop and self.bld.keep: + self.skip(tsk) + if self.bld.keep==1: + if Logs.verbose>1 or not self.error: + self.error.append(tsk) + self.stop=True + else: + if Logs.verbose>1: + self.error.append(tsk) + return Task.EXCEPTION + tsk.hasrun=Task.EXCEPTION + self.error_handler(tsk) + return Task.EXCEPTION def start(self): self.total=self.bld.total() while not self.stop: @@ -158,40 +157,25 @@ continue if self.stop: break - try: - st=tsk.runnable_status() - except Exception: + st=self.task_status(tsk) + if st==Task.RUN_ME: + self.count+=1 self.processed+=1 - tsk.err_msg=Utils.ex_stack() - if not self.stop and self.bld.keep: - tsk.hasrun=Task.SKIPPED - if self.bld.keep==1: - if Logs.verbose>1 or not self.error: - self.error.append(tsk) - self.stop=True - else: - if Logs.verbose>1: - self.error.append(tsk) - continue - tsk.hasrun=Task.EXCEPTION - self.error_handler(tsk) - continue + if self.numjobs==1: + tsk.log_display(tsk.generator.bld) + try: + tsk.process() + finally: + self.out.put(tsk) + else: + self.add_task(tsk) if st==Task.ASK_LATER: self.postpone(tsk) elif st==Task.SKIP_ME: self.processed+=1 - tsk.hasrun=Task.SKIPPED + self.skip(tsk) self.add_more_tasks(tsk) - else: - tsk.position=(self.processed,self.total) - self.count+=1 - tsk.master=self - self.processed+=1 - if self.numjobs==1: - tsk.process() - else: - self.add_task(tsk) while self.error and self.count: self.get_out() + self.ready.put(None) assert(self.count==0 or self.stop) - self.free_task_pool() diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Scripting.py glmark2-2021.02/waflib/Scripting.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Scripting.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Scripting.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,8 +1,8 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,shutil,traceback,errno,sys,stat +import os,shlex,shutil,traceback,errno,sys,stat from waflib import Utils,Configure,Logs,Options,ConfigSet,Context,Errors,Build,Node build_dir_override=None no_climb_commands=['configure'] @@ -10,7 +10,7 @@ def waf_entry_point(current_directory,version,wafdir): Logs.init_log() if Context.WAFVERSION!=version: - Logs.error('Waf script %r and library %r do not match (directory %r)'%(version,Context.WAFVERSION,wafdir)) + Logs.error('Waf script %r and library %r do not match (directory %r)',version,Context.WAFVERSION,wafdir) sys.exit(1) if'--version'in sys.argv: Context.run_dir=current_directory @@ -18,26 +18,43 @@ ctx.curdir=current_directory ctx.parse_args() sys.exit(0) + if len(sys.argv)>1: + potential_wscript=os.path.join(current_directory,sys.argv[1]) + if os.path.basename(potential_wscript)=='wscript'and os.path.isfile(potential_wscript): + current_directory=os.path.normpath(os.path.dirname(potential_wscript)) + sys.argv.pop(1) Context.waf_dir=wafdir Context.launch_dir=current_directory - no_climb=os.environ.get('NOCLIMB',None) + no_climb=os.environ.get('NOCLIMB') if not no_climb: for k in no_climb_commands: - if k in sys.argv: - no_climb=True - break + for y in sys.argv: + if y.startswith(k): + no_climb=True + break + for i,x in enumerate(sys.argv): + if x.startswith('--top='): + Context.run_dir=Context.top_dir=Utils.sane_path(x[6:]) + sys.argv[i]='--top='+Context.run_dir + if x.startswith('--out='): + Context.out_dir=Utils.sane_path(x[6:]) + sys.argv[i]='--out='+Context.out_dir cur=current_directory - while cur: - lst=os.listdir(cur) + while cur and not Context.top_dir: + try: + lst=os.listdir(cur) + except OSError: + lst=[] + Logs.error('Directory %r is unreadable!',cur) if Options.lockfile in lst: env=ConfigSet.ConfigSet() try: env.load(os.path.join(cur,Options.lockfile)) ino=os.stat(cur)[stat.ST_INO] - except Exception: + except EnvironmentError: pass else: - for x in[env.run_dir,env.top_dir,env.out_dir]: + for x in(env.run_dir,env.top_dir,env.out_dir): if Utils.is_win32: if cur==x: load=True @@ -45,14 +62,14 @@ else: try: ino2=os.stat(x)[stat.ST_INO] - except: + except OSError: pass else: if ino==ino2: load=True break else: - Logs.warn('invalid lock file in %s'%cur) + Logs.warn('invalid lock file in %s',cur) load=False if load: Context.run_dir=env.run_dir @@ -76,36 +93,44 @@ ctx.curdir=current_directory ctx.parse_args() sys.exit(0) - Logs.error('Waf: Run from a directory containing a file named %r'%Context.WSCRIPT_FILE) + Logs.error('Waf: Run from a directory containing a file named %r',Context.WSCRIPT_FILE) sys.exit(1) try: os.chdir(Context.run_dir) except OSError: - Logs.error('Waf: The folder %r is unreadable'%Context.run_dir) + Logs.error('Waf: The folder %r is unreadable',Context.run_dir) sys.exit(1) try: - set_main_module(Context.run_dir+os.sep+Context.WSCRIPT_FILE) - except Errors.WafError ,e: + set_main_module(os.path.normpath(os.path.join(Context.run_dir,Context.WSCRIPT_FILE))) + except Errors.WafError as e: Logs.pprint('RED',e.verbose_msg) Logs.error(str(e)) sys.exit(1) - except Exception ,e: - Logs.error('Waf: The wscript in %r is unreadable'%Context.run_dir,e) - traceback.print_exc(file=sys.stdout) - sys.exit(2) - try: - run_commands() - except Errors.WafError ,e: - if Logs.verbose>1: - Logs.pprint('RED',e.verbose_msg) - Logs.error(e.msg) - sys.exit(1) - except Exception ,e: + except Exception as e: + Logs.error('Waf: The wscript in %r is unreadable',Context.run_dir) traceback.print_exc(file=sys.stdout) sys.exit(2) - except KeyboardInterrupt: - Logs.pprint('RED','Interrupted') - sys.exit(68) + if'--profile'in sys.argv: + import cProfile,pstats + cProfile.runctx('from waflib import Scripting; Scripting.run_commands()',{},{},'profi.txt') + p=pstats.Stats('profi.txt') + p.sort_stats('time').print_stats(75) + else: + try: + run_commands() + except Errors.WafError as e: + if Logs.verbose>1: + Logs.pprint('RED',e.verbose_msg) + Logs.error(e.msg) + sys.exit(1) + except SystemExit: + raise + except Exception as e: + traceback.print_exc(file=sys.stdout) + sys.exit(2) + except KeyboardInterrupt: + Logs.pprint('RED','Interrupted') + sys.exit(68) def set_main_module(file_path): Context.g_module=Context.load_module(file_path) Context.g_module.root_path=file_path @@ -113,7 +138,7 @@ name=obj.__name__ if not name in Context.g_module.__dict__: setattr(Context.g_module,name,obj) - for k in[update,dist,distclean,distcheck,update]: + for k in(dist,distclean,distcheck): set_def(k) if not'init'in Context.g_module.__dict__: Context.g_module.init=Utils.nada @@ -123,11 +148,13 @@ Context.g_module.options=Utils.nada def parse_options(): Context.create_context('options').execute() + for var in Options.envvars: + (name,value)=var.split('=',1) + os.environ[name.strip()]=value if not Options.commands: Options.commands=[default_cmd] Options.commands=[x for x in Options.commands if x!='options'] Logs.verbose=Options.options.verbose - Logs.init_log() if Options.options.zones: Logs.zones=Options.options.zones.split(',') if not Logs.verbose: @@ -138,43 +165,39 @@ Logs.zones=['*'] def run_command(cmd_name): ctx=Context.create_context(cmd_name) + ctx.log_timer=Utils.Timer() ctx.options=Options.options ctx.cmd=cmd_name - ctx.execute() + try: + ctx.execute() + finally: + ctx.finalize() return ctx def run_commands(): parse_options() run_command('init') while Options.commands: cmd_name=Options.commands.pop(0) - timer=Utils.Timer() - run_command(cmd_name) - if not Options.options.progress_bar: - elapsed=' (%s)'%str(timer) - Logs.info('%r finished successfully%s'%(cmd_name,elapsed)) + ctx=run_command(cmd_name) + Logs.info('%r finished successfully (%s)',cmd_name,ctx.log_timer) run_command('shutdown') -def _can_distclean(name): - for k in'.o .moc .exe'.split(): - if name.endswith(k): - return True - return False def distclean_dir(dirname): for(root,dirs,files)in os.walk(dirname): for f in files: - if _can_distclean(f): - fname=root+os.sep+f + if f.endswith(('.o','.moc','.exe')): + fname=os.path.join(root,f) try: - os.unlink(fname) - except: - Logs.warn('could not remove %r'%fname) - for x in[Context.DBFILE,'config.log']: + os.remove(fname) + except OSError: + Logs.warn('Could not remove %r',fname) + for x in(Context.DBFILE,'config.log'): try: - os.unlink(x) - except: + os.remove(x) + except OSError: pass try: shutil.rmtree('c4che') - except: + except OSError: pass def distclean(ctx): '''removes the build directory''' @@ -183,28 +206,30 @@ if f==Options.lockfile: try: proj=ConfigSet.ConfigSet(f) - except: - Logs.warn('could not read %r'%f) + except IOError: + Logs.warn('Could not read %r',f) continue if proj['out_dir']!=proj['top_dir']: try: shutil.rmtree(proj['out_dir']) - except IOError: - pass - except OSError ,e: + except EnvironmentError as e: if e.errno!=errno.ENOENT: - Logs.warn('project %r cannot be removed'%proj[Context.OUT]) + Logs.warn('Could not remove %r',proj['out_dir']) else: distclean_dir(proj['out_dir']) for k in(proj['out_dir'],proj['top_dir'],proj['run_dir']): + p=os.path.join(k,Options.lockfile) try: - os.remove(os.path.join(k,Options.lockfile)) - except OSError ,e: + os.remove(p) + except OSError as e: if e.errno!=errno.ENOENT: - Logs.warn('file %r cannot be removed'%f) - if f.startswith('.waf')and not Options.commands: - shutil.rmtree(f,ignore_errors=True) + Logs.warn('Could not remove %r',p) + if not Options.commands: + for x in'.waf-1. waf-1. .waf3-1. waf3-1.'.split(): + if f.startswith(x): + shutil.rmtree(f,ignore_errors=True) class Dist(Context.Context): + '''creates an archive containing the project source code''' cmd='dist' fun='dist' algo='tar.bz2' @@ -217,12 +242,12 @@ arch_name=self.get_arch_name() try: self.base_path - except: + except AttributeError: self.base_path=self.path node=self.base_path.make_node(arch_name) try: node.delete() - except: + except OSError: pass files=self.get_files() if self.algo.startswith('tar.'): @@ -238,16 +263,14 @@ zip.write(x.abspath(),archive_name,zipfile.ZIP_DEFLATED) zip.close() else: - self.fatal('Valid algo types are tar.bz2, tar.gz or zip') + self.fatal('Valid algo types are tar.bz2, tar.gz, tar.xz or zip') try: - from hashlib import sha1 as sha + from hashlib import sha1 except ImportError: - from sha import sha - try: - digest=" (sha=%r)"%sha(node.read()).hexdigest() - except: digest='' - Logs.info('New archive created: %s%s'%(self.arch_name,digest)) + else: + digest=' (sha=%r)'%sha1(node.read(flags='rb')).hexdigest() + Logs.info('New archive created: %s%s',self.arch_name,digest) def get_tar_path(self,node): return node.abspath() def add_tar_file(self,x,tar): @@ -257,28 +280,29 @@ tinfo.gid=0 tinfo.uname='root' tinfo.gname='root' - fu=None - try: + if os.path.isfile(p): fu=open(p,'rb') - tar.addfile(tinfo,fileobj=fu) - finally: - if fu: + try: + tar.addfile(tinfo,fileobj=fu) + finally: fu.close() + else: + tar.addfile(tinfo) def get_tar_prefix(self): try: return self.tar_prefix - except: + except AttributeError: return self.get_base_name() def get_arch_name(self): try: self.arch_name - except: + except AttributeError: self.arch_name=self.get_base_name()+'.'+self.ext_algo.get(self.algo,self.algo) return self.arch_name def get_base_name(self): try: self.base_name - except: + except AttributeError: appname=getattr(Context.g_module,Context.APPNAME,'noname') version=getattr(Context.g_module,Context.VERSION,'1.0') self.base_name=appname+'-'+version @@ -286,16 +310,17 @@ def get_excl(self): try: return self.excl - except: - self.excl=Node.exclude_regs+' **/waf-1.6.* **/.waf-1.6* **/waf3-1.6.* **/.waf3-1.6* **/*~ **/*.rej **/*.orig **/*.pyc **/*.pyo **/*.bak **/*.swp **/.lock-w*' - nd=self.root.find_node(Context.out_dir) - if nd: - self.excl+=' '+nd.path_from(self.base_path) + except AttributeError: + self.excl=Node.exclude_regs+' **/waf-1.8.* **/.waf-1.8* **/waf3-1.8.* **/.waf3-1.8* **/*~ **/*.rej **/*.orig **/*.pyc **/*.pyo **/*.bak **/*.swp **/.lock-w*' + if Context.out_dir: + nd=self.root.find_node(Context.out_dir) + if nd: + self.excl+=' '+nd.path_from(self.base_path) return self.excl def get_files(self): try: files=self.files - except: + except AttributeError: files=self.base_path.ant_glob('**/*',excl=self.get_excl()) return files def dist(ctx): @@ -310,35 +335,27 @@ self.check() def check(self): import tempfile,tarfile - t=None try: t=tarfile.open(self.get_arch_name()) for x in t: t.extract(x) finally: - if t: - t.close() + t.close() + cfg=[] + if Options.options.distcheck_args: + cfg=shlex.split(Options.options.distcheck_args) + else: + cfg=[x for x in sys.argv if x.startswith('-')] instdir=tempfile.mkdtemp('.inst',self.get_base_name()) - ret=Utils.subprocess.Popen([sys.argv[0],'configure','install','uninstall','--destdir='+instdir],cwd=self.get_base_name()).wait() + ret=Utils.subprocess.Popen([sys.executable,sys.argv[0],'configure','install','uninstall','--destdir='+instdir]+cfg,cwd=self.get_base_name()).wait() if ret: - raise Errors.WafError('distcheck failed with code %i'%ret) + raise Errors.WafError('distcheck failed with code %r'%ret) if os.path.exists(instdir): raise Errors.WafError('distcheck succeeded, but files were left in %s'%instdir) shutil.rmtree(self.get_base_name()) def distcheck(ctx): '''checks if the project compiles (tarball from 'dist')''' pass -def update(ctx): - '''updates the plugins from the *waflib/extras* directory''' - lst=Options.options.files.split(',') - if not lst: - lst=[x for x in Utils.listdir(Context.waf_dir+'/waflib/extras')if x.endswith('.py')] - for x in lst: - tool=x.replace('.py','') - try: - Configure.download_tool(tool,force=True,ctx=ctx) - except Errors.WafError: - Logs.error('Could not find the tool %s in the remote repository'%x) def autoconfigure(execute_method): def execute(self): if not Configure.autoconfig: @@ -347,7 +364,7 @@ do_config=False try: env.load(os.path.join(Context.top_dir,Options.lockfile)) - except Exception: + except EnvironmentError: Logs.warn('Configuring the project') do_config=True else: @@ -355,13 +372,27 @@ do_config=True else: h=0 - for f in env['files']: - h=hash((h,Utils.readf(f,'rb'))) - do_config=h!=env.hash + for f in env.files: + try: + h=Utils.h_list((h,Utils.readf(f,'rb'))) + except EnvironmentError: + do_config=True + break + else: + do_config=h!=env.hash if do_config: - Options.commands.insert(0,self.cmd) - Options.commands.insert(0,'configure') - return - return execute_method(self) + cmd=env.config_cmd or'configure' + if Configure.autoconfig=='clobber': + tmp=Options.options.__dict__ + Options.options.__dict__=env.options + try: + run_command(cmd) + finally: + Options.options.__dict__=tmp + else: + run_command(cmd) + run_command(self.cmd) + else: + return execute_method(self) return execute Build.BuildContext.execute=autoconfigure(Build.BuildContext.execute) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/TaskGen.py glmark2-2021.02/waflib/TaskGen.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/TaskGen.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/TaskGen.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,21 +1,18 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set import copy,re,os -from waflib import Task,Utils,Logs,Errors,ConfigSet +from waflib import Task,Utils,Logs,Errors,ConfigSet,Node feats=Utils.defaultdict(set) +HEADER_EXTS=['.h','.hpp','.hxx','.hh'] class task_gen(object): - mappings={} + mappings=Utils.ordered_iter_dict() prec=Utils.defaultdict(list) def __init__(self,*k,**kw): self.source='' self.target='' self.meths=[] - self.prec=Utils.defaultdict(list) - self.mappings={} self.features=[] self.tasks=[] if not'bld'in kw: @@ -27,20 +24,22 @@ self.env=self.bld.env.derive() self.path=self.bld.path try: - self.idx=self.bld.idx[id(self.path)]=self.bld.idx.get(id(self.path),0)+1 + self.idx=self.bld.idx[self.path]=self.bld.idx.get(self.path,0)+1 except AttributeError: self.bld.idx={} - self.idx=self.bld.idx[id(self.path)]=1 + self.idx=self.bld.idx[self.path]=1 for key,val in kw.items(): setattr(self,key,val) def __str__(self): return""%(self.name,self.path.abspath()) def __repr__(self): lst=[] - for x in self.__dict__.keys(): - if x not in['env','bld','compiled_tasks','tasks']: + for x in self.__dict__: + if x not in('env','bld','compiled_tasks','tasks'): lst.append("%s=%s"%(x,repr(getattr(self,x)))) return"bld(%s) in %s"%(", ".join(lst),self.path.abspath()) + def get_cwd(self): + return self.bld.bldnode def get_name(self): try: return self._name @@ -55,8 +54,10 @@ self._name=name name=property(get_name,set_name) def to_list(self,val): - if isinstance(val,str):return val.split() - else:return val + if isinstance(val,str): + return val.split() + else: + return val def post(self): if getattr(self,'posted',None): return False @@ -67,10 +68,10 @@ st=feats[x] if not st: if not x in Task.classes: - Logs.warn('feature %r does not exist - bind at least one method to it'%x) + Logs.warn('feature %r does not exist - bind at least one method to it',x) keys.update(list(st)) prec={} - prec_tbl=self.prec or task_gen.prec + prec_tbl=self.prec for x in prec_tbl: if x in keys: prec[x]=prec_tbl[x] @@ -80,6 +81,7 @@ if a in x:break else: tmp.append(a) + tmp.sort() out=[] while tmp: e=tmp.pop() @@ -97,42 +99,45 @@ else: tmp.append(x) if prec: - raise Errors.WafError('Cycle detected in the method execution %r'%prec) + txt='\n'.join(['- %s after %s'%(k,repr(v))for k,v in prec.items()]) + raise Errors.WafError('Cycle detected in the method execution\n%s'%txt) out.reverse() self.meths=out - Logs.debug('task_gen: posting %s %d'%(self,id(self))) + Logs.debug('task_gen: posting %s %d',self,id(self)) for x in out: try: v=getattr(self,x) except AttributeError: raise Errors.WafError('%r is not a valid task generator method'%x) - Logs.debug('task_gen: -> %s (%d)'%(x,id(self))) + Logs.debug('task_gen: -> %s (%d)',x,id(self)) v() - Logs.debug('task_gen: posted %s'%self.name) + Logs.debug('task_gen: posted %s',self.name) return True def get_hook(self,node): name=node.name for k in self.mappings: - if name.endswith(k): - return self.mappings[k] - for k in task_gen.mappings: - if name.endswith(k): - return task_gen.mappings[k] - raise Errors.WafError("File %r has no mapping in %r (did you forget to load a waf tool?)"%(node,task_gen.mappings.keys())) - def create_task(self,name,src=None,tgt=None): + try: + if name.endswith(k): + return self.mappings[k] + except TypeError: + if k.match(name): + return self.mappings[k] + raise Errors.WafError("File %r has no mapping in %r (have you forgotten to load a waf tool?)"%(node,self.mappings.keys())) + def create_task(self,name,src=None,tgt=None,**kw): task=Task.classes[name](env=self.env.derive(),generator=self) if src: task.set_inputs(src) if tgt: task.set_outputs(tgt) + task.__dict__.update(kw) self.tasks.append(task) return task def clone(self,env): newobj=self.bld() for x in self.__dict__: - if x in['env','bld']: + if x in('env','bld'): continue - elif x in['path','features']: + elif x in('path','features'): setattr(newobj,x,getattr(self,x)) else: setattr(newobj,x,copy.copy(getattr(self,x))) @@ -149,12 +154,11 @@ name=rule cls=Task.task_factory(name,rule,color=color,ext_in=ext_in,ext_out=ext_out,before=before,after=after,scan=scan,shell=shell) def x_file(self,node): - ext=decider and decider(self,node)or cls.ext_out if ext_in: _ext_in=ext_in[0] tsk=self.create_task(name,node) cnt=0 - keys=self.mappings.keys()+self.__class__.mappings.keys() + ext=decider(self,node)if decider else cls.ext_out for x in ext: k=node.change_ext(x,ext_in=_ext_in) tsk.outputs.append(k) @@ -162,13 +166,13 @@ if cnt=8192 if Utils.is_win32 else len(cmd)>200000): + cmd,args=self.split_argfile(cmd) + try: + (fd,tmp)=tempfile.mkstemp() + os.write(fd,'\r\n'.join(args).encode()) + os.close(fd) + return self.generator.bld.exec_command(cmd+['@'+tmp],**kw) + finally: + try: + os.remove(tmp) + except OSError: + pass + else: + return self.generator.bld.exec_command(cmd,**kw) def runnable_status(self): return RUN_ME + def uid(self): + return Utils.SIG_NIL def process(self): - m=self.master - if m.stop: - m.out.put(self) - return + m=self.generator.bld.producer try: del self.generator.bld.task_sigs[self.uid()] - except: + except KeyError: pass try: - self.generator.bld.returned_tasks.append(self) - self.log_display(self.generator.bld) ret=self.run() except Exception: self.err_msg=Utils.ex_stack() self.hasrun=EXCEPTION m.error_handler(self) - m.out.put(self) return if ret: self.err_code=ret @@ -144,7 +156,6 @@ self.hasrun=SUCCESS if self.hasrun!=SUCCESS: m.error_handler(self) - m.out.put(self) def run(self): if hasattr(self,'fun'): return self.fun(self) @@ -152,11 +163,24 @@ def post_run(self): pass def log_display(self,bld): - bld.to_log(self.display()) + if self.generator.bld.progress_bar==3: + return + s=self.display() + if s: + if bld.logger: + logger=bld.logger + else: + logger=Logs + if self.generator.bld.progress_bar==1: + c1=Logs.colors.cursor_off + c2=Logs.colors.cursor_on + logger.info(s,extra={'stream':sys.stderr,'terminator':'','c1':c1,'c2':c2}) + else: + logger.info(s,extra={'terminator':'','c1':'','c2':''}) def display(self): col1=Logs.colors(self.color) col2=Logs.colors.NORMAL - master=self.master + master=self.generator.bld.producer def cur(): tmp=-1 if hasattr(master,'ready'): @@ -180,17 +204,15 @@ return None total=master.total n=len(str(total)) - fs='[%%%dd/%%%dd] %%s%%s%%s'%(n,n) - return fs%(cur(),total,col1,s,col2) - def attr(self,att,default=None): - ret=getattr(self,att,self) - if ret is self:return getattr(self.__class__,att,default) - return ret + fs='[%%%dd/%%%dd] %%s%%s%%s%%s\n'%(n,n) + kw=self.keyword() + if kw: + kw+=' ' + return fs%(cur(),total,kw,col1,s,col2) def hash_constraints(self): cls=self.__class__ tup=(str(cls.before),str(cls.after),str(cls.ext_in),str(cls.ext_out),cls.__name__,cls.hcode) - h=hash(tup) - return h + return hash(tup) def format_error(self): msg=getattr(self,'last_cmd','') name=getattr(self.generator,'name','') @@ -209,6 +231,8 @@ return'invalid status for task in %r: %r'%(name,self.hasrun) def colon(self,var1,var2): tmp=self.env[var1] + if not tmp: + return[] if isinstance(var2,str): it=self.env[var2] else: @@ -216,8 +240,6 @@ if isinstance(tmp,str): return[tmp%x for x in it] else: - if Logs.verbose and not tmp and it: - Logs.warn('Missing env variable %r for task %r (generator %r)'%(var1,self,self.generator)) lst=[] for y in it: lst.extend(tmp) @@ -225,6 +247,7 @@ return lst class Task(TaskBase): vars=[] + always_run=False shell=False def __init__(self,*k,**kw): TaskBase.__init__(self,*k,**kw) @@ -234,21 +257,47 @@ self.dep_nodes=[] self.run_after=set([]) def __str__(self): - env=self.env - src_str=' '.join([a.nice_path(env)for a in self.inputs]) - tgt_str=' '.join([a.nice_path(env)for a in self.outputs]) + name=self.__class__.__name__ + if self.outputs: + if name.endswith(('lib','program'))or not self.inputs: + node=self.outputs[0] + return node.path_from(node.ctx.launch_node()) + if not(self.inputs or self.outputs): + return self.__class__.__name__ + if len(self.inputs)==1: + node=self.inputs[0] + return node.path_from(node.ctx.launch_node()) + src_str=' '.join([a.path_from(a.ctx.launch_node())for a in self.inputs]) + tgt_str=' '.join([a.path_from(a.ctx.launch_node())for a in self.outputs]) if self.outputs:sep=' -> ' else:sep='' - return'%s: %s%s%s\n'%(self.__class__.__name__.replace('_task',''),src_str,sep,tgt_str) + return'%s: %s%s%s'%(self.__class__.__name__,src_str,sep,tgt_str) + def keyword(self): + name=self.__class__.__name__ + if name.endswith(('lib','program')): + return'Linking' + if len(self.inputs)==1 and len(self.outputs)==1: + return'Compiling' + if not self.inputs: + if self.outputs: + return'Creating' + else: + return'Running' + return'Processing' def __repr__(self): - return"".join(['\n\t{task %r: '%id(self),self.__class__.__name__," ",",".join([x.name for x in self.inputs])," -> ",",".join([x.name for x in self.outputs]),'}']) + try: + ins=",".join([x.name for x in self.inputs]) + outs=",".join([x.name for x in self.outputs]) + except AttributeError: + ins=",".join([str(x)for x in self.inputs]) + outs=",".join([str(x)for x in self.outputs]) + return"".join(['\n\t{task %r: '%id(self),self.__class__.__name__," ",ins," -> ",outs,'}']) def uid(self): try: return self.uid_ except AttributeError: - m=Utils.md5() + m=Utils.md5(self.__class__.__name__) up=m.update - up(self.__class__.__name__) for x in self.inputs+self.outputs: up(x.abspath()) self.uid_=m.digest() @@ -263,10 +312,11 @@ assert isinstance(task,TaskBase) self.run_after.add(task) def signature(self): - try:return self.cache_sig - except AttributeError:pass - self.m=Utils.md5() - self.m.update(self.hcode) + try: + return self.cache_sig + except AttributeError: + pass + self.m=Utils.md5(self.hcode) self.sig_explicit_deps() self.sig_vars() if self.scan: @@ -280,124 +330,108 @@ for t in self.run_after: if not t.hasrun: return ASK_LATER - bld=self.generator.bld try: new_sig=self.signature() except Errors.TaskNotReady: return ASK_LATER + bld=self.generator.bld key=self.uid() try: prev_sig=bld.task_sigs[key] except KeyError: - Logs.debug("task: task %r must run as it was never run before or the task code changed"%self) + Logs.debug('task: task %r must run: it was never run before or the task code changed',self) return RUN_ME - for node in self.outputs: - try: - if node.sig!=new_sig: - return RUN_ME - except AttributeError: - Logs.debug("task: task %r must run as the output nodes do not exist"%self) - return RUN_ME if new_sig!=prev_sig: + Logs.debug('task: task %r must run: the task signature changed',self) return RUN_ME - return SKIP_ME + for node in self.outputs: + sig=bld.node_sigs.get(node) + if not sig: + Logs.debug('task: task %r must run: an output node has no signature',self) + return RUN_ME + if sig!=key: + Logs.debug('task: task %r must run: an output node was produced by another task',self) + return RUN_ME + if not node.exists(): + Logs.debug('task: task %r must run: an output node does not exist',self) + return RUN_ME + return(self.always_run and RUN_ME)or SKIP_ME def post_run(self): bld=self.generator.bld - sig=self.signature() for node in self.outputs: - try: - os.stat(node.abspath()) - except OSError: + if not node.exists(): self.hasrun=MISSING self.err_msg='-> missing file: %r'%node.abspath() raise Errors.WafError(self.err_msg) - node.sig=sig - bld.task_sigs[self.uid()]=self.cache_sig + bld.node_sigs[node]=self.uid() + bld.task_sigs[self.uid()]=self.signature() + if not self.keep_last_cmd: + try: + del self.last_cmd + except AttributeError: + pass def sig_explicit_deps(self): bld=self.generator.bld upd=self.m.update for x in self.inputs+self.dep_nodes: - try: - upd(x.get_bld_sig()) - except(AttributeError,TypeError): - raise Errors.WafError('Missing node signature for %r (required by %r)'%(x,self)) + upd(x.get_bld_sig()) if bld.deps_man: additional_deps=bld.deps_man for x in self.inputs+self.outputs: try: - d=additional_deps[id(x)] + d=additional_deps[x] except KeyError: continue for v in d: if isinstance(v,bld.root.__class__): - try: - v=v.get_bld_sig() - except AttributeError: - raise Errors.WafError('Missing node signature for %r (required by %r)'%(v,self)) + v=v.get_bld_sig() elif hasattr(v,'__call__'): v=v() upd(v) - return self.m.digest() def sig_vars(self): - bld=self.generator.bld - env=self.env - upd=self.m.update - act_sig=bld.hash_env_vars(env,self.__class__.vars) - upd(act_sig) - dep_vars=getattr(self,'dep_vars',None) - if dep_vars: - upd(bld.hash_env_vars(env,dep_vars)) - return self.m.digest() + sig=self.generator.bld.hash_env_vars(self.env,self.__class__.vars) + self.m.update(sig) scan=None def sig_implicit_deps(self): bld=self.generator.bld key=self.uid() - prev=bld.task_sigs.get((key,'imp'),[]) + prev=bld.imp_sigs.get(key,[]) if prev: try: if prev==self.compute_sig_implicit_deps(): return prev - except: + except Errors.TaskNotReady: + raise + except EnvironmentError: for x in bld.node_deps.get(self.uid(),[]): - if x.is_child_of(bld.srcnode): + if not x.is_bld()and not x.exists(): try: - os.stat(x.abspath()) - except: - try: - del x.parent.children[x.name] - except: - pass - del bld.task_sigs[(key,'imp')] + del x.parent.children[x.name] + except KeyError: + pass + del bld.imp_sigs[key] raise Errors.TaskRescan('rescan') - (nodes,names)=self.scan() + (bld.node_deps[key],bld.raw_deps[key])=self.scan() if Logs.verbose: - Logs.debug('deps: scanner for %s returned %s %s'%(str(self),str(nodes),str(names))) - bld.node_deps[key]=nodes - bld.raw_deps[key]=names - self.are_implicit_nodes_ready() + Logs.debug('deps: scanner for %s: %r; unresolved: %r',self,bld.node_deps[key],bld.raw_deps[key]) try: - bld.task_sigs[(key,'imp')]=sig=self.compute_sig_implicit_deps() - except: - if Logs.verbose: - for k in bld.node_deps.get(self.uid(),[]): - try: - k.get_bld_sig() - except: - Logs.warn('Missing signature for node %r (may cause rebuilds)'%k) - else: - return sig + bld.imp_sigs[key]=self.compute_sig_implicit_deps() + except EnvironmentError: + for k in bld.node_deps.get(self.uid(),[]): + if not k.exists(): + Logs.warn('Dependency %r for %r is missing: check the task declaration and the build order!',k,self) + raise def compute_sig_implicit_deps(self): upd=self.m.update - bld=self.generator.bld self.are_implicit_nodes_ready() - for k in bld.node_deps.get(self.uid(),[]): + for k in self.generator.bld.node_deps.get(self.uid(),[]): upd(k.get_bld_sig()) return self.m.digest() def are_implicit_nodes_ready(self): bld=self.generator.bld try: cache=bld.dct_implicit_nodes - except: + except AttributeError: bld.dct_implicit_nodes=cache={} try: dct=cache[bld.cur] @@ -415,71 +449,19 @@ for tsk in self.run_after: if not tsk.hasrun: raise Errors.TaskNotReady('not ready') - def can_retrieve_cache(self): - if not getattr(self,'outputs',None): - return None - sig=self.signature() - ssig=Utils.to_hex(self.uid())+Utils.to_hex(sig) - dname=os.path.join(self.generator.bld.cache_global,ssig) - try: - t1=os.stat(dname).st_mtime - except OSError: - return None - for node in self.outputs: - orig=os.path.join(dname,node.name) - try: - shutil.copy2(orig,node.abspath()) - os.utime(orig,None) - except(OSError,IOError): - Logs.debug('task: failed retrieving file') - return None - try: - t2=os.stat(dname).st_mtime - except OSError: - return None - if t1!=t2: - return None - for node in self.outputs: - node.sig=sig - if self.generator.bld.progress_bar<1: - self.generator.bld.to_log('restoring from cache %r\n'%node.abspath()) - self.cached=True - return True - def put_files_cache(self): - if getattr(self,'cached',None): - return None - if not getattr(self,'outputs',None): - return None - sig=self.signature() - ssig=Utils.to_hex(self.uid())+Utils.to_hex(sig) - dname=os.path.join(self.generator.bld.cache_global,ssig) - tmpdir=tempfile.mkdtemp(prefix=self.generator.bld.cache_global+os.sep+'waf') - try: - shutil.rmtree(dname) - except: - pass +if sys.hexversion>0x3000000: + def uid(self): try: - for node in self.outputs: - dest=os.path.join(tmpdir,node.name) - shutil.copy2(node.abspath(),dest) - except(OSError,IOError): - try: - shutil.rmtree(tmpdir) - except: - pass - else: - try: - os.rename(tmpdir,dname) - except OSError: - try: - shutil.rmtree(tmpdir) - except: - pass - else: - try: - os.chmod(dname,Utils.O755) - except: - pass + return self.uid_ + except AttributeError: + m=Utils.md5(self.__class__.__name__.encode('iso8859-1','xmlcharrefreplace')) + up=m.update + for x in self.inputs+self.outputs: + up(x.abspath().encode('iso8859-1','xmlcharrefreplace')) + self.uid_=m.digest() + return self.uid_ + uid.__doc__=Task.uid.__doc__ + Task.uid=uid def is_before(t1,t2): to_list=Utils.to_list for k in to_list(t2.ext_in): @@ -521,109 +503,178 @@ b=i else: continue + aval=set(cstr_groups[keys[a]]) for x in cstr_groups[keys[b]]: - x.run_after.update(cstr_groups[keys[a]]) + x.run_after.update(aval) def funex(c): dc={} exec(c,dc) return dc['f'] -reg_act=re.compile(r"(?P\\)|(?P\$\$)|(?P\$\{(?P\w+)(?P.*?)\})",re.M) +re_cond=re.compile('(?P\w+)|(?P\|)|(?P&)') +re_novar=re.compile(r'^(SRC|TGT)\W+.*?$') +reg_act=re.compile(r'(?P\\)|(?P\$\$)|(?P\$\{(?P\w+)(?P.*?)\})',re.M) def compile_fun_shell(line): extr=[] def repl(match): g=match.group - if g('dollar'):return"$" - elif g('backslash'):return'\\\\' - elif g('subst'):extr.append((g('var'),g('code')));return"%s" + if g('dollar'): + return"$" + elif g('backslash'): + return'\\\\' + elif g('subst'): + extr.append((g('var'),g('code'))) + return"%s" return None line=reg_act.sub(repl,line)or line + def replc(m): + if m.group('and'): + return' and ' + elif m.group('or'): + return' or ' + else: + x=m.group('var') + if x not in dvars: + dvars.append(x) + return'env[%r]'%x parm=[] dvars=[] app=parm.append for(var,meth)in extr: if var=='SRC': if meth:app('tsk.inputs%s'%meth) - else:app('" ".join([a.path_from(bld.bldnode) for a in tsk.inputs])') + else:app('" ".join([a.path_from(cwdx) for a in tsk.inputs])') elif var=='TGT': if meth:app('tsk.outputs%s'%meth) - else:app('" ".join([a.path_from(bld.bldnode) for a in tsk.outputs])') + else:app('" ".join([a.path_from(cwdx) for a in tsk.outputs])') elif meth: if meth.startswith(':'): + if var not in dvars: + dvars.append(var) m=meth[1:] if m=='SRC': - m='[a.path_from(bld.bldnode) for a in tsk.inputs]' + m='[a.path_from(cwdx) for a in tsk.inputs]' elif m=='TGT': - m='[a.path_from(bld.bldnode) for a in tsk.outputs]' + m='[a.path_from(cwdx) for a in tsk.outputs]' + elif re_novar.match(m): + m='[tsk.inputs%s]'%m[3:] + elif re_novar.match(m): + m='[tsk.outputs%s]'%m[3:] elif m[:3]not in('tsk','gen','bld'): - dvars.extend([var,meth[1:]]) + dvars.append(meth[1:]) m='%r'%m app('" ".join(tsk.colon(%r, %s))'%(var,m)) + elif meth.startswith('?'): + expr=re_cond.sub(replc,meth[1:]) + app('p(%r) if (%s) else ""'%(var,expr)) else: app('%s%s'%(var,meth)) else: - if not var in dvars:dvars.append(var) + if var not in dvars: + dvars.append(var) app("p('%s')"%var) if parm:parm="%% (%s) "%(',\n\t\t'.join(parm)) else:parm='' c=COMPILE_TEMPLATE_SHELL%(line,parm) - Logs.debug('action: %s'%c) + Logs.debug('action: %s',c.strip().splitlines()) return(funex(c),dvars) +reg_act_noshell=re.compile(r"(?P\s+)|(?P\$\{(?P\w+)(?P.*?)\})|(?P\S+)",re.M) def compile_fun_noshell(line): - extr=[] - def repl(match): - g=match.group - if g('dollar'):return"$" - elif g('subst'):extr.append((g('var'),g('code')));return"<<|@|>>" - return None - line2=reg_act.sub(repl,line) - params=line2.split('<<|@|>>') - assert(extr) buf=[] dvars=[] + merge=False app=buf.append - for x in range(len(extr)): - params[x]=params[x].strip() - if params[x]: - app("lst.extend(%r)"%params[x].split()) - (var,meth)=extr[x] - if var=='SRC': - if meth:app('lst.append(tsk.inputs%s)'%meth) - else:app("lst.extend([a.path_from(bld.bldnode) for a in tsk.inputs])") - elif var=='TGT': - if meth:app('lst.append(tsk.outputs%s)'%meth) - else:app("lst.extend([a.path_from(bld.bldnode) for a in tsk.outputs])") - elif meth: - if meth.startswith(':'): - m=meth[1:] - if m=='SRC': - m='[a.path_from(bld.bldnode) for a in tsk.inputs]' - elif m=='TGT': - m='[a.path_from(bld.bldnode) for a in tsk.outputs]' - elif m[:3]not in('tsk','gen','bld'): - dvars.extend([var,m]) - m='%r'%m - app('lst.extend(tsk.colon(%r, %s))'%(var,m)) - else: - app('lst.extend(gen.to_list(%s%s))'%(var,meth)) + def replc(m): + if m.group('and'): + return' and ' + elif m.group('or'): + return' or ' else: - app('lst.extend(to_list(env[%r]))'%var) - if not var in dvars:dvars.append(var) - if extr: - if params[-1]: - app("lst.extend(%r)"%params[-1].split()) + x=m.group('var') + if x not in dvars: + dvars.append(x) + return'env[%r]'%x + for m in reg_act_noshell.finditer(line): + if m.group('space'): + merge=False + continue + elif m.group('text'): + app('[%r]'%m.group('text')) + elif m.group('subst'): + var=m.group('var') + code=m.group('code') + if var=='SRC': + if code: + app('[tsk.inputs%s]'%code) + else: + app('[a.path_from(cwdx) for a in tsk.inputs]') + elif var=='TGT': + if code: + app('[tsk.outputs%s]'%code) + else: + app('[a.path_from(cwdx) for a in tsk.outputs]') + elif code: + if code.startswith(':'): + if not var in dvars: + dvars.append(var) + m=code[1:] + if m=='SRC': + m='[a.path_from(cwdx) for a in tsk.inputs]' + elif m=='TGT': + m='[a.path_from(cwdx) for a in tsk.outputs]' + elif re_novar.match(m): + m='[tsk.inputs%s]'%m[3:] + elif re_novar.match(m): + m='[tsk.outputs%s]'%m[3:] + elif m[:3]not in('tsk','gen','bld'): + dvars.append(m) + m='%r'%m + app('tsk.colon(%r, %s)'%(var,m)) + elif code.startswith('?'): + expr=re_cond.sub(replc,code[1:]) + app('to_list(env[%r] if (%s) else [])'%(var,expr)) + else: + app('gen.to_list(%s%s)'%(var,code)) + else: + app('to_list(env[%r])'%var) + if not var in dvars: + dvars.append(var) + if merge: + tmp='merge(%s, %s)'%(buf[-2],buf[-1]) + del buf[-1] + buf[-1]=tmp + merge=True + buf=['lst.extend(%s)'%x for x in buf] fun=COMPILE_TEMPLATE_NOSHELL%"\n\t".join(buf) - Logs.debug('action: %s'%fun) + Logs.debug('action: %s',fun.strip().splitlines()) return(funex(fun),dvars) def compile_fun(line,shell=False): - if line.find('<')>0 or line.find('>')>0 or line.find('&&')>0: - shell=True + if isinstance(line,str): + if line.find('<')>0 or line.find('>')>0 or line.find('&&')>0: + shell=True + else: + dvars_lst=[] + funs_lst=[] + for x in line: + if isinstance(x,str): + fun,dvars=compile_fun(x,shell) + dvars_lst+=dvars + funs_lst.append(fun) + else: + funs_lst.append(x) + def composed_fun(task): + for x in funs_lst: + ret=x(task) + if ret: + return ret + return None + return composed_fun,dvars if shell: return compile_fun_shell(line) else: return compile_fun_noshell(line) def task_factory(name,func=None,vars=None,color='GREEN',ext_in=[],ext_out=[],before=[],after=[],shell=False,scan=None): params={'vars':vars or[],'color':color,'name':name,'ext_in':Utils.to_list(ext_in),'ext_out':Utils.to_list(ext_out),'before':Utils.to_list(before),'after':Utils.to_list(after),'shell':shell,'scan':scan,} - if isinstance(func,str): + if isinstance(func,str)or isinstance(func,tuple): params['run_str']=func else: params['run']=func @@ -632,41 +683,8 @@ classes[name]=cls return cls def always_run(cls): - old=cls.runnable_status - def always(self): - ret=old(self) - if ret==SKIP_ME: - ret=RUN_ME - return ret - cls.runnable_status=always + Logs.warn('This decorator is deprecated, set always_run on the task class instead!') + cls.always_run=True return cls def update_outputs(cls): - old_post_run=cls.post_run - def post_run(self): - old_post_run(self) - for node in self.outputs: - node.sig=Utils.h_file(node.abspath()) - self.generator.bld.task_sigs[node.abspath()]=self.uid() - cls.post_run=post_run - old_runnable_status=cls.runnable_status - def runnable_status(self): - status=old_runnable_status(self) - if status!=RUN_ME: - return status - try: - bld=self.generator.bld - prev_sig=bld.task_sigs[self.uid()] - if prev_sig==self.signature(): - for x in self.outputs: - if not x.sig or bld.task_sigs[x.abspath()]!=self.uid(): - return RUN_ME - return SKIP_ME - except KeyError: - pass - except IndexError: - pass - except AttributeError: - pass - return RUN_ME - cls.runnable_status=runnable_status return cls diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ar.py glmark2-2021.02/waflib/Tools/ar.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ar.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/ar.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,13 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib.Configure import conf +@conf def find_ar(conf): conf.load('ar') def configure(conf): conf.find_program('ar',var='AR') - conf.env.ARFLAGS='rcs' - -conf(find_ar) \ No newline at end of file + conf.add_os_flags('ARFLAGS') + if not conf.env.ARFLAGS: + conf.env.ARFLAGS=['rcs'] diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/asm.py glmark2-2021.02/waflib/Tools/asm.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/asm.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/asm.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,25 +1,23 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys -from waflib import Task,Utils -import waflib.Task +from waflib import Task from waflib.Tools.ccroot import link_task,stlink_task -from waflib.TaskGen import extension,feature +from waflib.TaskGen import extension class asm(Task.Task): color='BLUE' - run_str='${AS} ${ASFLAGS} ${CPPPATH_ST:INCPATHS} ${AS_SRC_F}${SRC} ${AS_TGT_F}${TGT}' + run_str='${AS} ${ASFLAGS} ${ASMPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${AS_SRC_F}${SRC} ${AS_TGT_F}${TGT}' +@extension('.s','.S','.asm','.ASM','.spp','.SPP') def asm_hook(self,node): return self.create_compiled_task('asm',node) class asmprogram(link_task): run_str='${ASLINK} ${ASLINKFLAGS} ${ASLNK_TGT_F}${TGT} ${ASLNK_SRC_F}${SRC}' ext_out=['.bin'] inst_to='${BINDIR}' - chmod=Utils.O755 class asmshlib(asmprogram): inst_to='${LIBDIR}' class asmstlib(stlink_task): pass - -extension('.s','.S','.asm','.ASM','.spp','.SPP')(asm_hook) \ No newline at end of file +def configure(conf): + conf.env.ASMPATH_ST='-I%s' diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/bison.py glmark2-2021.02/waflib/Tools/bison.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/bison.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/bison.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib import Task from waflib.TaskGen import extension @@ -8,8 +8,9 @@ color='BLUE' run_str='${BISON} ${BISONFLAGS} ${SRC[0].abspath()} -o ${TGT[0].name}' ext_out=['.h'] +@extension('.y','.yc','.yy') def big_bison(self,node): - has_h='-d'in self.env['BISONFLAGS'] + has_h='-d'in self.env.BISONFLAGS outs=[] if node.name.endswith('.yc'): outs.append(node.change_ext('.tab.cc')) @@ -20,10 +21,8 @@ if has_h: outs.append(node.change_ext('.tab.h')) tsk=self.create_task('bison',node,outs) - tsk.cwd=node.parent.get_bld().abspath() + tsk.cwd=node.parent.get_bld() self.source.append(outs[0]) def configure(conf): conf.find_program('bison',var='BISON') conf.env.BISONFLAGS=['-d'] - -extension('.y','.yc','.yy')(big_bison) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_aliases.py glmark2-2021.02/waflib/Tools/c_aliases.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_aliases.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c_aliases.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,56 +1,60 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,re -from waflib import Utils,Build +from waflib import Utils,Errors from waflib.Configure import conf def get_extensions(lst): ret=[] for x in Utils.to_list(lst): - try: - if not isinstance(x,str): - x=x.name - ret.append(x[x.rfind('.')+1:]) - except: - pass + if not isinstance(x,str): + x=x.name + ret.append(x[x.rfind('.')+1:]) return ret def sniff_features(**kw): exts=get_extensions(kw['source']) - type=kw['_type'] + typ=kw['typ'] feats=[] - if'cxx'in exts or'cpp'in exts or'c++'in exts or'cc'in exts or'C'in exts: - feats.append('cxx') - if'c'in exts or'vala'in exts: + for x in'cxx cpp c++ cc C'.split(): + if x in exts: + feats.append('cxx') + break + if'c'in exts or'vala'in exts or'gs'in exts: feats.append('c') + for x in'f f90 F F90 for FOR'.split(): + if x in exts: + feats.append('fc') + break if'd'in exts: feats.append('d') if'java'in exts: feats.append('java') - if'java'in exts: return'java' - if type in['program','shlib','stlib']: + if typ in('program','shlib','stlib'): + will_link=False for x in feats: - if x in['cxx','d','c']: - feats.append(x+type) + if x in('cxx','d','fc','c'): + feats.append(x+typ) + will_link=True + if not will_link and not kw.get('features',[]): + raise Errors.WafError('Cannot link from %r, try passing eg: features="c cprogram"?'%kw) return feats -def set_features(kw,_type): - kw['_type']=_type +def set_features(kw,typ): + kw['typ']=typ kw['features']=Utils.to_list(kw.get('features',[]))+Utils.to_list(sniff_features(**kw)) +@conf def program(bld,*k,**kw): set_features(kw,'program') return bld(*k,**kw) +@conf def shlib(bld,*k,**kw): set_features(kw,'shlib') return bld(*k,**kw) +@conf def stlib(bld,*k,**kw): set_features(kw,'stlib') return bld(*k,**kw) +@conf def objects(bld,*k,**kw): set_features(kw,'objects') return bld(*k,**kw) - -conf(program) -conf(shlib) -conf(stlib) -conf(objects) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_config.py glmark2-2021.02/waflib/Tools/c_config.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_config.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c_config.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,108 +1,135 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import os,imp,sys,re,shlex,shutil -from waflib import Build,Utils,Configure,Task,Options,Logs,TaskGen,Errors,ConfigSet,Runner -from waflib.TaskGen import before_method,after_method,feature +import os,re,shlex +from waflib import Build,Utils,Task,Options,Logs,Errors,Runner +from waflib.TaskGen import after_method,feature from waflib.Configure import conf WAF_CONFIG_H='config.h' DEFKEYS='define_key' INCKEYS='include_key' cfg_ver={'atleast-version':'>=','exact-version':'==','max-version':'<=',} SNIP_FUNCTION=''' - int main() { - void *p; - p=(void*)(%s); - return 0; +int main(int argc, char **argv) { + void (*p)(); + (void)argc; (void)argv; + p=(void(*)())(%s); + return !p; } ''' SNIP_TYPE=''' -int main() { +int main(int argc, char **argv) { + (void)argc; (void)argv; if ((%(type_name)s *) 0) return 0; if (sizeof (%(type_name)s)) return 0; -} -''' -SNIP_CLASS=''' -int main() { - if ( + return 1; } ''' SNIP_EMPTY_PROGRAM=''' -int main() { +int main(int argc, char **argv) { + (void)argc; (void)argv; return 0; } ''' SNIP_FIELD=''' -int main() { +int main(int argc, char **argv) { char *off; + (void)argc; (void)argv; off = (char*) &((%(type_name)s*)0)->%(field_name)s; return (size_t) off < sizeof(%(type_name)s); } ''' -MACRO_TO_DESTOS={'__linux__':'linux','__GNU__':'gnu','__FreeBSD__':'freebsd','__NetBSD__':'netbsd','__OpenBSD__':'openbsd','__sun':'sunos','__hpux':'hpux','__sgi':'irix','_AIX':'aix','__CYGWIN__':'cygwin','__MSYS__':'msys','_UWIN':'uwin','_WIN64':'win32','_WIN32':'win32','__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__':'darwin','__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__':'darwin','__QNX__':'qnx','__native_client__':'nacl'} -MACRO_TO_DEST_CPU={'__x86_64__':'x86_64','__i386__':'x86','__ia64__':'ia','__mips__':'mips','__sparc__':'sparc','__alpha__':'alpha','__arm__':'arm','__hppa__':'hppa','__powerpc__':'powerpc',} -def parse_flags(self,line,uselib,env=None,force_static=False): +MACRO_TO_DESTOS={'__linux__':'linux','__GNU__':'gnu','__FreeBSD__':'freebsd','__NetBSD__':'netbsd','__OpenBSD__':'openbsd','__sun':'sunos','__hpux':'hpux','__sgi':'irix','_AIX':'aix','__CYGWIN__':'cygwin','__MSYS__':'cygwin','_UWIN':'uwin','_WIN64':'win32','_WIN32':'win32','__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__':'darwin','__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__':'darwin','__QNX__':'qnx','__native_client__':'nacl'} +MACRO_TO_DEST_CPU={'__x86_64__':'x86_64','__amd64__':'x86_64','__i386__':'x86','__ia64__':'ia','__mips__':'mips','__sparc__':'sparc','__alpha__':'alpha','__aarch64__':'aarch64','__thumb__':'thumb','__arm__':'arm','__hppa__':'hppa','__powerpc__':'powerpc','__ppc__':'powerpc','__convex__':'convex','__m68k__':'m68k','__s390x__':'s390x','__s390__':'s390','__sh__':'sh','__xtensa__':'xtensa',} +@conf +def parse_flags(self,line,uselib_store,env=None,force_static=False,posix=None): assert(isinstance(line,str)) env=env or self.env - app=env.append_value - appu=env.append_unique - lex=shlex.shlex(line,posix=False) + if posix is None: + posix=True + if'\\'in line: + posix=('\\ 'in line)or('\\\\'in line) + lex=shlex.shlex(line,posix=posix) lex.whitespace_split=True lex.commenters='' lst=list(lex) + uselib=uselib_store + def app(var,val): + env.append_value('%s_%s'%(var,uselib),val) + def appu(var,val): + env.append_unique('%s_%s'%(var,uselib),val) + static=False while lst: x=lst.pop(0) st=x[:2] ot=x[2:] if st=='-I'or st=='/I': - if not ot:ot=lst.pop(0) - appu('INCLUDES_'+uselib,[ot]) - elif st=='-include': + if not ot: + ot=lst.pop(0) + appu('INCLUDES',ot) + elif st=='-i': tmp=[x,lst.pop(0)] app('CFLAGS',tmp) app('CXXFLAGS',tmp) - elif st=='-D'or(self.env.CXX_NAME=='msvc'and st=='/D'): - if not ot:ot=lst.pop(0) - app('DEFINES_'+uselib,[ot]) + elif st=='-D'or(env.CXX_NAME=='msvc'and st=='/D'): + if not ot: + ot=lst.pop(0) + app('DEFINES',ot) elif st=='-l': - if not ot:ot=lst.pop(0) - prefix=force_static and'STLIB_'or'LIB_' - appu(prefix+uselib,[ot]) + if not ot: + ot=lst.pop(0) + prefix='STLIB'if(force_static or static)else'LIB' + app(prefix,ot) elif st=='-L': - if not ot:ot=lst.pop(0) - appu('LIBPATH_'+uselib,[ot]) - elif x=='-pthread'or x.startswith('+')or x.startswith('-std'): - app('CFLAGS_'+uselib,[x]) - app('CXXFLAGS_'+uselib,[x]) - app('LINKFLAGS_'+uselib,[x]) + if not ot: + ot=lst.pop(0) + prefix='STLIBPATH'if(force_static or static)else'LIBPATH' + appu(prefix,ot) + elif x.startswith('/LIBPATH:'): + prefix='STLIBPATH'if(force_static or static)else'LIBPATH' + appu(prefix,x.replace('/LIBPATH:','')) + elif x.startswith('-std='): + prefix='CXXFLAGS'if'++'in x else'CFLAGS' + app(prefix,x) + elif x=='-pthread'or x.startswith('+'): + app('CFLAGS',x) + app('CXXFLAGS',x) + app('LINKFLAGS',x) elif x=='-framework': - appu('FRAMEWORK_'+uselib,[lst.pop(0)]) + appu('FRAMEWORK',lst.pop(0)) elif x.startswith('-F'): - appu('FRAMEWORKPATH_'+uselib,[x[2:]]) + appu('FRAMEWORKPATH',x[2:]) + elif x=='-Wl,-rpath'or x=='-Wl,-R': + app('RPATH',lst.pop(0).lstrip('-Wl,')) + elif x.startswith('-Wl,-R,'): + app('RPATH',x[7:]) + elif x.startswith('-Wl,-R'): + app('RPATH',x[6:]) + elif x.startswith('-Wl,-rpath,'): + app('RPATH',x[11:]) + elif x=='-Wl,-Bstatic'or x=='-Bstatic': + static=True + elif x=='-Wl,-Bdynamic'or x=='-Bdynamic': + static=False elif x.startswith('-Wl'): - app('LINKFLAGS_'+uselib,[x]) - elif x.startswith('-m')or x.startswith('-f')or x.startswith('-dynamic'): - app('CFLAGS_'+uselib,[x]) - app('CXXFLAGS_'+uselib,[x]) + app('LINKFLAGS',x) + elif x.startswith(('-m','-f','-dynamic')): + app('CFLAGS',x) + app('CXXFLAGS',x) elif x.startswith('-bundle'): - app('LINKFLAGS_'+uselib,[x]) - elif x.startswith('-undefined'): + app('LINKFLAGS',x) + elif x.startswith(('-undefined','-Xlinker')): arg=lst.pop(0) - app('LINKFLAGS_'+uselib,[x,arg]) - elif x.startswith('-arch')or x.startswith('-isysroot'): + app('LINKFLAGS',[x,arg]) + elif x.startswith(('-arch','-isysroot')): tmp=[x,lst.pop(0)] - app('CFLAGS_'+uselib,tmp) - app('CXXFLAGS_'+uselib,tmp) - app('LINKFLAGS_'+uselib,tmp) - elif x.endswith('.a')or x.endswith('.so')or x.endswith('.dylib'): - appu('LINKFLAGS_'+uselib,[x]) -def ret_msg(self,f,kw): - if isinstance(f,str): - return f - return f(kw) + app('CFLAGS',tmp) + app('CXXFLAGS',tmp) + app('LINKFLAGS',tmp) + elif x.endswith(('.a','.so','.dylib','.lib')): + appu('LINKFLAGS',x) +@conf def validate_cfg(self,kw): if not'path'in kw: if not self.env.PKGCONFIG: @@ -119,67 +146,93 @@ if'modversion'in kw: if not'msg'in kw: kw['msg']='Checking for %r version'%kw['modversion'] + if not'uselib_store'in kw: + kw['uselib_store']=kw['modversion'] + if not'define_name'in kw: + kw['define_name']='%s_VERSION'%Utils.quote_define_name(kw['uselib_store']) return - for x in cfg_ver.keys(): + if not'package'in kw: + raise ValueError('a package name is required') + if not'uselib_store'in kw: + kw['uselib_store']=kw['package'].upper() + if not'define_name'in kw: + kw['define_name']=self.have_define(kw['uselib_store']) + if not'msg'in kw: + kw['msg']='Checking for %r'%(kw['package']or kw['path']) + for x in cfg_ver: y=x.replace('-','_') if y in kw: - if not'package'in kw: - raise ValueError('%s requires a package'%x) + package=kw['package'] + if Logs.verbose: + Logs.warn('Passing %r to conf.check_cfg() is obsolete, pass parameters directly, eg:',y) + Logs.warn(" conf.check_cfg(package='%s', args=['--libs', '--cflags', '%s >= 1.6'])",package,package) if not'msg'in kw: - kw['msg']='Checking for %r %s %s'%(kw['package'],cfg_ver[x],kw[y]) - return - if not'msg'in kw: - kw['msg']='Checking for %r'%(kw['package']or kw['path']) + kw['msg']='Checking for %r %s %s'%(package,cfg_ver[x],kw[y]) + break +@conf def exec_cfg(self,kw): + path=Utils.to_list(kw['path']) + env=self.env.env or None + if kw.get('pkg_config_path'): + if not env: + env=dict(self.environ) + env['PKG_CONFIG_PATH']=kw['pkg_config_path'] + def define_it(): + define_name=kw['define_name'] + if kw.get('global_define',1): + self.define(define_name,1,False) + else: + self.env.append_unique('DEFINES_%s'%kw['uselib_store'],"%s=1"%define_name) + if kw.get('add_have_to_env',1): + self.env[define_name]=1 if'atleast_pkgconfig_version'in kw: - cmd=[kw['path'],'--atleast-pkgconfig-version=%s'%kw['atleast_pkgconfig_version']] - self.cmd_and_log(cmd) + cmd=path+['--atleast-pkgconfig-version=%s'%kw['atleast_pkgconfig_version']] + self.cmd_and_log(cmd,env=env) if not'okmsg'in kw: kw['okmsg']='yes' return for x in cfg_ver: y=x.replace('-','_') if y in kw: - self.cmd_and_log([kw['path'],'--%s=%s'%(x,kw[y]),kw['package']]) + self.cmd_and_log(path+['--%s=%s'%(x,kw[y]),kw['package']],env=env) if not'okmsg'in kw: kw['okmsg']='yes' - self.define(self.have_define(kw.get('uselib_store',kw['package'])),1,0) + define_it() break if'modversion'in kw: - version=self.cmd_and_log([kw['path'],'--modversion',kw['modversion']]).strip() - self.define('%s_VERSION'%Utils.quote_define_name(kw.get('uselib_store',kw['modversion'])),version) + version=self.cmd_and_log(path+['--modversion',kw['modversion']],env=env).strip() + self.define(kw['define_name'],version) return version - lst=[kw['path']] - defi=kw.get('define_variable',None) + lst=[]+path + defi=kw.get('define_variable') if not defi: defi=self.env.PKG_CONFIG_DEFINES or{} for key,val in defi.items(): lst.append('--define-variable=%s=%s'%(key,val)) - if kw['package']: - lst.extend(Utils.to_list(kw['package'])) + static=kw.get('force_static',False) + if'args'in kw: + args=Utils.to_list(kw['args']) + if'--static'in args or'--static-libs'in args: + static=True + lst+=args + lst.extend(Utils.to_list(kw['package'])) if'variables'in kw: - env=kw.get('env',self.env) - uselib=kw.get('uselib_store',kw['package'].upper()) + v_env=kw.get('env',self.env) vars=Utils.to_list(kw['variables']) for v in vars: - val=self.cmd_and_log(lst+['--variable='+v]).strip() - var='%s_%s'%(uselib,v) - env[var]=val + val=self.cmd_and_log(lst+['--variable='+v],env=env).strip() + var='%s_%s'%(kw['uselib_store'],v) + v_env[var]=val if not'okmsg'in kw: kw['okmsg']='yes' return - static=False - if'args'in kw: - args=Utils.to_list(kw['args']) - if'--static'in args or'--static-libs'in args: - static=True - lst+=args - ret=self.cmd_and_log(lst) + ret=self.cmd_and_log(lst,env=env) if not'okmsg'in kw: kw['okmsg']='yes' - self.define(self.have_define(kw.get('uselib_store',kw['package'])),1,0) - self.parse_flags(ret,kw.get('uselib_store',kw['package'].upper()),kw.get('env',self.env),force_static=static) + define_it() + self.parse_flags(ret,kw['uselib_store'],kw.get('env',self.env),force_static=static,posix=kw.get('posix')) return ret +@conf def check_cfg(self,*k,**kw): if k: lst=k[0].split() @@ -187,34 +240,48 @@ kw['args']=' '.join(lst[1:]) self.validate_cfg(kw) if'msg'in kw: - self.start_msg(kw['msg']) + self.start_msg(kw['msg'],**kw) ret=None try: ret=self.exec_cfg(kw) - except self.errors.WafError ,e: + except self.errors.WafError: if'errmsg'in kw: - self.end_msg(kw['errmsg'],'YELLOW') + self.end_msg(kw['errmsg'],'YELLOW',**kw) if Logs.verbose>1: raise else: self.fatal('The configuration failed') else: + if not ret: + ret=True kw['success']=ret if'okmsg'in kw: - self.end_msg(self.ret_msg(kw['okmsg'],kw)) + self.end_msg(self.ret_msg(kw['okmsg'],kw),**kw) return ret +def build_fun(bld): + if bld.kw['compile_filename']: + node=bld.srcnode.make_node(bld.kw['compile_filename']) + node.write(bld.kw['code']) + o=bld(features=bld.kw['features'],source=bld.kw['compile_filename'],target='testprog') + for k,v in bld.kw.items(): + setattr(o,k,v) + if not bld.kw.get('quiet'): + bld.conf.to_log("==>\n%s\n<=="%bld.kw['code']) +@conf def validate_c(self,kw): + if not'build_fun'in kw: + kw['build_fun']=build_fun if not'env'in kw: kw['env']=self.env.derive() env=kw['env'] if not'compiler'in kw and not'features'in kw: kw['compiler']='c' - if env['CXX_NAME']and Task.classes.get('cxx',None): + if env.CXX_NAME and Task.classes.get('cxx'): kw['compiler']='cxx' - if not self.env['CXX']: + if not self.env.CXX: self.fatal('a c++ compiler is required') else: - if not self.env['CC']: + if not self.env.CC: self.fatal('a c compiler is required') if not'compile_mode'in kw: kw['compile_mode']='c' @@ -223,7 +290,10 @@ if not'type'in kw: kw['type']='cprogram' if not'features'in kw: - kw['features']=[kw['compile_mode'],kw['type']] + if not'header_name'in kw or kw.get('link_header_test',True): + kw['features']=[kw['compile_mode'],kw['type']] + else: + kw['features']=[kw['compile_mode']] else: kw['features']=Utils.to_list(kw['features']) if not'compile_filename'in kw: @@ -241,7 +311,7 @@ if not'header_name'in kw: kw['header_name']=[] fwk='%s/%s.h'%(fwkname,fwkname) - if kw.get('remove_dot_h',None): + if kw.get('remove_dot_h'): fwk=fwk[:-2] kw['header_name']=Utils.to_list(kw['header_name'])+[fwk] kw['msg']='Checking for framework %s'%fwkname @@ -276,7 +346,7 @@ if not'msg'in kw: kw['msg']='Checking for header %s'%kw['header_name'] l=Utils.to_list(kw['header_name']) - assert len(l)>0,'list of headers in header_name is empty' + assert len(l),'list of headers in header_name is empty' kw['code']=to_header(kw)+SNIP_EMPTY_PROGRAM if not'uselib_store'in kw: kw['uselib_store']=l[0].upper() @@ -298,7 +368,7 @@ kw['msg']='Checking for code snippet' if not'errmsg'in kw: kw['errmsg']='no' - for(flagsname,flagstype)in[('cxxflags','compiler'),('cflags','compiler'),('linkflags','linker')]: + for(flagsname,flagstype)in(('cxxflags','compiler'),('cflags','compiler'),('linkflags','linker')): if flagsname in kw: if not'msg'in kw: kw['msg']='Checking for %s flags %s'%(flagstype,kw[flagsname]) @@ -308,6 +378,7 @@ kw['execute']=False if kw['execute']: kw['features'].append('test_exec') + kw['chmod']=Utils.O755 if not'errmsg'in kw: kw['errmsg']='not found' if not'okmsg'in kw: @@ -316,10 +387,15 @@ kw['code']=SNIP_EMPTY_PROGRAM if self.env[INCKEYS]: kw['code']='\n'.join(['#include <%s>'%x for x in self.env[INCKEYS]])+'\n'+kw['code'] + if kw.get('merge_config_header',False)or env.merge_config_header: + kw['code']='%s\n\n%s'%(self.get_config_header(),kw['code']) + env.DEFINES=[] if not kw.get('success'):kw['success']=None if'define_name'in kw: self.undefine(kw['define_name']) - assert'msg'in kw,'invalid parameters, read http://freehackers.org/~tnagy/wafbook/single.html#config_helpers_c' + if not'msg'in kw: + self.fatal('missing "msg" in conf.check(...)') +@conf def post_check(self,*k,**kw): is_success=0 if kw['execute']: @@ -330,15 +406,31 @@ is_success=(kw['success']==0) else: is_success=(kw['success']==0) - if'define_name'in kw: - if'header_name'in kw or'function_name'in kw or'type_name'in kw or'fragment'in kw: - nm=kw['define_name'] - if kw['execute']and kw.get('define_ret',None)and isinstance(is_success,str): - self.define(kw['define_name'],is_success,quote=kw.get('quote',1)) + if kw.get('define_name'): + comment=kw.get('comment','') + define_name=kw['define_name'] + if kw['execute']and kw.get('define_ret')and isinstance(is_success,str): + if kw.get('global_define',1): + self.define(define_name,is_success,quote=kw.get('quote',1),comment=comment) else: - self.define_cond(kw['define_name'],is_success) + if kw.get('quote',1): + succ='"%s"'%is_success + else: + succ=int(is_success) + val='%s=%s'%(define_name,succ) + var='DEFINES_%s'%kw['uselib_store'] + self.env.append_value(var,val) else: - self.define_cond(kw['define_name'],is_success) + if kw.get('global_define',1): + self.define_cond(define_name,is_success,comment=comment) + else: + var='DEFINES_%s'%kw['uselib_store'] + self.env.append_value(var,'%s=%s'%(define_name,int(is_success))) + if kw.get('add_have_to_env',1): + if kw.get('uselib_store'): + self.env[self.have_define(kw['uselib_store'])]=1 + else: + self.env[define_name]=int(is_success) if'header_name'in kw: if kw.get('auto_add_header_name',False): self.env.append_value(INCKEYS,Utils.to_list(kw['header_name'])) @@ -349,33 +441,31 @@ if x in ccroot.USELIB_VARS: _vars|=ccroot.USELIB_VARS[x] for k in _vars: - lk=k.lower() - if k=='INCLUDES':lk='includes' - if k=='DEFINES':lk='defines' - if lk in kw: - val=kw[lk] - if isinstance(val,str): - val=val.rstrip(os.path.sep) - self.env.append_unique(k+'_'+kw['uselib_store'],val) + x=k.lower() + if x in kw: + self.env.append_value(k+'_'+kw['uselib_store'],kw[x]) return is_success +@conf def check(self,*k,**kw): self.validate_c(kw) - self.start_msg(kw['msg']) + self.start_msg(kw['msg'],**kw) ret=None try: - ret=self.run_c_code(*k,**kw) - except self.errors.ConfigurationError ,e: - self.end_msg(kw['errmsg'],'YELLOW') + ret=self.run_build(*k,**kw) + except self.errors.ConfigurationError: + self.end_msg(kw['errmsg'],'YELLOW',**kw) if Logs.verbose>1: raise else: self.fatal('The configuration failed') else: kw['success']=ret - self.end_msg(self.ret_msg(kw['okmsg'],kw)) ret=self.post_check(*k,**kw) if not ret: + self.end_msg(kw['errmsg'],'YELLOW',**kw) self.fatal('The configuration failed %r'%ret) + else: + self.end_msg(self.ret_msg(kw['okmsg'],kw),**kw) return ret class test_exec(Task.Task): color='PINK' @@ -394,80 +484,44 @@ self.generator.bld.retval=self.generator.bld.cmd_and_log([self.inputs[0].abspath()],env=env) else: self.generator.bld.retval=self.generator.bld.exec_command([self.inputs[0].abspath()],env=env) +@feature('test_exec') +@after_method('apply_link') def test_exec_fun(self): self.create_task('test_exec',self.link_task.outputs[0]) -CACHE_RESULTS=1 -COMPILE_ERRORS=2 -def run_c_code(self,*k,**kw): - lst=[str(v)for(p,v)in kw.items()if p!='env'] - h=Utils.h_list(lst) - dir=self.bldnode.abspath()+os.sep+(not Utils.is_win32 and'.'or'')+'conf_check_'+Utils.to_hex(h) - try: - os.makedirs(dir) - except: - pass - try: - os.stat(dir) - except: - self.fatal('cannot use the configuration test folder %r'%dir) - cachemode=getattr(Options.options,'confcache',None) - if cachemode==CACHE_RESULTS: - try: - proj=ConfigSet.ConfigSet(os.path.join(dir,'cache_run_c_code')) - ret=proj['cache_run_c_code'] - except: - pass - else: - if isinstance(ret,str)and ret.startswith('Test does not build'): - self.fatal(ret) - return ret - bdir=os.path.join(dir,'testbuild') - if not os.path.exists(bdir): - os.makedirs(bdir) - self.test_bld=bld=Build.BuildContext(top_dir=dir,out_dir=bdir) - bld.init_dirs() - bld.progress_bar=0 - bld.targets='*' - if kw['compile_filename']: - node=bld.srcnode.make_node(kw['compile_filename']) - node.write(kw['code']) - bld.logger=self.logger - bld.all_envs.update(self.all_envs) - bld.env=kw['env'] - o=bld(features=kw['features'],source=kw['compile_filename'],target='testprog') - for k,v in kw.items(): - setattr(o,k,v) - self.to_log("==>\n%s\n<=="%kw['code']) - bld.targets='*' - ret=-1 - try: - try: - bld.compile() - except Errors.WafError: - ret='Test does not build: %s'%Utils.ex_stack() - self.fatal(ret) - else: - ret=getattr(bld,'retval',0) - finally: - proj=ConfigSet.ConfigSet() - proj['cache_run_c_code']=ret - proj.store(os.path.join(dir,'cache_run_c_code')) - return ret +@conf def check_cxx(self,*k,**kw): kw['compiler']='cxx' return self.check(*k,**kw) +@conf def check_cc(self,*k,**kw): kw['compiler']='c' return self.check(*k,**kw) -def define(self,key,val,quote=True): - assert key and isinstance(key,str) +@conf +def set_define_comment(self,key,comment): + coms=self.env.DEFINE_COMMENTS + if not coms: + coms=self.env.DEFINE_COMMENTS={} + coms[key]=comment or'' +@conf +def get_define_comment(self,key): + coms=self.env.DEFINE_COMMENTS or{} + return coms.get(key,'') +@conf +def define(self,key,val,quote=True,comment=''): + assert isinstance(key,str) + if not key: + return + if val is True: + val=1 + elif val in(False,None): + val=0 if isinstance(val,int)or isinstance(val,float): s='%s=%s' else: s=quote and'%s="%s"'or'%s=%s' app=s%(key,str(val)) ban=key+'=' - lst=self.env['DEFINES'] + lst=self.env.DEFINES for x in lst: if x.startswith(ban): lst[lst.index(x)]=app @@ -475,102 +529,129 @@ else: self.env.append_value('DEFINES',app) self.env.append_unique(DEFKEYS,key) -def undefine(self,key): - assert key and isinstance(key,str) + self.set_define_comment(key,comment) +@conf +def undefine(self,key,comment=''): + assert isinstance(key,str) + if not key: + return ban=key+'=' - lst=[x for x in self.env['DEFINES']if not x.startswith(ban)] - self.env['DEFINES']=lst + lst=[x for x in self.env.DEFINES if not x.startswith(ban)] + self.env.DEFINES=lst self.env.append_unique(DEFKEYS,key) -def define_cond(self,key,val): - assert key and isinstance(key,str) + self.set_define_comment(key,comment) +@conf +def define_cond(self,key,val,comment=''): + assert isinstance(key,str) + if not key: + return if val: - self.define(key,1) + self.define(key,1,comment=comment) else: - self.undefine(key) + self.undefine(key,comment=comment) +@conf def is_defined(self,key): assert key and isinstance(key,str) ban=key+'=' - for x in self.env['DEFINES']: + for x in self.env.DEFINES: if x.startswith(ban): return True return False +@conf def get_define(self,key): assert key and isinstance(key,str) ban=key+'=' - for x in self.env['DEFINES']: + for x in self.env.DEFINES: if x.startswith(ban): return x[len(ban):] return None +@conf def have_define(self,key): - return self.__dict__.get('HAVE_PAT','HAVE_%s')%Utils.quote_define_name(key) -def write_config_header(self,configfile='',guard='',top=False,env=None,defines=True,headers=False,remove=True): + return(self.env.HAVE_PAT or'HAVE_%s')%Utils.quote_define_name(key) +@conf +def write_config_header(self,configfile='',guard='',top=False,defines=True,headers=False,remove=True,define_prefix=''): if not configfile:configfile=WAF_CONFIG_H - waf_guard=guard or'_%s_WAF'%Utils.quote_define_name(configfile) + waf_guard=guard or'W_%s_WAF'%Utils.quote_define_name(configfile) node=top and self.bldnode or self.path.get_bld() node=node.make_node(configfile) node.parent.mkdir() lst=['/* WARNING! All changes made to this file will be lost! */\n'] lst.append('#ifndef %s\n#define %s\n'%(waf_guard,waf_guard)) - lst.append(self.get_config_header(defines,headers)) + lst.append(self.get_config_header(defines,headers,define_prefix=define_prefix)) lst.append('\n#endif /* %s */\n'%waf_guard) node.write('\n'.join(lst)) - env=env or self.env - env.append_unique(Build.CFG_FILES,[node.abspath()]) + self.env.append_unique(Build.CFG_FILES,[node.abspath()]) if remove: for key in self.env[DEFKEYS]: self.undefine(key) self.env[DEFKEYS]=[] -def get_config_header(self,defines=True,headers=False): +@conf +def get_config_header(self,defines=True,headers=False,define_prefix=''): lst=[] + if self.env.WAF_CONFIG_H_PRELUDE: + lst.append(self.env.WAF_CONFIG_H_PRELUDE) if headers: for x in self.env[INCKEYS]: lst.append('#include <%s>'%x) if defines: - for x in self.env[DEFKEYS]: - if self.is_defined(x): - val=self.get_define(x) - lst.append('#define %s %s'%(x,val)) - else: - lst.append('/* #undef %s */'%x) + tbl={} + for k in self.env.DEFINES: + a,_,b=k.partition('=') + tbl[a]=b + for k in self.env[DEFKEYS]: + caption=self.get_define_comment(k) + if caption: + caption=' /* %s */'%caption + try: + txt='#define %s%s %s%s'%(define_prefix,k,tbl[k],caption) + except KeyError: + txt='/* #undef %s%s */%s'%(define_prefix,k,caption) + lst.append(txt) return"\n".join(lst) +@conf def cc_add_flags(conf): - conf.add_os_flags('CPPFLAGS','CFLAGS') - conf.add_os_flags('CFLAGS') + conf.add_os_flags('CPPFLAGS',dup=False) + conf.add_os_flags('CFLAGS',dup=False) +@conf def cxx_add_flags(conf): - conf.add_os_flags('CPPFLAGS','CXXFLAGS') - conf.add_os_flags('CXXFLAGS') + conf.add_os_flags('CPPFLAGS',dup=False) + conf.add_os_flags('CXXFLAGS',dup=False) +@conf def link_add_flags(conf): - conf.add_os_flags('LINKFLAGS') - conf.add_os_flags('LDFLAGS','LINKFLAGS') + conf.add_os_flags('LINKFLAGS',dup=False) + conf.add_os_flags('LDFLAGS',dup=False) +@conf def cc_load_tools(conf): if not conf.env.DEST_OS: conf.env.DEST_OS=Utils.unversioned_sys_platform() conf.load('c') +@conf def cxx_load_tools(conf): if not conf.env.DEST_OS: conf.env.DEST_OS=Utils.unversioned_sys_platform() conf.load('cxx') -def get_cc_version(conf,cc,gcc=False,icc=False): +@conf +def get_cc_version(conf,cc,gcc=False,icc=False,clang=False): cmd=cc+['-dM','-E','-'] env=conf.env.env or None try: - p=Utils.subprocess.Popen(cmd,stdin=Utils.subprocess.PIPE,stdout=Utils.subprocess.PIPE,stderr=Utils.subprocess.PIPE,env=env) - p.stdin.write('\n') - out=p.communicate()[0] - except: + out,err=conf.cmd_and_log(cmd,output=0,input='\n'.encode(),env=env) + except Exception: conf.fatal('Could not determine the compiler version %r'%cmd) - if not isinstance(out,str): - out=out.decode(sys.stdout.encoding) if gcc: if out.find('__INTEL_COMPILER')>=0: conf.fatal('The intel compiler pretends to be gcc') - if out.find('__GNUC__')<0: + if out.find('__GNUC__')<0 and out.find('__clang__')<0: conf.fatal('Could not determine the compiler type') if icc and out.find('__INTEL_COMPILER')<0: conf.fatal('Not icc/icpc') + if clang and out.find('__clang__')<0: + conf.fatal('Not clang/clang++') + if not clang and out.find('__clang__')>=0: + conf.fatal('Could not find gcc/g++ (only Clang), if renamed try eg: CC=gcc48 CXX=g++48 waf configure') k={} - if icc or gcc: - out=out.split('\n') + if icc or gcc or clang: + out=out.splitlines() for line in out: lst=shlex.split(line) if len(lst)>2: @@ -579,8 +660,6 @@ k[key]=val def isD(var): return var in k - def isT(var): - return var in k and k[var]!='0' if not conf.env.DEST_OS: conf.env.DEST_OS='' for i in MACRO_TO_DESTOS: @@ -594,9 +673,9 @@ conf.env.DEST_OS='generic' if isD('__ELF__'): conf.env.DEST_BINFMT='elf' - elif isD('__WINNT__')or isD('__CYGWIN__'): + elif isD('__WINNT__')or isD('__CYGWIN__')or isD('_WIN32'): conf.env.DEST_BINFMT='pe' - conf.env.LIBDIR=conf.env['PREFIX']+'/bin' + conf.env.LIBDIR=conf.env.BINDIR elif isD('__APPLE__'): conf.env.DEST_BINFMT='mac-o' if not conf.env.DEST_BINFMT: @@ -608,31 +687,59 @@ Logs.debug('ccroot: dest platform: '+' '.join([conf.env[x]or'?'for x in('DEST_OS','DEST_BINFMT','DEST_CPU')])) if icc: ver=k['__INTEL_COMPILER'] - conf.env['CC_VERSION']=(ver[:-2],ver[-2],ver[-1]) + conf.env.CC_VERSION=(ver[:-2],ver[-2],ver[-1]) else: - conf.env['CC_VERSION']=(k['__GNUC__'],k['__GNUC_MINOR__'],k['__GNUC_PATCHLEVEL__']) + if isD('__clang__')and isD('__clang_major__'): + conf.env.CC_VERSION=(k['__clang_major__'],k['__clang_minor__'],k['__clang_patchlevel__']) + else: + conf.env.CC_VERSION=(k['__GNUC__'],k['__GNUC_MINOR__'],k.get('__GNUC_PATCHLEVEL__','0')) return k +@conf def get_xlc_version(conf,cc): - version_re=re.compile(r"IBM XL C/C\+\+.*, V(?P\d*)\.(?P\d*)",re.I).search cmd=cc+['-qversion'] try: out,err=conf.cmd_and_log(cmd,output=0) except Errors.WafError: conf.fatal('Could not find xlc %r'%cmd) - if out:match=version_re(out) - else:match=version_re(err) - if not match: + for v in(r"IBM XL C/C\+\+.* V(?P\d*)\.(?P\d*)",): + version_re=re.compile(v,re.I).search + match=version_re(out or err) + if match: + k=match.groupdict() + conf.env.CC_VERSION=(k['major'],k['minor']) + break + else: conf.fatal('Could not determine the XLC version.') - k=match.groupdict() - conf.env['CC_VERSION']=(k['major'],k['minor']) +@conf +def get_suncc_version(conf,cc): + cmd=cc+['-V'] + try: + out,err=conf.cmd_and_log(cmd,output=0) + except Errors.WafError as e: + if not(hasattr(e,'returncode')and hasattr(e,'stdout')and hasattr(e,'stderr')): + conf.fatal('Could not find suncc %r'%cmd) + out=e.stdout + err=e.stderr + version=(out or err) + version=version.splitlines()[0] + version_re=re.compile(r'cc: (studio.*?|\s+)?(sun\s+(c\+\+|c)|(WorkShop\s+Compilers))?\s+(?P\d*)\.(?P\d*)',re.I).search + match=version_re(version) + if match: + k=match.groupdict() + conf.env.CC_VERSION=(k['major'],k['minor']) + else: + conf.fatal('Could not determine the suncc version.') +@conf def add_as_needed(self): if self.env.DEST_BINFMT=='elf'and'gcc'in(self.env.CXX_NAME,self.env.CC_NAME): - self.env.append_unique('LINKFLAGS','--as-needed') + self.env.append_unique('LINKFLAGS','-Wl,--as-needed') class cfgtask(Task.TaskBase): def display(self): return'' def runnable_status(self): return Task.RUN_ME + def uid(self): + return Utils.SIG_NIL def run(self): conf=self.conf bld=Build.BuildContext(top_dir=conf.srcnode.abspath(),out_dir=conf.bldnode.abspath()) @@ -640,26 +747,40 @@ bld.init_dirs() bld.in_msg=1 bld.logger=self.logger + bld.multicheck_task=self + args=self.args try: - bld.check(**self.args) - except: + if'func'in args: + bld.test(build_fun=args['func'],msg=args.get('msg',''),okmsg=args.get('okmsg',''),errmsg=args.get('errmsg',''),) + else: + args['multicheck_mandatory']=args.get('mandatory',True) + args['mandatory']=True + try: + bld.check(**args) + finally: + args['mandatory']=args['multicheck_mandatory'] + except Exception: return 1 +@conf def multicheck(self,*k,**kw): - self.start_msg(kw.get('msg','Executing %d configuration tests'%len(k))) + self.start_msg(kw.get('msg','Executing %d configuration tests'%len(k)),**kw) + for var in('DEFINES',DEFKEYS): + self.env.append_value(var,[]) + self.env.DEFINE_COMMENTS=self.env.DEFINE_COMMENTS or{} class par(object): def __init__(self): self.keep=False - self.cache_global=Options.cache_global - self.nocache=Options.options.nocache - self.returned_tasks=[] + self.task_sigs={} + self.progress_bar=0 def total(self): return len(tasks) def to_log(self,*k,**kw): return bld=par() + bld.keep=kw.get('run_all_tests',True) tasks=[] for dct in k: - x=cfgtask(bld=bld) + x=Task.classes['cfgtask'](bld=bld) tasks.append(x) x.args=dct x.bld=bld @@ -670,44 +791,35 @@ yield tasks while 1: yield[] - p=Runner.Parallel(bld,Options.options.jobs) + bld.producer=p=Runner.Parallel(bld,Options.options.jobs) p.biter=it() p.start() for x in tasks: x.logger.memhandler.flush() + if p.error: + for x in p.error: + if getattr(x,'err_msg',None): + self.to_log(x.err_msg) + self.end_msg('fail',color='RED') + raise Errors.WafError('There is an error in the library, read config.log for more information') + failure_count=0 + for x in tasks: + if x.hasrun not in(Task.SUCCESS,Task.NOT_RUN): + failure_count+=1 + if failure_count: + self.end_msg(kw.get('errmsg','%s test failed'%failure_count),color='YELLOW',**kw) + else: + self.end_msg('all ok',**kw) + for x in tasks: + if'msg'in x.args: + self.start_msg(x.args['msg']) + if x.hasrun==Task.NOT_RUN: + self.end_msg('test cancelled','YELLOW') + elif x.hasrun!=Task.SUCCESS: + self.end_msg(x.args.get('errmsg','no'),'YELLOW') + else: + self.end_msg(x.args.get('okmsg','yes'),'GREEN') for x in tasks: if x.hasrun!=Task.SUCCESS: - self.end_msg(kw.get('errmsg','no'),color='YELLOW') - self.fatal(kw.get('fatalmsg',None)or'One of the tests has failed, see the config.log for more information') - self.end_msg('ok') - -conf(parse_flags) -conf(ret_msg) -conf(validate_cfg) -conf(exec_cfg) -conf(check_cfg) -conf(validate_c) -conf(post_check) -conf(check) -feature('test_exec')(test_exec_fun) -after_method('apply_link')(test_exec_fun) -conf(run_c_code) -conf(check_cxx) -conf(check_cc) -conf(define) -conf(undefine) -conf(define_cond) -conf(is_defined) -conf(get_define) -conf(have_define) -conf(write_config_header) -conf(get_config_header) -conf(cc_add_flags) -conf(cxx_add_flags) -conf(link_add_flags) -conf(cc_load_tools) -conf(cxx_load_tools) -conf(get_cc_version) -conf(get_xlc_version) -conf(add_as_needed) -conf(multicheck) \ No newline at end of file + if x.args.get('mandatory',True): + self.fatal(kw.get('fatalmsg')or'One of the tests has failed, read config.log for more information') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ccroot.py glmark2-2021.02/waflib/Tools/ccroot.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ccroot.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/ccroot.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,28 +1,26 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import os,sys,re -from waflib import TaskGen,Task,Utils,Logs,Build,Options,Node,Errors -from waflib.Logs import error,debug,warn +import os,re +from waflib import Task,Utils,Node,Errors,Logs from waflib.TaskGen import after_method,before_method,feature,taskgen_method,extension from waflib.Tools import c_aliases,c_preproc,c_config,c_osx,c_tests from waflib.Configure import conf +SYSTEM_LIB_PATHS=['/usr/lib64','/usr/lib','/usr/local/lib64','/usr/local/lib'] USELIB_VARS=Utils.defaultdict(set) USELIB_VARS['c']=set(['INCLUDES','FRAMEWORKPATH','DEFINES','CPPFLAGS','CCDEPS','CFLAGS','ARCH']) USELIB_VARS['cxx']=set(['INCLUDES','FRAMEWORKPATH','DEFINES','CPPFLAGS','CXXDEPS','CXXFLAGS','ARCH']) USELIB_VARS['d']=set(['INCLUDES','DFLAGS']) -USELIB_VARS['cprogram']=USELIB_VARS['cxxprogram']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS','FRAMEWORK','FRAMEWORKPATH','ARCH']) -USELIB_VARS['cshlib']=USELIB_VARS['cxxshlib']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS','FRAMEWORK','FRAMEWORKPATH','ARCH']) +USELIB_VARS['includes']=set(['INCLUDES','FRAMEWORKPATH','ARCH']) +USELIB_VARS['cprogram']=USELIB_VARS['cxxprogram']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS','FRAMEWORK','FRAMEWORKPATH','ARCH','LDFLAGS']) +USELIB_VARS['cshlib']=USELIB_VARS['cxxshlib']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS','FRAMEWORK','FRAMEWORKPATH','ARCH','LDFLAGS']) USELIB_VARS['cstlib']=USELIB_VARS['cxxstlib']=set(['ARFLAGS','LINKDEPS']) USELIB_VARS['dprogram']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS']) USELIB_VARS['dshlib']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS']) USELIB_VARS['dstlib']=set(['ARFLAGS','LINKDEPS']) -USELIB_VARS['go']=set(['GOCFLAGS']) -USELIB_VARS['goprogram']=set(['GOLFLAGS']) USELIB_VARS['asm']=set(['ASFLAGS']) +@taskgen_method def create_compiled_task(self,name,node): out='%s.%d.o'%(node.name,self.idx) task=self.create_task(name,node,node.parent.find_or_declare(out)) @@ -31,6 +29,7 @@ except AttributeError: self.compiled_tasks=[task] return task +@taskgen_method def to_incnodes(self,inlst): lst=[] seen=set([]) @@ -55,28 +54,71 @@ lst.append(p) lst.append(v) return lst +@feature('c','cxx','d','asm','fc','includes') +@after_method('propagate_uselib_vars','process_source') def apply_incpaths(self): - lst=self.to_incnodes(self.to_list(getattr(self,'includes',[]))+self.env['INCLUDES']) + lst=self.to_incnodes(self.to_list(getattr(self,'includes',[]))+self.env.INCLUDES) self.includes_nodes=lst - self.env['INCPATHS']=[x.abspath()for x in lst] + cwd=self.get_cwd() + self.env.INCPATHS=[x.path_from(cwd)for x in lst] class link_task(Task.Task): color='YELLOW' inst_to=None - chmod=Utils.O644 + chmod=Utils.O755 def add_target(self,target): if isinstance(target,str): + base=self.generator.path + if target.startswith('#'): + target=target[1:] + base=self.generator.bld.bldnode pattern=self.env[self.__class__.__name__+'_PATTERN'] if not pattern: pattern='%s' folder,name=os.path.split(target) - if self.__class__.__name__.find('shlib')>0: - if self.env.DEST_BINFMT=='pe'and getattr(self.generator,'vnum',None): - name=name+'-'+self.generator.vnum.split('.')[0] - tmp=folder+os.sep+pattern%name - target=self.generator.path.find_or_declare(tmp) + if self.__class__.__name__.find('shlib')>0 and getattr(self.generator,'vnum',None): + nums=self.generator.vnum.split('.') + if self.env.DEST_BINFMT=='pe': + name=name+'-'+nums[0] + elif self.env.DEST_OS=='openbsd': + pattern='%s.%s'%(pattern,nums[0]) + if len(nums)>=2: + pattern+='.%s'%nums[1] + if folder: + tmp=folder+os.sep+pattern%name + else: + tmp=pattern%name + target=base.find_or_declare(tmp) self.set_outputs(target) + def exec_command(self,*k,**kw): + ret=super(link_task,self).exec_command(*k,**kw) + if not ret and self.env.DO_MANIFEST: + ret=self.exec_mf() + return ret + def exec_mf(self): + if not self.env.MT: + return 0 + manifest=None + for out_node in self.outputs: + if out_node.name.endswith('.manifest'): + manifest=out_node.abspath() + break + else: + return 0 + mode='' + for x in Utils.to_list(self.generator.features): + if x in('cprogram','cxxprogram','fcprogram','fcprogram_test'): + mode=1 + elif x in('cshlib','cxxshlib','fcshlib'): + mode=2 + Logs.debug('msvc: embedding manifest in mode %r',mode) + lst=[]+self.env.MT + lst.extend(Utils.to_list(self.env.MTFLAGS)) + lst.extend(['-manifest',manifest]) + lst.append('-outputresource:%s;%s'%(self.outputs[0].abspath(),mode)) + return super(link_task,self).exec_command(lst) class stlink_task(link_task): run_str='${AR} ${ARFLAGS} ${AR_TGT_F}${TGT} ${AR_SRC_F}${SRC}' + chmod=Utils.O644 def rm_tgt(cls): old=cls.run def wrap(self): @@ -85,6 +127,8 @@ return old(self) setattr(cls,'run',wrap) rm_tgt(stlink_task) +@feature('c','cxx','d','fc','asm') +@after_method('process_source') def apply_link(self): for x in self.features: if x=='cprogram'and'cxx'in self.features: @@ -105,7 +149,8 @@ except AttributeError: inst_to=self.link_task.__class__.inst_to if inst_to: - self.install_task=self.bld.install_files(inst_to,self.link_task.outputs[:],env=self.env,chmod=self.link_task.chmod) + self.install_task=self.add_install_files(install_to=inst_to,install_from=self.link_task.outputs[:],chmod=self.link_task.chmod,task=self.link_task) +@taskgen_method def use_rec(self,name,**kw): if name in self.tmp_use_not or name in self.tmp_use_seen: return @@ -125,21 +170,26 @@ y.tmp_use_var='' else: objects=False - if not isinstance(y.link_task,stlink_task): + if not isinstance(link_task,stlink_task): stlib=False y.tmp_use_var='LIB' else: y.tmp_use_var='STLIB' p=self.tmp_use_prec for x in self.to_list(getattr(y,'use',[])): + if self.env["STLIB_"+x]: + continue try: p[x].append(name) - except: + except KeyError: p[x]=[name] self.use_rec(x,objects=objects,stlib=stlib) +@feature('c','cxx','d','use','fc') +@before_method('apply_incpaths','propagate_uselib_vars') +@after_method('apply_link','process_source') def process_use(self): use_not=self.tmp_use_not=set([]) - use_seen=self.tmp_use_seen=[] + self.tmp_use_seen=[] use_prec=self.tmp_use_prec={} self.uselib=self.to_list(getattr(self,'uselib',[])) self.includes=self.to_list(getattr(self,'includes',[])) @@ -149,7 +199,7 @@ for x in use_not: if x in use_prec: del use_prec[x] - out=[] + out=self.tmp_use_sorted=[] tmp=[] for x in self.tmp_use_seen: for k in use_prec.values(): @@ -180,26 +230,32 @@ y=self.bld.get_tgen_by_name(x) var=y.tmp_use_var if var and link_task: - if var=='LIB'or y.tmp_use_stlib: + if var=='LIB'or y.tmp_use_stlib or x in names: self.env.append_value(var,[y.target[y.target.rfind(os.sep)+1:]]) self.link_task.dep_nodes.extend(y.link_task.outputs) - tmp_path=y.link_task.outputs[0].parent.path_from(self.bld.bldnode) - self.env.append_value(var+'PATH',[tmp_path]) + tmp_path=y.link_task.outputs[0].parent.path_from(self.get_cwd()) + self.env.append_unique(var+'PATH',[tmp_path]) else: if y.tmp_use_objects: self.add_objects_from_tgen(y) if getattr(y,'export_includes',None): self.includes.extend(y.to_incnodes(y.export_includes)) + if getattr(y,'export_defines',None): + self.env.append_value('DEFINES',self.to_list(y.export_defines)) for x in names: try: y=self.bld.get_tgen_by_name(x) - except: + except Errors.WafError: if not self.env['STLIB_'+x]and not x in self.uselib: self.uselib.append(x) else: - for k in self.to_list(getattr(y,'uselib',[])): + for k in self.to_list(getattr(y,'use',[])): if not self.env['STLIB_'+k]and not k in self.uselib: self.uselib.append(k) +@taskgen_method +def accept_node_to_link(self,node): + return not node.name.endswith('.pdb') +@taskgen_method def add_objects_from_tgen(self,tg): try: link_task=self.link_task @@ -208,27 +264,33 @@ else: for tsk in getattr(tg,'compiled_tasks',[]): for x in tsk.outputs: - if x.name.endswith('.o')or x.name.endswith('.obj'): + if self.accept_node_to_link(x): link_task.inputs.append(x) +@taskgen_method def get_uselib_vars(self): _vars=set([]) for x in self.features: if x in USELIB_VARS: _vars|=USELIB_VARS[x] return _vars +@feature('c','cxx','d','fc','javac','cs','uselib','asm') +@after_method('process_use') def propagate_uselib_vars(self): _vars=self.get_uselib_vars() env=self.env - for x in _vars: - y=x.lower() - env.append_unique(x,self.to_list(getattr(self,y,[]))) - for x in self.features: - for var in _vars: - compvar='%s_%s'%(var,x) - env.append_value(var,env[compvar]) - for x in self.to_list(getattr(self,'uselib',[])): - for v in _vars: - env.append_value(v,env[v+'_'+x]) + app=env.append_value + feature_uselib=self.features+self.to_list(getattr(self,'uselib',[])) + for var in _vars: + y=var.lower() + val=getattr(self,y,[]) + if val: + app(var,self.to_list(val)) + for x in feature_uselib: + val=env['%s_%s'%(var,x)] + if val: + app(var,val) +@feature('cshlib','cxxshlib','fcshlib') +@after_method('apply_link') def apply_implib(self): if not self.env.DEST_BINFMT=='pe': return @@ -237,58 +299,92 @@ name=self.target.name else: name=os.path.split(self.target)[1] - implib=self.env['implib_PATTERN']%name + implib=self.env.implib_PATTERN%name implib=dll.parent.find_or_declare(implib) - self.env.append_value('LINKFLAGS',self.env['IMPLIB_ST']%implib.bldpath()) + self.env.append_value('LINKFLAGS',self.env.IMPLIB_ST%implib.bldpath()) self.link_task.outputs.append(implib) if getattr(self,'defs',None)and self.env.DEST_BINFMT=='pe': node=self.path.find_resource(self.defs) if not node: raise Errors.WafError('invalid def file %r'%self.defs) if'msvc'in(self.env.CC_NAME,self.env.CXX_NAME): - self.env.append_value('LINKFLAGS','/def:%s'%node.path_from(self.bld.bldnode)) + self.env.append_value('LINKFLAGS','/def:%s'%node.path_from(self.get_cwd())) self.link_task.dep_nodes.append(node) else: self.link_task.inputs.append(node) - try: - inst_to=self.install_path - except AttributeError: - inst_to=self.link_task.__class__.inst_to - if not inst_to: - return - self.implib_install_task=self.bld.install_as('${PREFIX}/lib/%s'%implib.name,implib,self.env) + if getattr(self,'install_task',None): + try: + inst_to=self.install_path_implib + except AttributeError: + try: + inst_to=self.install_path + except AttributeError: + inst_to='${IMPLIBDIR}' + self.install_task.install_to='${BINDIR}' + if not self.env.IMPLIBDIR: + self.env.IMPLIBDIR=self.env.LIBDIR + self.implib_install_task=self.add_install_files(install_to=inst_to,install_from=implib,chmod=self.link_task.chmod,task=self.link_task) +re_vnum=re.compile('^([1-9]\\d*|0)([.]([1-9]\\d*|0)){0,2}?$') +@feature('cshlib','cxxshlib','dshlib','fcshlib','vnum') +@after_method('apply_link','propagate_uselib_vars') def apply_vnum(self): if not getattr(self,'vnum','')or os.name!='posix'or self.env.DEST_BINFMT not in('elf','mac-o'): return link=self.link_task + if not re_vnum.match(self.vnum): + raise Errors.WafError('Invalid vnum %r for target %r'%(self.vnum,getattr(self,'name',self))) nums=self.vnum.split('.') node=link.outputs[0] + cnum=getattr(self,'cnum',str(nums[0])) + cnums=cnum.split('.') + if len(cnums)>len(nums)or nums[0:len(cnums)]!=cnums: + raise Errors.WafError('invalid compatibility version %s'%cnum) libname=node.name if libname.endswith('.dylib'): name3=libname.replace('.dylib','.%s.dylib'%self.vnum) - name2=libname.replace('.dylib','.%s.dylib'%nums[0]) + name2=libname.replace('.dylib','.%s.dylib'%cnum) else: name3=libname+'.'+self.vnum - name2=libname+'.'+nums[0] + name2=libname+'.'+cnum if self.env.SONAME_ST: v=self.env.SONAME_ST%name2 self.env.append_value('LINKFLAGS',v.split()) - tsk=self.create_task('vnum',node,[node.parent.find_or_declare(name2),node.parent.find_or_declare(name3)]) - if getattr(self.bld,'is_install',None): + if self.env.DEST_OS!='openbsd': + outs=[node.parent.make_node(name3)] + if name2!=name3: + outs.append(node.parent.make_node(name2)) + self.create_task('vnum',node,outs) + if getattr(self,'install_task',None): self.install_task.hasrun=Task.SKIP_ME - bld=self.bld - path=self.install_task.dest - t1=bld.install_as(path+os.sep+name3,node,env=self.env,chmod=self.link_task.chmod) - t2=bld.symlink_as(path+os.sep+name2,name3) - t3=bld.symlink_as(path+os.sep+libname,name3) - self.vnum_install_task=(t1,t2,t3) - if'-dynamiclib'in self.env['LINKFLAGS']and getattr(self,'install_task',None): - path=os.path.join(self.install_task.get_install_path(),self.link_task.outputs[0].name) - self.env.append_value('LINKFLAGS',['-install_name',path]) + path=self.install_task.install_to + if self.env.DEST_OS=='openbsd': + libname=self.link_task.outputs[0].name + t1=self.add_install_as(install_to='%s/%s'%(path,libname),install_from=node,chmod=self.link_task.chmod) + self.vnum_install_task=(t1,) + else: + t1=self.add_install_as(install_to=path+os.sep+name3,install_from=node,chmod=self.link_task.chmod) + t3=self.add_symlink_as(install_to=path+os.sep+libname,install_from=name3) + if name2!=name3: + t2=self.add_symlink_as(install_to=path+os.sep+name2,install_from=name3) + self.vnum_install_task=(t1,t2,t3) + else: + self.vnum_install_task=(t1,t3) + if'-dynamiclib'in self.env.LINKFLAGS: + try: + inst_to=self.install_path + except AttributeError: + inst_to=self.link_task.__class__.inst_to + if inst_to: + p=Utils.subst_vars(inst_to,self.env) + path=os.path.join(p,name2) + self.env.append_value('LINKFLAGS',['-install_name',path]) + self.env.append_value('LINKFLAGS','-Wl,-compatibility_version,%s'%cnum) + self.env.append_value('LINKFLAGS','-Wl,-current_version,%s'%self.vnum) class vnum(Task.Task): color='CYAN' - quient=True ext_in=['.bin'] + def keyword(self): + return'Symlinking' def run(self): for x in self.outputs: path=x.abspath() @@ -305,26 +401,25 @@ for t in self.run_after: if not t.hasrun: return Task.ASK_LATER - for x in self.outputs: - x.sig=Utils.h_file(x.abspath()) return Task.SKIP_ME class fake_stlib(stlink_task): def runnable_status(self): for t in self.run_after: if not t.hasrun: return Task.ASK_LATER - for x in self.outputs: - x.sig=Utils.h_file(x.abspath()) return Task.SKIP_ME -def read_shlib(self,name,paths=[]): - return self(name=name,features='fake_lib',lib_paths=paths,lib_type='shlib') -def read_stlib(self,name,paths=[]): - return self(name=name,features='fake_lib',lib_paths=paths,lib_type='stlib') -lib_patterns={'shlib':['lib%s.so','%s.so','lib%s.dll','%s.dll'],'stlib':['lib%s.a','%s.a','lib%s.dll','%s.dll','lib%s.lib','%s.lib'],} +@conf +def read_shlib(self,name,paths=[],export_includes=[],export_defines=[]): + return self(name=name,features='fake_lib',lib_paths=paths,lib_type='shlib',export_includes=export_includes,export_defines=export_defines) +@conf +def read_stlib(self,name,paths=[],export_includes=[],export_defines=[]): + return self(name=name,features='fake_lib',lib_paths=paths,lib_type='stlib',export_includes=export_includes,export_defines=export_defines) +lib_patterns={'shlib':['lib%s.so','%s.so','lib%s.dylib','lib%s.dll','%s.dll'],'stlib':['lib%s.a','%s.a','lib%s.dll','%s.dll','lib%s.lib','%s.lib'],} +@feature('fake_lib') def process_lib(self): node=None names=[x%self.name for x in lib_patterns[self.lib_type]] - for x in self.lib_paths+[self.path,'/usr/lib64','/usr/lib','/usr/local/lib64','/usr/local/lib']: + for x in self.lib_paths+[self.path]+SYSTEM_LIB_PATHS: if not isinstance(x,Node.Node): x=self.bld.root.find_node(x)or self.path.find_node(x) if not x: @@ -332,7 +427,10 @@ for y in names: node=x.find_node(y) if node: - node.sig=Utils.h_file(node.abspath()) + try: + Utils.h_file(node.abspath()) + except EnvironmentError: + raise ValueError('Could not read %r'%y) break else: continue @@ -344,32 +442,35 @@ class fake_o(Task.Task): def runnable_status(self): return Task.SKIP_ME +@extension('.o','.obj') def add_those_o_files(self,node): tsk=self.create_task('fake_o',[],node) try: self.compiled_tasks.append(tsk) except AttributeError: self.compiled_tasks=[tsk] - -taskgen_method(create_compiled_task) -taskgen_method(to_incnodes) -feature('c','cxx','d','go','asm','fc','includes')(apply_incpaths) -after_method('propagate_uselib_vars','process_source')(apply_incpaths) -feature('c','cxx','d','go','fc','asm')(apply_link) -after_method('process_source')(apply_link) -taskgen_method(use_rec) -feature('c','cxx','d','use','fc')(process_use) -before_method('apply_incpaths','propagate_uselib_vars')(process_use) -after_method('apply_link','process_source')(process_use) -taskgen_method(add_objects_from_tgen) -taskgen_method(get_uselib_vars) -feature('c','cxx','d','fc','javac','cs','uselib')(propagate_uselib_vars) -after_method('process_use')(propagate_uselib_vars) -feature('cshlib','cxxshlib','fcshlib')(apply_implib) -after_method('apply_link')(apply_implib) -feature('cshlib','cxxshlib','dshlib','fcshlib','vnum')(apply_vnum) -after_method('apply_link')(apply_vnum) -conf(read_shlib) -conf(read_stlib) -feature('fake_lib')(process_lib) -extension('.o','.obj')(add_those_o_files) \ No newline at end of file +@feature('fake_obj') +@before_method('process_source') +def process_objs(self): + for node in self.to_nodes(self.source): + self.add_those_o_files(node) + self.source=[] +@conf +def read_object(self,obj): + if not isinstance(obj,self.path.__class__): + obj=self.path.find_resource(obj) + return self(features='fake_obj',source=obj,name=obj.name) +@feature('cxxprogram','cprogram') +@after_method('apply_link','process_use') +def set_full_paths_hpux(self): + if self.env.DEST_OS!='hp-ux': + return + base=self.bld.bldnode.abspath() + for var in['LIBPATH','STLIBPATH']: + lst=[] + for x in self.env[var]: + if x.startswith('/'): + lst.append(x) + else: + lst.append(os.path.normpath(os.path.join(base,x))) + self.env[var]=lst diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/clang.py glmark2-2021.02/waflib/Tools/clang.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/clang.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/clang.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,20 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +from waflib.Tools import ccroot,ar,gcc +from waflib.Configure import conf +@conf +def find_clang(conf): + cc=conf.find_program('clang',var='CC') + conf.get_cc_version(cc,clang=True) + conf.env.CC_NAME='clang' +def configure(conf): + conf.find_clang() + conf.find_program(['llvm-ar','ar'],var='AR') + conf.find_ar() + conf.gcc_common_flags() + conf.gcc_modifier_platform() + conf.cc_load_tools() + conf.cc_add_flags() + conf.link_add_flags() diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/clangxx.py glmark2-2021.02/waflib/Tools/clangxx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/clangxx.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/clangxx.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,20 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +from waflib.Tools import ccroot,ar,gxx +from waflib.Configure import conf +@conf +def find_clangxx(conf): + cxx=conf.find_program('clang++',var='CXX') + conf.get_cc_version(cxx,clang=True) + conf.env.CXX_NAME='clang' +def configure(conf): + conf.find_clangxx() + conf.find_program(['llvm-ar','ar'],var='AR') + conf.find_ar() + conf.gxx_common_flags() + conf.gxx_modifier_platform() + conf.cxx_load_tools() + conf.cxx_add_flags() + conf.link_add_flags() diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_c.py glmark2-2021.02/waflib/Tools/compiler_c.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_c.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/compiler_c.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,39 +1,44 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,imp,types +import re from waflib.Tools import ccroot -from waflib import Utils,Configure +from waflib import Utils from waflib.Logs import debug -c_compiler={'win32':['msvc','gcc'],'cygwin':['gcc'],'darwin':['gcc'],'aix':['xlc','gcc'],'linux':['gcc','icc'],'sunos':['suncc','gcc'],'irix':['gcc','irixcc'],'hpux':['gcc'],'gnu':['gcc'],'java':['gcc','msvc','icc'],'default':['gcc'],} +c_compiler={'win32':['msvc','gcc','clang'],'cygwin':['gcc'],'darwin':['clang','gcc'],'aix':['xlc','gcc','clang'],'linux':['gcc','clang','icc'],'sunos':['suncc','gcc'],'irix':['gcc','irixcc'],'hpux':['gcc'],'osf1V':['gcc'],'gnu':['gcc','clang'],'java':['gcc','msvc','clang','icc'],'default':['clang','gcc'],} +def default_compilers(): + build_platform=Utils.unversioned_sys_platform() + possible_compiler_list=c_compiler.get(build_platform,c_compiler['default']) + return' '.join(possible_compiler_list) def configure(conf): - try:test_for_compiler=conf.options.check_c_compiler - except AttributeError:conf.fatal("Add options(opt): opt.load('compiler_c')") - for compiler in test_for_compiler.split(): + try: + test_for_compiler=conf.options.check_c_compiler or default_compilers() + except AttributeError: + conf.fatal("Add options(opt): opt.load('compiler_c')") + for compiler in re.split('[ ,]+',test_for_compiler): conf.env.stash() - conf.start_msg('Checking for %r (c compiler)'%compiler) + conf.start_msg('Checking for %r (C compiler)'%compiler) try: conf.load(compiler) - except conf.errors.ConfigurationError ,e: + except conf.errors.ConfigurationError as e: conf.env.revert() conf.end_msg(False) - debug('compiler_c: %r'%e) + debug('compiler_c: %r',e) else: - if conf.env['CC']: + if conf.env.CC: conf.end_msg(conf.env.get_flat('CC')) - conf.env['COMPILER_CC']=compiler + conf.env.COMPILER_CC=compiler + conf.env.commit() break + conf.env.revert() conf.end_msg(False) else: - conf.fatal('could not configure a c compiler!') + conf.fatal('could not configure a C compiler!') def options(opt): + test_for_compiler=default_compilers() opt.load_special_tools('c_*.py',ban=['c_dumbpreproc.py']) - global c_compiler - build_platform=Utils.unversioned_sys_platform() - possible_compiler_list=c_compiler[build_platform in c_compiler and build_platform or'default'] - test_for_compiler=' '.join(possible_compiler_list) - cc_compiler_opts=opt.add_option_group("C Compiler Options") - cc_compiler_opts.add_option('--check-c-compiler',default="%s"%test_for_compiler,help='On this platform (%s) the following C-Compiler will be checked by default: "%s"'%(build_platform,test_for_compiler),dest="check_c_compiler") + cc_compiler_opts=opt.add_option_group('Configuration options') + cc_compiler_opts.add_option('--check-c-compiler',default=None,help='list of C compilers to try [%s]'%test_for_compiler,dest="check_c_compiler") for x in test_for_compiler.split(): opt.load('%s'%x) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_cxx.py glmark2-2021.02/waflib/Tools/compiler_cxx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_cxx.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/compiler_cxx.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,39 +1,44 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,imp,types +import re from waflib.Tools import ccroot -from waflib import Utils,Configure +from waflib import Utils from waflib.Logs import debug -cxx_compiler={'win32':['msvc','g++'],'cygwin':['g++'],'darwin':['g++'],'aix':['xlc++','g++'],'linux':['g++','icpc'],'sunos':['sunc++','g++'],'irix':['g++'],'hpux':['g++'],'gnu':['g++'],'java':['g++','msvc','icpc'],'default':['g++']} +cxx_compiler={'win32':['msvc','g++','clang++'],'cygwin':['g++'],'darwin':['clang++','g++'],'aix':['xlc++','g++','clang++'],'linux':['g++','clang++','icpc'],'sunos':['sunc++','g++'],'irix':['g++'],'hpux':['g++'],'osf1V':['g++'],'gnu':['g++','clang++'],'java':['g++','msvc','clang++','icpc'],'default':['clang++','g++']} +def default_compilers(): + build_platform=Utils.unversioned_sys_platform() + possible_compiler_list=cxx_compiler.get(build_platform,cxx_compiler['default']) + return' '.join(possible_compiler_list) def configure(conf): - try:test_for_compiler=conf.options.check_cxx_compiler - except AttributeError:conf.fatal("Add options(opt): opt.load('compiler_cxx')") - for compiler in test_for_compiler.split(): + try: + test_for_compiler=conf.options.check_cxx_compiler or default_compilers() + except AttributeError: + conf.fatal("Add options(opt): opt.load('compiler_cxx')") + for compiler in re.split('[ ,]+',test_for_compiler): conf.env.stash() - conf.start_msg('Checking for %r (c++ compiler)'%compiler) + conf.start_msg('Checking for %r (C++ compiler)'%compiler) try: conf.load(compiler) - except conf.errors.ConfigurationError ,e: + except conf.errors.ConfigurationError as e: conf.env.revert() conf.end_msg(False) - debug('compiler_cxx: %r'%e) + debug('compiler_cxx: %r',e) else: - if conf.env['CXX']: + if conf.env.CXX: conf.end_msg(conf.env.get_flat('CXX')) - conf.env['COMPILER_CXX']=compiler + conf.env.COMPILER_CXX=compiler + conf.env.commit() break + conf.env.revert() conf.end_msg(False) else: - conf.fatal('could not configure a c++ compiler!') + conf.fatal('could not configure a C++ compiler!') def options(opt): + test_for_compiler=default_compilers() opt.load_special_tools('cxx_*.py') - global cxx_compiler - build_platform=Utils.unversioned_sys_platform() - possible_compiler_list=cxx_compiler[build_platform in cxx_compiler and build_platform or'default'] - test_for_compiler=' '.join(possible_compiler_list) - cxx_compiler_opts=opt.add_option_group('C++ Compiler Options') - cxx_compiler_opts.add_option('--check-cxx-compiler',default="%s"%test_for_compiler,help='On this platform (%s) the following C++ Compiler will be checked by default: "%s"'%(build_platform,test_for_compiler),dest="check_cxx_compiler") + cxx_compiler_opts=opt.add_option_group('Configuration options') + cxx_compiler_opts.add_option('--check-cxx-compiler',default=None,help='list of C++ compilers to try [%s]'%test_for_compiler,dest="check_cxx_compiler") for x in test_for_compiler.split(): opt.load('%s'%x) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_d.py glmark2-2021.02/waflib/Tools/compiler_d.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_d.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/compiler_d.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,30 +1,41 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,imp,types -from waflib import Utils,Configure,Options,Logs +import re +from waflib import Utils,Logs +d_compiler={'default':['gdc','dmd','ldc2']} +def default_compilers(): + build_platform=Utils.unversioned_sys_platform() + possible_compiler_list=d_compiler.get(build_platform,d_compiler['default']) + return' '.join(possible_compiler_list) def configure(conf): - for compiler in conf.options.dcheck.split(','): + try: + test_for_compiler=conf.options.check_d_compiler or default_compilers() + except AttributeError: + conf.fatal("Add options(opt): opt.load('compiler_d')") + for compiler in re.split('[ ,]+',test_for_compiler): conf.env.stash() - conf.start_msg('Checking for %r (d compiler)'%compiler) + conf.start_msg('Checking for %r (D compiler)'%compiler) try: conf.load(compiler) - except conf.errors.ConfigurationError ,e: + except conf.errors.ConfigurationError as e: conf.env.revert() conf.end_msg(False) - Logs.debug('compiler_cxx: %r'%e) + Logs.debug('compiler_d: %r',e) else: if conf.env.D: conf.end_msg(conf.env.get_flat('D')) - conf.env['COMPILER_D']=compiler - conf.env.D_COMPILER=conf.env.D + conf.env.COMPILER_D=compiler + conf.env.commit() break + conf.env.revert() conf.end_msg(False) else: - conf.fatal('no suitable d compiler was found') + conf.fatal('could not configure a D compiler!') def options(opt): - d_compiler_opts=opt.add_option_group('D Compiler Options') - d_compiler_opts.add_option('--check-d-compiler',default='gdc,dmd',action='store',help='check for the compiler [Default:gdc,dmd]',dest='dcheck') - for d_compiler in['gdc','dmd']: - opt.load('%s'%d_compiler) + test_for_compiler=default_compilers() + d_compiler_opts=opt.add_option_group('Configuration options') + d_compiler_opts.add_option('--check-d-compiler',default=None,help='list of D compilers to try [%s]'%test_for_compiler,dest='check_d_compiler') + for x in test_for_compiler.split(): + opt.load('%s'%x) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_fc.py glmark2-2021.02/waflib/Tools/compiler_fc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/compiler_fc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/compiler_fc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,43 +1,43 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,imp,types -from waflib import Utils,Configure,Options,Logs,Errors +import re +from waflib import Utils,Logs from waflib.Tools import fc fc_compiler={'win32':['gfortran','ifort'],'darwin':['gfortran','g95','ifort'],'linux':['gfortran','g95','ifort'],'java':['gfortran','g95','ifort'],'default':['gfortran'],'aix':['gfortran']} -def __list_possible_compiler(platform): - try: - return fc_compiler[platform] - except KeyError: - return fc_compiler["default"] +def default_compilers(): + build_platform=Utils.unversioned_sys_platform() + possible_compiler_list=fc_compiler.get(build_platform,fc_compiler['default']) + return' '.join(possible_compiler_list) def configure(conf): - try:test_for_compiler=conf.options.check_fc - except AttributeError:conf.fatal("Add options(opt): opt.load('compiler_fc')") - for compiler in test_for_compiler.split(): + try: + test_for_compiler=conf.options.check_fortran_compiler or default_compilers() + except AttributeError: + conf.fatal("Add options(opt): opt.load('compiler_fc')") + for compiler in re.split('[ ,]+',test_for_compiler): conf.env.stash() - conf.start_msg('Checking for %r (fortran compiler)'%compiler) + conf.start_msg('Checking for %r (Fortran compiler)'%compiler) try: conf.load(compiler) - except conf.errors.ConfigurationError ,e: + except conf.errors.ConfigurationError as e: conf.env.revert() conf.end_msg(False) - Logs.debug('compiler_fortran: %r'%e) + Logs.debug('compiler_fortran: %r',e) else: - if conf.env['FC']: + if conf.env.FC: conf.end_msg(conf.env.get_flat('FC')) conf.env.COMPILER_FORTRAN=compiler + conf.env.commit() break + conf.env.revert() conf.end_msg(False) else: - conf.fatal('could not configure a fortran compiler!') + conf.fatal('could not configure a Fortran compiler!') def options(opt): + test_for_compiler=default_compilers() opt.load_special_tools('fc_*.py') - build_platform=Utils.unversioned_sys_platform() - detected_platform=Options.platform - possible_compiler_list=__list_possible_compiler(detected_platform) - test_for_compiler=' '.join(possible_compiler_list) - fortran_compiler_opts=opt.add_option_group("Fortran Compiler Options") - fortran_compiler_opts.add_option('--check-fortran-compiler',default="%s"%test_for_compiler,help='On this platform (%s) the following Fortran Compiler will be checked by default: "%s"'%(detected_platform,test_for_compiler),dest="check_fc") - for compiler in test_for_compiler.split(): - opt.load('%s'%compiler) + fortran_compiler_opts=opt.add_option_group('Configuration options') + fortran_compiler_opts.add_option('--check-fortran-compiler',default=None,help='list of Fortran compiler to try [%s]'%test_for_compiler,dest="check_fortran_compiler") + for x in test_for_compiler.split(): + opt.load('%s'%x) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_osx.py glmark2-2021.02/waflib/Tools/c_osx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_osx.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c_osx.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,9 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,shutil,sys,platform -from waflib import TaskGen,Task,Build,Options,Utils,Errors +import os,shutil,platform +from waflib import Task,Utils from waflib.TaskGen import taskgen_method,feature,after_method,before_method app_info=''' @@ -19,18 +19,19 @@ NOTE THIS IS A GENERATED FILE, DO NOT MODIFY CFBundleExecutable - %s + {app_name} ''' +@feature('c','cxx') def set_macosx_deployment_target(self): - if self.env['MACOSX_DEPLOYMENT_TARGET']: - os.environ['MACOSX_DEPLOYMENT_TARGET']=self.env['MACOSX_DEPLOYMENT_TARGET'] + if self.env.MACOSX_DEPLOYMENT_TARGET: + os.environ['MACOSX_DEPLOYMENT_TARGET']=self.env.MACOSX_DEPLOYMENT_TARGET elif'MACOSX_DEPLOYMENT_TARGET'not in os.environ: if Utils.unversioned_sys_platform()=='darwin': os.environ['MACOSX_DEPLOYMENT_TARGET']='.'.join(platform.mac_ver()[0].split('.')[:2]) +@taskgen_method def create_bundle_dirs(self,name,out): - bld=self.bld dir=out.parent.find_or_declare(name) dir.mkdir() macos=dir.find_or_declare(['Contents','MacOS']) @@ -44,40 +45,44 @@ else: name=name+'.app' return name +@feature('cprogram','cxxprogram') +@after_method('apply_link') def create_task_macapp(self): - if self.env['MACAPP']or getattr(self,'mac_app',False): + if self.env.MACAPP or getattr(self,'mac_app',False): out=self.link_task.outputs[0] name=bundle_name_for_output(out) dir=self.create_bundle_dirs(name,out) n1=dir.find_or_declare(['Contents','MacOS',out.name]) self.apptask=self.create_task('macapp',self.link_task.outputs,n1) inst_to=getattr(self,'install_path','/Applications')+'/%s/Contents/MacOS/'%name - self.bld.install_files(inst_to,n1,chmod=Utils.O755) - if getattr(self,'mac_resources',None): + self.add_install_files(install_to=inst_to,install_from=n1,chmod=Utils.O755) + if getattr(self,'mac_files',None): + mac_files_root=getattr(self,'mac_files_root',None) + if isinstance(mac_files_root,str): + mac_files_root=self.path.find_node(mac_files_root) + if not mac_files_root: + self.bld.fatal('Invalid mac_files_root %r'%self.mac_files_root) res_dir=n1.parent.parent.make_node('Resources') inst_to=getattr(self,'install_path','/Applications')+'/%s/Resources'%name - for x in self.to_list(self.mac_resources): - node=self.path.find_node(x) - if not node: - raise Errors.WafError('Missing mac_resource %r in %r'%(x,self)) - parent=node.parent - if os.path.isdir(node.abspath()): - nodes=node.ant_glob('**') - else: - nodes=[node] - for node in nodes: - rel=node.path_from(parent) - tsk=self.create_task('macapp',node,res_dir.make_node(rel)) - self.bld.install_as(inst_to+'/%s'%rel,node) + for node in self.to_nodes(self.mac_files): + relpath=node.path_from(mac_files_root or node.parent) + self.create_task('macapp',node,res_dir.make_node(relpath)) + self.add_install_as(install_to=os.path.join(inst_to,relpath),install_source=node) if getattr(self.bld,'is_install',None): self.install_task.hasrun=Task.SKIP_ME +@feature('cprogram','cxxprogram') +@after_method('apply_link') def create_task_macplist(self): - if self.env['MACAPP']or getattr(self,'mac_app',False): + if self.env.MACAPP or getattr(self,'mac_app',False): out=self.link_task.outputs[0] name=bundle_name_for_output(out) dir=self.create_bundle_dirs(name,out) n1=dir.find_or_declare(['Contents','Info.plist']) self.plisttask=plisttask=self.create_task('macplist',[],n1) + plisttask.context={'app_name':self.link_task.outputs[0].name,'env':self.env} + plist_ctx=getattr(self,'plist_context',None) + if(plist_ctx): + plisttask.context.update(plist_ctx) if getattr(self,'mac_plist',False): node=self.path.find_resource(self.mac_plist) if node: @@ -85,13 +90,15 @@ else: plisttask.code=self.mac_plist else: - plisttask.code=app_info%self.link_task.outputs[0].name + plisttask.code=app_info inst_to=getattr(self,'install_path','/Applications')+'/%s/Contents/'%name - self.bld.install_files(inst_to,n1) + self.add_install_files(install_to=inst_to,install_from=n1) +@feature('cshlib','cxxshlib') +@before_method('apply_link','propagate_uselib_vars') def apply_bundle(self): - if self.env['MACBUNDLE']or getattr(self,'mac_bundle',False): - self.env['LINKFLAGS_cshlib']=self.env['LINKFLAGS_cxxshlib']=[] - self.env['cshlib_PATTERN']=self.env['cxxshlib_PATTERN']=self.env['macbundle_PATTERN'] + if self.env.MACBUNDLE or getattr(self,'mac_bundle',False): + self.env.LINKFLAGS_cshlib=self.env.LINKFLAGS_cxxshlib=[] + self.env.cshlib_PATTERN=self.env.cxxshlib_PATTERN=self.env.macbundle_PATTERN use=self.use=self.to_list(getattr(self,'use',[])) if not'MACBUNDLE'in use: use.append('MACBUNDLE') @@ -109,13 +116,6 @@ txt=self.code else: txt=self.inputs[0].read() + context=getattr(self,'context',{}) + txt=txt.format(**context) self.outputs[0].write(txt) - -feature('c','cxx')(set_macosx_deployment_target) -taskgen_method(create_bundle_dirs) -feature('cprogram','cxxprogram')(create_task_macapp) -after_method('apply_link')(create_task_macapp) -feature('cprogram','cxxprogram')(create_task_macplist) -after_method('apply_link')(create_task_macplist) -feature('cshlib','cxxshlib')(apply_bundle) -before_method('apply_link','propagate_uselib_vars')(apply_bundle) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_preproc.py glmark2-2021.02/waflib/Tools/c_preproc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_preproc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c_preproc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,9 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import re,sys,os,string,traceback -from waflib import Logs,Build,Utils,Errors -from waflib.Logs import debug,error +import re,string,traceback +from waflib import Logs,Utils,Errors class PreprocError(Errors.WafError): pass POPFILE='-' @@ -17,13 +14,13 @@ standard_includes=[] use_trigraphs=0 strict_quotes=0 -g_optrans={'not':'!','and':'&&','bitand':'&','and_eq':'&=','or':'||','bitor':'|','or_eq':'|=','xor':'^','xor_eq':'^=','compl':'~',} -re_lines=re.compile('^[ \t]*(#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',re.IGNORECASE|re.MULTILINE) +g_optrans={'not':'!','not_eq':'!','and':'&&','and_eq':'&=','or':'||','or_eq':'|=','xor':'^','xor_eq':'^=','bitand':'&','bitor':'|','compl':'~',} +re_lines=re.compile('^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',re.IGNORECASE|re.MULTILINE) re_mac=re.compile("^[a-zA-Z_]\w*") re_fun=re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]') re_pragma_once=re.compile('^\s*once\s*',re.IGNORECASE) re_nl=re.compile('\\\\\r*\n',re.MULTILINE) -re_cpp=re.compile(r"""(/\*[^*]*\*+([^/*][^*]*\*+)*/)|//[^\n]*|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)""",re.MULTILINE) +re_cpp=re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',re.DOTALL|re.MULTILINE) trig_def=[('??'+a,b)for a,b in zip("=-/!'()<>",r'#~\|^[]{}')] chr_esc={'0':0,'a':7,'b':8,'t':9,'n':10,'f':11,'v':12,'r':13,'\\':92,"'":39} NUM='i' @@ -39,21 +36,13 @@ undefined='u' skipped='s' def repl(m): - s=m.group(1) - if s: + s=m.group() + if s[0]=='/': return' ' - return m.group(3)or'' -def filter_comments(filename): - code=Utils.readf(filename) - if use_trigraphs: - for(a,b)in trig_def:code=code.split(a).join(b) - code=re_nl.sub('',code) - code=re_cpp.sub(repl,code) - return[(m.group(2),m.group(3))for m in re.finditer(re_lines,code)] + return s prec={} ops=['* / %','+ -','<< >>','< <= >= >','== !=','& | ^','&& ||',','] -for x in range(len(ops)): - syms=ops[x] +for x,syms in enumerate(ops): for u in syms.split(): prec[u]=x def trimquotes(s): @@ -73,23 +62,23 @@ elif d=='*':c=a*b elif d=='/':c=a/b elif d=='^':c=a^b - elif d=='|':c=a|b - elif d=='||':c=int(a or b) - elif d=='&':c=a&b - elif d=='&&':c=int(a and b) elif d=='==':c=int(a==b) - elif d=='!=':c=int(a!=b) + elif d=='|'or d=='bitor':c=a|b + elif d=='||'or d=='or':c=int(a or b) + elif d=='&'or d=='bitand':c=a&b + elif d=='&&'or d=='and':c=int(a and b) + elif d=='!='or d=='not_eq':c=int(a!=b) + elif d=='^'or d=='xor':c=int(a^b) elif d=='<=':c=int(a<=b) elif d=='<':c=int(a':c=int(a>b) elif d=='>=':c=int(a>=b) - elif d=='^':c=int(a^b) elif d=='<<':c=a<>':c=a>>b else:c=0 return c def get_num(lst): - if not lst:raise PreprocError("empty list for get_num") + if not lst:raise PreprocError('empty list for get_num') (p,v)=lst[0] if p==OP: if v=='(': @@ -106,7 +95,7 @@ count_par+=1 i+=1 else: - raise PreprocError("rparen expected %r"%lst) + raise PreprocError('rparen expected %r'%lst) (num,_)=get_term(lst[1:i]) return(num,lst[i+1:]) elif v=='+': @@ -118,27 +107,24 @@ num,lst=get_num(lst[1:]) return(int(not int(num)),lst) elif v=='~': + num,lst=get_num(lst[1:]) return(~int(num),lst) else: - raise PreprocError("Invalid op token %r for get_num"%lst) + raise PreprocError('Invalid op token %r for get_num'%lst) elif p==NUM: return v,lst[1:] elif p==IDENT: return 0,lst[1:] else: - raise PreprocError("Invalid token %r for get_num"%lst) + raise PreprocError('Invalid token %r for get_num'%lst) def get_term(lst): - if not lst:raise PreprocError("empty list for get_term") + if not lst:raise PreprocError('empty list for get_term') num,lst=get_num(lst) if not lst: return(num,[]) (p,v)=lst[0] if p==OP: - if v=='&&'and not num: - return(num,[]) - elif v=='||'and num: - return(num,[]) - elif v==',': + if v==',': return get_term(lst[1:]) elif v=='?': count_par=0 @@ -155,7 +141,7 @@ break i+=1 else: - raise PreprocError("rparen expected %r"%lst) + raise PreprocError('rparen expected %r'%lst) if int(num): return get_term(lst[1:i]) else: @@ -167,7 +153,7 @@ return get_term([(NUM,num2)]+lst) p2,v2=lst[0] if p2!=OP: - raise PreprocError("op expected %r"%lst) + raise PreprocError('op expected %r'%lst) if prec[v2]>=prec[v]: num2=reduce_nums(num,num2,v) return get_term([(NUM,num2)]+lst) @@ -175,7 +161,7 @@ num3,lst=get_num(lst[1:]) num3=reduce_nums(num2,num3,v2) return get_term([(NUM,num),(p,v),(NUM,num3)]+lst) - raise PreprocError("cannot reduce %r"%lst) + raise PreprocError('cannot reduce %r'%lst) def reduce_eval(lst): num,lst=get_term(lst) return(NUM,num) @@ -215,7 +201,7 @@ else: lst[i]=(NUM,0) else: - raise PreprocError("Invalid define expression %r"%lst) + raise PreprocError('Invalid define expression %r'%lst) elif p==IDENT and v in defs: if isinstance(defs[v],str): a,b=extract_macro(defs[v]) @@ -224,17 +210,19 @@ to_add=macro_def[1] if isinstance(macro_def[0],list): del lst[i] - for x in range(len(to_add)): - lst.insert(i,to_add[x]) + accu=to_add[:] + reduce_tokens(accu,defs,ban+[v]) + for tmp in accu: + lst.insert(i,tmp) i+=1 else: args=[] del lst[i] if i>=len(lst): - raise PreprocError("expected '(' after %r (got nothing)"%v) + raise PreprocError('expected ( after %r (got nothing)'%v) (p2,v2)=lst[i] if p2!=OP or v2!='(': - raise PreprocError("expected '(' after %r"%v) + raise PreprocError('expected ( after %r'%v) del lst[i] one_param=[] count_paren=0 @@ -249,7 +237,7 @@ if one_param:args.append(one_param) break elif v2==',': - if not one_param:raise PreprocError("empty param in funcall %s"%p) + if not one_param:raise PreprocError('empty param in funcall %r'%v) args.append(one_param) one_param=[] else: @@ -317,7 +305,7 @@ i+=1 def eval_macro(lst,defs): reduce_tokens(lst,defs,[]) - if not lst:raise PreprocError("missing tokens to evaluate") + if not lst:raise PreprocError('missing tokens to evaluate') (p,v)=reduce_eval(lst) return int(v)!=0 def extract_macro(txt): @@ -325,7 +313,7 @@ if re_fun.search(txt): p,name=t[0] p,v=t[1] - if p!=OP:raise PreprocError("expected open parenthesis") + if p!=OP:raise PreprocError('expected (') i=1 pindex=0 params={} @@ -341,50 +329,55 @@ elif p==OP and v==')': break else: - raise PreprocError("unexpected token (3)") + raise PreprocError('unexpected token (3)') elif prev==IDENT: if p==OP and v==',': prev=v elif p==OP and v==')': break else: - raise PreprocError("comma or ... expected") + raise PreprocError('comma or ... expected') elif prev==',': if p==IDENT: params[v]=pindex pindex+=1 prev=p elif p==OP and v=='...': - raise PreprocError("not implemented (1)") + raise PreprocError('not implemented (1)') else: - raise PreprocError("comma or ... expected (2)") + raise PreprocError('comma or ... expected (2)') elif prev=='...': - raise PreprocError("not implemented (2)") + raise PreprocError('not implemented (2)') else: - raise PreprocError("unexpected else") + raise PreprocError('unexpected else') return(name,[params,t[i+1:]]) else: (p,v)=t[0] - return(v,[[],t[1:]]) -re_include=re.compile('^\s*(<(?P.*)>|"(?P.*)")') + if len(t)>1: + return(v,[[],t[1:]]) + else: + return(v,[[],[('T','')]]) +re_include=re.compile('^\s*(<(?:.*)>|"(?:.*)")') def extract_include(txt,defs): m=re_include.search(txt) if m: - if m.group('a'):return'<',m.group('a') - if m.group('b'):return'"',m.group('b') + txt=m.group(1) + return txt[0],txt[1:-1] toks=tokenize(txt) reduce_tokens(toks,defs,['waf_include']) if not toks: - raise PreprocError("could not parse include %s"%txt) + raise PreprocError('could not parse include %r'%txt) if len(toks)==1: if toks[0][0]==STR: return'"',toks[0][1] else: if toks[0][1]=='<'and toks[-1][1]=='>': - return stringize(toks).lstrip('<').rstrip('>') - raise PreprocError("could not parse include %s."%txt) + ret='<',stringize(toks).lstrip('<').rstrip('>') + return ret + raise PreprocError('could not parse include %r'%txt) def parse_char(txt): - if not txt:raise PreprocError("attempted to parse a null char") + if not txt: + raise PreprocError('attempted to parse a null char') if txt[0]!='\\': return ord(txt) c=txt[1] @@ -398,8 +391,10 @@ return(1+i,int(txt[1:1+i],8)) else: try:return chr_esc[c] - except KeyError:raise PreprocError("could not parse char literal '%s'"%txt) + except KeyError:raise PreprocError('could not parse char literal %r'%txt) def tokenize(s): + return tokenize_private(s)[:] +def tokenize_private(s): ret=[] for match in re_clexer.finditer(s): m=match.group @@ -407,7 +402,9 @@ v=m(name) if v: if name==IDENT: - try:v=g_optrans[v];name=OP + try: + g_optrans[v] + name=OP except KeyError: if v.lower()=="true": v=1 @@ -431,8 +428,18 @@ ret.append((name,v)) break return ret -def define_name(line): - return re_mac.match(line).group(0) +def format_defines(lst): + ret=[] + for y in lst: + if y: + pos=y.find('=') + if pos==-1: + ret.append(y) + elif pos>0: + ret.append('%s %s'%(y[:pos],y[pos+1:])) + else: + raise ValueError('Invalid define expression %r'%y) + return ret class c_parser(object): def __init__(self,nodepaths=None,defines=None): self.lines=[] @@ -450,81 +457,84 @@ self.ban_includes=set([]) def cached_find_resource(self,node,filename): try: - nd=node.ctx.cache_nd - except: - nd=node.ctx.cache_nd={} - tup=(node,filename) + cache=node.ctx.preproc_cache_node + except AttributeError: + cache=node.ctx.preproc_cache_node=Utils.lru_cache(1000) + key=(node,filename) try: - return nd[tup] + return cache[key] except KeyError: ret=node.find_resource(filename) if ret: if getattr(ret,'children',None): ret=None elif ret.is_child_of(node.ctx.bldnode): - tmp=node.ctx.srcnode.search(ret.path_from(node.ctx.bldnode)) + tmp=node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode)) if tmp and getattr(tmp,'children',None): ret=None - nd[tup]=ret + cache[key]=ret return ret def tryfind(self,filename): + if filename.endswith('.moc'): + self.names.append(filename) + return None self.curfile=filename found=self.cached_find_resource(self.currentnode_stack[-1],filename) for n in self.nodepaths: if found: break found=self.cached_find_resource(n,filename) - if found: + if found and not found in self.ban_includes: self.nodes.append(found) - if filename[-4:]!='.moc': - self.addlines(found) + self.addlines(found) else: if not filename in self.names: self.names.append(filename) return found + def filter_comments(self,node): + code=node.read() + if use_trigraphs: + for(a,b)in trig_def:code=code.split(a).join(b) + code=re_nl.sub('',code) + code=re_cpp.sub(repl,code) + return re_lines.findall(code) + def parse_lines(self,node): + try: + cache=node.ctx.preproc_cache_lines + except AttributeError: + cache=node.ctx.preproc_cache_lines=Utils.lru_cache(1000) + try: + return cache[node] + except KeyError: + cache[node]=lines=self.filter_comments(node) + lines.append((POPFILE,'')) + lines.reverse() + return lines def addlines(self,node): self.currentnode_stack.append(node.parent) - filepath=node.abspath() self.count_files+=1 if self.count_files>recursion_limit: - raise PreprocError("recursion limit exceeded") - pc=self.parse_cache - debug('preproc: reading file %r',filepath) - try: - lns=pc[filepath] - except KeyError: - pass - else: - self.lines.extend(lns) - return + raise PreprocError('recursion limit exceeded') + if Logs.verbose: + Logs.debug('preproc: reading file %r',node) try: - lines=filter_comments(filepath) - lines.append((POPFILE,'')) - lines.reverse() - pc[filepath]=lines - self.lines.extend(lines) - except IOError: - raise PreprocError("could not read the file %s"%filepath) + lines=self.parse_lines(node) + except EnvironmentError: + raise PreprocError('could not read the file %r'%node) except Exception: if Logs.verbose>0: - error("parsing %s failed"%filepath) + Logs.error('parsing %r failed',node) traceback.print_exc() + else: + self.lines.extend(lines) def start(self,node,env): - debug('preproc: scanning %s (in %s)',node.name,node.parent.name) - bld=node.ctx - try: - self.parse_cache=bld.parse_cache - except AttributeError: - bld.parse_cache={} - self.parse_cache=bld.parse_cache + Logs.debug('preproc: scanning %s (in %s)',node.name,node.parent.name) + self.current_file=node self.addlines(node) - if env['DEFINES']: - try: - lst=['%s %s'%(x[0],trimquotes('='.join(x[1:])))for x in[y.split('=')for y in env['DEFINES']]] - lst.reverse() - self.lines.extend([('define',x)for x in lst]) - except AttributeError: - pass + if env.DEFINES: + lst=format_defines(env.DEFINES) + lst.reverse() + self.lines.extend([('define',x)for x in lst]) while self.lines: (token,line)=self.lines.pop() if token==POPFILE: @@ -533,7 +543,7 @@ continue try: ve=Logs.verbose - if ve:debug('preproc: line is %s - %s state is %s',token,line,self.state) + if ve:Logs.debug('preproc: line is %s - %s state is %s',token,line,self.state) state=self.state if token[:2]=='if': state.append(undefined) @@ -548,20 +558,19 @@ else:state[-1]=ignored elif token=='ifdef': m=re_mac.match(line) - if m and m.group(0)in self.defs:state[-1]=accepted + if m and m.group()in self.defs:state[-1]=accepted else:state[-1]=ignored elif token=='ifndef': m=re_mac.match(line) - if m and m.group(0)in self.defs:state[-1]=ignored + if m and m.group()in self.defs:state[-1]=ignored else:state[-1]=accepted elif token=='include'or token=='import': (kind,inc)=extract_include(line,self.defs) - if inc in self.ban_includes: - continue - if token=='import':self.ban_includes.add(inc) - if ve:debug('preproc: include found %s (%s) ',inc,kind) + if ve:Logs.debug('preproc: include found %s (%s) ',inc,kind) if kind=='"'or not strict_quotes: - self.tryfind(inc) + self.current_file=self.tryfind(inc) + if token=='import': + self.ban_includes.add(self.current_file) elif token=='elif': if state[-1]==accepted: state[-1]=skipped @@ -573,19 +582,21 @@ elif state[-1]==ignored:state[-1]=accepted elif token=='define': try: - self.defs[define_name(line)]=line - except: - raise PreprocError("Invalid define line %s"%line) + self.defs[self.define_name(line)]=line + except AttributeError: + raise PreprocError('Invalid define line %r'%line) elif token=='undef': m=re_mac.match(line) - if m and m.group(0)in self.defs: - self.defs.__delitem__(m.group(0)) + if m and m.group()in self.defs: + self.defs.__delitem__(m.group()) elif token=='pragma': if re_pragma_once.match(line.lower()): - self.ban_includes.add(self.curfile) - except Exception ,e: + self.ban_includes.add(self.current_file) + except Exception as e: if Logs.verbose: - debug('preproc: line parsing failed (%s): %s %s',e,line,Utils.ex_stack()) + Logs.debug('preproc: line parsing failed (%s): %s %s',e,line,Utils.ex_stack()) + def define_name(self,line): + return re_mac.match(line).group() def scan(task): global go_absolute try: @@ -593,14 +604,9 @@ except AttributeError: raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes": '%task.generator) if go_absolute: - nodepaths=incn+standard_includes + nodepaths=incn+[task.generator.bld.root.find_dir(x)for x in standard_includes] else: nodepaths=[x for x in incn if x.is_child_of(x.ctx.srcnode)or x.is_child_of(x.ctx.bldnode)] tmp=c_parser(nodepaths) tmp.start(task.inputs[0],task.env) - if Logs.verbose: - debug('deps: deps for %r: %r; unresolved %r'%(task.inputs,tmp.nodes,tmp.names)) return(tmp.nodes,tmp.names) - -Utils.run_once(tokenize) -Utils.run_once(define_name) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c.py glmark2-2021.02/waflib/Tools/c.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,27 +1,26 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -from waflib import TaskGen,Task,Utils +from waflib import TaskGen,Task from waflib.Tools import c_preproc from waflib.Tools.ccroot import link_task,stlink_task +@TaskGen.extension('.c') def c_hook(self,node): + if not self.env.CC and self.env.CXX: + return self.create_compiled_task('cxx',node) return self.create_compiled_task('c',node) class c(Task.Task): - run_str='${CC} ${ARCH_ST:ARCH} ${CFLAGS} ${CPPFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CC_SRC_F}${SRC} ${CC_TGT_F}${TGT}' + run_str='${CC} ${ARCH_ST:ARCH} ${CFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CC_SRC_F}${SRC} ${CC_TGT_F}${TGT[0].abspath()} ${CPPFLAGS}' vars=['CCDEPS'] ext_in=['.h'] scan=c_preproc.scan -Task.classes['cc']=cc=c class cprogram(link_task): - run_str='${LINK_CC} ${LINKFLAGS} ${CCLNK_SRC_F}${SRC} ${CCLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${FRAMEWORK_ST:FRAMEWORK} ${ARCH_ST:ARCH} ${STLIB_MARKER} ${STLIBPATH_ST:STLIBPATH} ${STLIB_ST:STLIB} ${SHLIB_MARKER} ${LIBPATH_ST:LIBPATH} ${LIB_ST:LIB}' + run_str='${LINK_CC} ${LINKFLAGS} ${CCLNK_SRC_F}${SRC} ${CCLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${FRAMEWORK_ST:FRAMEWORK} ${ARCH_ST:ARCH} ${STLIB_MARKER} ${STLIBPATH_ST:STLIBPATH} ${STLIB_ST:STLIB} ${SHLIB_MARKER} ${LIBPATH_ST:LIBPATH} ${LIB_ST:LIB} ${LDFLAGS}' ext_out=['.bin'] vars=['LINKDEPS'] inst_to='${BINDIR}' - chmod=Utils.O755 class cshlib(cprogram): inst_to='${LIBDIR}' class cstlib(stlink_task): pass - -TaskGen.extension('.c')(c_hook) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/cs.py glmark2-2021.02/waflib/Tools/cs.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/cs.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/cs.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,15 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -from waflib import Utils,Task,Options,Logs,Errors +from waflib import Utils,Task,Options,Errors from waflib.TaskGen import before_method,after_method,feature from waflib.Tools import ccroot from waflib.Configure import conf ccroot.USELIB_VARS['cs']=set(['CSFLAGS','ASSEMBLIES','RESOURCES']) ccroot.lib_patterns['csshlib']=['%s'] +@feature('cs') +@before_method('process_source') def apply_cs(self): cs_nodes=[] no_nodes=[] @@ -19,14 +19,17 @@ else: no_nodes.append(x) self.source=no_nodes - bintype=getattr(self,'type',self.gen.endswith('.dll')and'library'or'exe') + bintype=getattr(self,'bintype',self.gen.endswith('.dll')and'library'or'exe') self.cs_task=tsk=self.create_task('mcs',cs_nodes,self.path.find_or_declare(self.gen)) tsk.env.CSTYPE='/target:%s'%bintype tsk.env.OUT='/out:%s'%tsk.outputs[0].abspath() + self.env.append_value('CSFLAGS','/platform:%s'%getattr(self,'platform','anycpu')) inst_to=getattr(self,'install_path',bintype=='exe'and'${BINDIR}'or'${LIBDIR}') if inst_to: mod=getattr(self,'chmod',bintype=='exe'and Utils.O755 or Utils.O644) - self.install_task=self.bld.install_files(inst_to,self.cs_task.outputs[:],env=self.env,chmod=mod) + self.install_task=self.add_install_files(install_to=inst_to,install_from=self.cs_task.outputs[:],chmod=mod) +@feature('cs') +@after_method('apply_cs') def use_cs(self): names=self.to_list(getattr(self,'use',[])) get=self.bld.get_tgen_by_name @@ -34,7 +37,7 @@ try: y=get(x) except Errors.WafError: - self.cs_task.env.append_value('CSFLAGS','/reference:%s'%x) + self.env.append_value('CSFLAGS','/reference:%s'%x) continue y.post() tsk=getattr(y,'cs_task',None)or getattr(y,'link_task',None) @@ -42,7 +45,9 @@ self.bld.fatal('cs task has no link task for use %r'%self) self.cs_task.dep_nodes.extend(tsk.outputs) self.cs_task.set_run_after(tsk) - self.cs_task.env.append_value('CSFLAGS','/reference:%s'%tsk.outputs[0].abspath()) + self.env.append_value('CSFLAGS','/reference:%s'%tsk.outputs[0].abspath()) +@feature('cs') +@after_method('apply_cs','use_cs') def debug_cs(self): csdebug=getattr(self,'csdebug',self.env.CSDEBUG) if not csdebug: @@ -63,10 +68,14 @@ val=['/debug+','/debug:full'] else: val=['/debug-'] - self.cs_task.env.append_value('CSFLAGS',val) + self.env.append_value('CSFLAGS',val) class mcs(Task.Task): color='YELLOW' run_str='${MCS} ${CSTYPE} ${CSFLAGS} ${ASS_ST:ASSEMBLIES} ${RES_ST:RESOURCES} ${OUT} ${SRC}' + def exec_command(self,cmd,**kw): + if'/noconfig'in cmd: + raise ValueError('/noconfig is not allowed when using response files, check your flags!') + return super(self.__class__,self).exec_command(cmd,**kw) def configure(conf): csc=getattr(Options.options,'cscbinary',None) if csc: @@ -83,16 +92,7 @@ color='YELLOW' inst_to=None def runnable_status(self): - for x in self.outputs: - x.sig=Utils.h_file(x.abspath()) return Task.SKIP_ME +@conf def read_csshlib(self,name,paths=[]): return self(name=name,features='fake_lib',lib_paths=paths,lib_type='csshlib') - -feature('cs')(apply_cs) -before_method('process_source')(apply_cs) -feature('cs')(use_cs) -after_method('apply_cs')(use_cs) -feature('cs')(debug_cs) -after_method('apply_cs','use_cs')(debug_cs) -conf(read_csshlib) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_tests.py glmark2-2021.02/waflib/Tools/c_tests.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/c_tests.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/c_tests.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,10 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib import Task from waflib.Configure import conf from waflib.TaskGen import feature,before_method,after_method -import sys LIB_CODE=''' #ifdef _MSC_VER #define testEXPORT __declspec(dllexport) @@ -21,8 +20,13 @@ #define testEXPORT #endif testEXPORT int lib_func(void); -int main(void) {return !(lib_func() == 9);} +int main(int argc, char **argv) { + (void)argc; (void)argv; + return !(lib_func() == 9); +} ''' +@feature('link_lib_test') +@before_method('process_source') def link_lib_test_fun(self): def write_test_file(task): task.outputs[0].write(task.generator.code) @@ -37,12 +41,13 @@ bld(rule=write_test_file,target='main.'+mode,code=MAIN_CODE) bld(features='%sshlib'%m,source='test.'+mode,target='test') bld(features='%sprogram %s'%(m,ex),source='main.'+mode,target='app',use='test',rpath=rpath) +@conf def check_library(self,mode=None,test_exec=True): if not mode: mode='c' if self.env.CXX: mode='cxx' - self.check(compile_filename=[],features='link_lib_test',msg='Checking for libraries',mode=mode,test_exec=test_exec,) + self.check(compile_filename=[],features='link_lib_test',msg='Checking for libraries',mode=mode,test_exec=test_exec) INLINE_CODE=''' typedef int foo_t; static %s foo_t static_foo () {return 0; } @@ -51,6 +56,7 @@ } ''' INLINE_VALUES=['inline','__inline__','__inline'] +@conf def check_inline(self,**kw): self.start_msg('Checking for inline') if not'define_name'in kw: @@ -72,7 +78,13 @@ self.define('inline',x,quote=False) return x self.fatal('could not use inline functions') -LARGE_FRAGMENT='#include \nint main() { return !(sizeof(off_t) >= 8); }\n' +LARGE_FRAGMENT='''#include +int main(int argc, char **argv) { + (void)argc; (void)argv; + return !(sizeof(off_t) >= 8); +} +''' +@conf def check_large_file(self,**kw): if not'define_name'in kw: kw['define_name']='HAVE_LARGEFILE' @@ -127,20 +139,14 @@ self.generator.tmp.append('big') else: return-1 +@feature('grep_for_endianness') +@after_method('process_source') def grep_for_endianness_fun(self): self.create_task('grep_for_endianness',self.compiled_tasks[0].outputs[0]) +@conf def check_endianness(self): tmp=[] def check_msg(self): return tmp[0] - self.check(fragment=ENDIAN_FRAGMENT,features='c grep_for_endianness',msg="Checking for endianness",define='ENDIANNESS',tmp=tmp,okmsg=check_msg) + self.check(fragment=ENDIAN_FRAGMENT,features='c grep_for_endianness',msg='Checking for endianness',define='ENDIANNESS',tmp=tmp,okmsg=check_msg) return tmp[0] - -feature('link_lib_test')(link_lib_test_fun) -before_method('process_source')(link_lib_test_fun) -conf(check_library) -conf(check_inline) -conf(check_large_file) -feature('grep_for_endianness')(grep_for_endianness_fun) -after_method('process_source')(grep_for_endianness_fun) -conf(check_endianness) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/cxx.py glmark2-2021.02/waflib/Tools/cxx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/cxx.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/cxx.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,26 +1,25 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -from waflib import TaskGen,Task,Utils +from waflib import TaskGen,Task from waflib.Tools import c_preproc from waflib.Tools.ccroot import link_task,stlink_task +@TaskGen.extension('.cpp','.cc','.cxx','.C','.c++') def cxx_hook(self,node): return self.create_compiled_task('cxx',node) -TaskGen.extension('.cpp','.cc','.cxx','.C','.c++')(cxx_hook) if not'.c'in TaskGen.task_gen.mappings: TaskGen.task_gen.mappings['.c']=TaskGen.task_gen.mappings['.cpp'] class cxx(Task.Task): - run_str='${CXX} ${ARCH_ST:ARCH} ${CXXFLAGS} ${CPPFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT}' + run_str='${CXX} ${ARCH_ST:ARCH} ${CXXFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT[0].abspath()} ${CPPFLAGS}' vars=['CXXDEPS'] ext_in=['.h'] scan=c_preproc.scan class cxxprogram(link_task): - run_str='${LINK_CXX} ${LINKFLAGS} ${CXXLNK_SRC_F}${SRC} ${CXXLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${FRAMEWORK_ST:FRAMEWORK} ${ARCH_ST:ARCH} ${STLIB_MARKER} ${STLIBPATH_ST:STLIBPATH} ${STLIB_ST:STLIB} ${SHLIB_MARKER} ${LIBPATH_ST:LIBPATH} ${LIB_ST:LIB}' + run_str='${LINK_CXX} ${LINKFLAGS} ${CXXLNK_SRC_F}${SRC} ${CXXLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${FRAMEWORK_ST:FRAMEWORK} ${ARCH_ST:ARCH} ${STLIB_MARKER} ${STLIBPATH_ST:STLIBPATH} ${STLIB_ST:STLIB} ${SHLIB_MARKER} ${LIBPATH_ST:LIBPATH} ${LIB_ST:LIB} ${LDFLAGS}' vars=['LINKDEPS'] ext_out=['.bin'] inst_to='${BINDIR}' - chmod=Utils.O755 class cxxshlib(cxxprogram): inst_to='${LIBDIR}' class cxxstlib(stlink_task): diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/dbus.py glmark2-2021.02/waflib/Tools/dbus.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/dbus.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/dbus.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,17 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib import Task,Errors from waflib.TaskGen import taskgen_method,before_method +@taskgen_method def add_dbus_file(self,filename,prefix,mode): if not hasattr(self,'dbus_lst'): self.dbus_lst=[] if not'process_dbus'in self.meths: self.meths.append('process_dbus') self.dbus_lst.append([filename,prefix,mode]) +@before_method('apply_core') def process_dbus(self): for filename,prefix,mode in getattr(self,'dbus_lst',[]): node=self.path.find_resource(filename) @@ -24,7 +26,4 @@ run_str='${DBUS_BINDING_TOOL} --prefix=${DBUS_BINDING_TOOL_PREFIX} --mode=${DBUS_BINDING_TOOL_MODE} --output=${TGT} ${SRC}' shell=True def configure(conf): - dbus_binding_tool=conf.find_program('dbus-binding-tool',var='DBUS_BINDING_TOOL') - -taskgen_method(add_dbus_file) -before_method('apply_core')(process_dbus) \ No newline at end of file + conf.find_program('dbus-binding-tool',var='DBUS_BINDING_TOOL') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d_config.py glmark2-2021.02/waflib/Tools/d_config.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d_config.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/d_config.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,21 +1,27 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib import Utils from waflib.Configure import conf +@conf def d_platform_flags(self): v=self.env if not v.DEST_OS: v.DEST_OS=Utils.unversioned_sys_platform() - if Utils.destos_to_binfmt(self.env.DEST_OS)=='pe': - v['dprogram_PATTERN']='%s.exe' - v['dshlib_PATTERN']='lib%s.dll' - v['dstlib_PATTERN']='lib%s.a' + binfmt=Utils.destos_to_binfmt(self.env.DEST_OS) + if binfmt=='pe': + v.dprogram_PATTERN='%s.exe' + v.dshlib_PATTERN='lib%s.dll' + v.dstlib_PATTERN='lib%s.a' + elif binfmt=='mac-o': + v.dprogram_PATTERN='%s' + v.dshlib_PATTERN='lib%s.dylib' + v.dstlib_PATTERN='lib%s.a' else: - v['dprogram_PATTERN']='%s' - v['dshlib_PATTERN']='lib%s.so' - v['dstlib_PATTERN']='lib%s.a' + v.dprogram_PATTERN='%s' + v.dshlib_PATTERN='lib%s.so' + v.dstlib_PATTERN='lib%s.a' DLIB=''' version(D_Version2) { import std.stdio; @@ -39,9 +45,8 @@ } } ''' -def check_dlibrary(self): - ret=self.check_cc(features='d dprogram',fragment=DLIB,compile_filename='test.d',execute=True,define_ret=True) - self.env.DLIBRARY=ret.strip() - -conf(d_platform_flags) -conf(check_dlibrary) \ No newline at end of file +@conf +def check_dlibrary(self,execute=True): + ret=self.check_cc(features='d dprogram',fragment=DLIB,compile_filename='test.d',execute=execute,define_ret=True) + if execute: + self.env.DLIBRARY=ret.strip() diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/dmd.py glmark2-2021.02/waflib/Tools/dmd.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/dmd.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/dmd.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,47 +1,51 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import sys from waflib.Tools import ar,d from waflib.Configure import conf +@conf def find_dmd(conf): - conf.find_program(['dmd','ldc'],var='D') + conf.find_program(['dmd','dmd2','ldc'],var='D') + out=conf.cmd_and_log(conf.env.D+['--help']) + if out.find("D Compiler v")==-1: + out=conf.cmd_and_log(conf.env.D+['-version']) + if out.find("based on DMD v1.")==-1: + conf.fatal("detected compiler is not dmd/ldc") +@conf def common_flags_ldc(conf): v=conf.env - v['DFLAGS']=['-d-version=Posix'] - v['LINKFLAGS']=[] - v['DFLAGS_dshlib']=['-relocation-model=pic'] + v.DFLAGS=['-d-version=Posix'] + v.LINKFLAGS=[] + v.DFLAGS_dshlib=['-relocation-model=pic'] +@conf def common_flags_dmd(conf): v=conf.env - v['D_SRC_F']=['-c'] - v['D_TGT_F']='-of%s' - v['D_LINKER']=v['D'] - v['DLNK_SRC_F']='' - v['DLNK_TGT_F']='-of%s' - v['DINC_ST']='-I%s' - v['DSHLIB_MARKER']=v['DSTLIB_MARKER']='' - v['DSTLIB_ST']=v['DSHLIB_ST']='-L-l%s' - v['DSTLIBPATH_ST']=v['DLIBPATH_ST']='-L-L%s' - v['LINKFLAGS_dprogram']=['-quiet'] - v['DFLAGS_dshlib']=['-fPIC'] - v['LINKFLAGS_dshlib']=['-L-shared'] - v['DHEADER_ext']='.di' + v.D_SRC_F=['-c'] + v.D_TGT_F='-of%s' + v.D_LINKER=v.D + v.DLNK_SRC_F='' + v.DLNK_TGT_F='-of%s' + v.DINC_ST='-I%s' + v.DSHLIB_MARKER=v.DSTLIB_MARKER='' + v.DSTLIB_ST=v.DSHLIB_ST='-L-l%s' + v.DSTLIBPATH_ST=v.DLIBPATH_ST='-L-L%s' + v.LINKFLAGS_dprogram=['-quiet'] + v.DFLAGS_dshlib=['-fPIC'] + v.LINKFLAGS_dshlib=['-L-shared'] + v.DHEADER_ext='.di' v.DFLAGS_d_with_header=['-H','-Hf'] - v['D_HDR_F']='%s' + v.D_HDR_F='%s' def configure(conf): conf.find_dmd() if sys.platform=='win32': - out=conf.cmd_and_log([conf.env.D,'--help']) - if out.find("D Compiler v2.")>-1: - conf.fatal('dmd2 on Windows is not supported, use gdc or ldc instead') + out=conf.cmd_and_log(conf.env.D+['--help']) + if out.find('D Compiler v2.')>-1: + conf.fatal('dmd2 on Windows is not supported, use gdc or ldc2 instead') conf.load('ar') conf.load('d') conf.common_flags_dmd() conf.d_platform_flags() if str(conf.env.D).find('ldc')>-1: conf.common_flags_ldc() - -conf(find_dmd) -conf(common_flags_ldc) -conf(common_flags_dmd) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d.py glmark2-2021.02/waflib/Tools/d.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/d.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,6 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib import Utils,Task,Errors from waflib.TaskGen import taskgen_method,feature,extension @@ -18,11 +18,11 @@ class dprogram(link_task): run_str='${D_LINKER} ${LINKFLAGS} ${DLNK_SRC_F}${SRC} ${DLNK_TGT_F:TGT} ${RPATH_ST:RPATH} ${DSTLIB_MARKER} ${DSTLIBPATH_ST:STLIBPATH} ${DSTLIB_ST:STLIB} ${DSHLIB_MARKER} ${DLIBPATH_ST:LIBPATH} ${DSHLIB_ST:LIB}' inst_to='${BINDIR}' - chmod=Utils.O755 class dshlib(dprogram): inst_to='${LIBDIR}' class dstlib(stlink_task): pass +@extension('.d','.di','.D') def d_hook(self,node): ext=Utils.destos_to_binfmt(self.env.DEST_OS)=='pe'and'obj'or'o' out='%s.%d.%s'%(node.name,self.idx,ext) @@ -35,22 +35,20 @@ return task if getattr(self,'generate_headers',None): tsk=create_compiled_task(self,'d_with_header',node) - tsk.outputs.append(node.change_ext(self.env['DHEADER_ext'])) + tsk.outputs.append(node.change_ext(self.env.DHEADER_ext)) else: tsk=create_compiled_task(self,'d',node) return tsk -def generate_header(self,filename,install_path=None): +@taskgen_method +def generate_header(self,filename): try: - self.header_lst.append([filename,install_path]) + self.header_lst.append([filename,self.install_path]) except AttributeError: - self.header_lst=[[filename,install_path]] + self.header_lst=[[filename,self.install_path]] +@feature('d') def process_header(self): for i in getattr(self,'header_lst',[]): node=self.path.find_resource(i[0]) if not node: raise Errors.WafError('file %r not found on d obj'%i[0]) self.create_task('d_header',node,node.change_ext('.di')) - -extension('.d','.di','.D')(d_hook) -taskgen_method(generate_header) -feature('d')(process_header) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d_scan.py glmark2-2021.02/waflib/Tools/d_scan.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/d_scan.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/d_scan.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,9 +1,9 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import re -from waflib import Utils,Logs +from waflib import Utils def filter_comments(filename): txt=Utils.readf(filename) i=0 @@ -128,6 +128,4 @@ gruik.start(node) nodes=gruik.nodes names=gruik.names - if Logs.verbose: - Logs.debug('deps: deps for %s: %r; unresolved %r'%(str(node),nodes,names)) return(nodes,names) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/errcheck.py glmark2-2021.02/waflib/Tools/errcheck.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/errcheck.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/errcheck.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,24 +1,25 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -typos={'feature':'features','sources':'source','targets':'target','include':'includes','export_include':'export_includes','define':'defines','importpath':'includes','installpath':'install_path',} +typos={'feature':'features','sources':'source','targets':'target','include':'includes','export_include':'export_includes','define':'defines','importpath':'includes','installpath':'install_path','iscopy':'is_copy',} meths_typos=['__call__','program','shlib','stlib','objects'] +import sys from waflib import Logs,Build,Node,Task,TaskGen,ConfigSet,Errors,Utils -import waflib.Tools.ccroot +from waflib.Tools import ccroot def check_same_targets(self): mp=Utils.defaultdict(list) uids={} def check_task(tsk): if not isinstance(tsk,Task.Task): return + if hasattr(tsk,'no_errcheck_out'): + return for node in tsk.outputs: mp[node].append(tsk) try: uids[tsk.uid()].append(tsk) - except: + except KeyError: uids[tsk.uid()]=[tsk] for g in self.groups: for tg in g: @@ -31,19 +32,20 @@ for(k,v)in mp.items(): if len(v)>1: dupe=True - msg='* Node %r is created by more than once%s. The task generators are:'%(k,Logs.verbose==1 and" (full message on 'waf -v -v')"or"") + msg='* Node %r is created more than once%s. The task generators are:'%(k,Logs.verbose==1 and" (full message on 'waf -v -v')"or"") Logs.error(msg) for x in v: if Logs.verbose>1: - Logs.error(' %d. %r'%(1+v.index(x),x.generator)) + Logs.error(' %d. %r',1+v.index(x),x.generator) else: - Logs.error(' %d. %r in %r'%(1+v.index(x),x.generator.name,getattr(x.generator,'path',None))) + Logs.error(' %d. %r in %r',1+v.index(x),x.generator.name,getattr(x.generator,'path',None)) + Logs.error('If you think that this is an error, set no_errcheck_out on the task instance') if not dupe: for(k,v)in uids.items(): if len(v)>1: - Logs.error('* Several tasks use the same identifier. Please check the information on\n http://waf.googlecode.com/git/docs/apidocs/Task.html#waflib.Task.Task.uid') + Logs.error('* Several tasks use the same identifier. Please check the information on\n https://waf.io/apidocs/Task.html?highlight=uid#waflib.Task.Task.uid') for tsk in v: - Logs.error(' - object %r (%r) defined in %r'%(tsk.__class__.__name__,tsk,tsk.generator)) + Logs.error(' - object %r (%r) defined in %r',tsk.__class__.__name__,tsk,tsk.generator) def check_invalid_constraints(self): feat=set([]) for x in list(TaskGen.feats.values()): @@ -56,22 +58,25 @@ ext.add(x.__name__) invalid=ext&feat if invalid: - Logs.error('The methods %r have invalid annotations: @extension <-> @feature/@before_method/@after_method'%list(invalid)) + Logs.error('The methods %r have invalid annotations: @extension <-> @feature/@before_method/@after_method',list(invalid)) for cls in list(Task.classes.values()): + if sys.hexversion>0x3000000 and issubclass(cls,Task.Task)and isinstance(cls.hcode,str): + raise Errors.WafError('Class %r has hcode value %r of type , expecting (use Utils.h_cmd() ?)'%(cls,cls.hcode)) for x in('before','after'): for y in Utils.to_list(getattr(cls,x,[])): - if not Task.classes.get(y,None): - Logs.error('Erroneous order constraint %r=%r on task class %r'%(x,y,cls.__name__)) + if not Task.classes.get(y): + Logs.error('Erroneous order constraint %r=%r on task class %r',x,y,cls.__name__) if getattr(cls,'rule',None): - Logs.error('Erroneous attribute "rule" on task class %r (rename to "run_str")'%cls.__name__) + Logs.error('Erroneous attribute "rule" on task class %r (rename to "run_str")',cls.__name__) def replace(m): oldcall=getattr(Build.BuildContext,m) def call(self,*k,**kw): ret=oldcall(self,*k,**kw) for x in typos: if x in kw: - err=True - Logs.error('Fix the typo %r -> %r on %r'%(x,typos[x],ret)) + if x=='iscopy'and'subst'in getattr(self,'features',''): + continue + Logs.error('Fix the typo %r -> %r on %r',x,typos[x],ret) return ret setattr(Build.BuildContext,m,call) def enhance_lib(): @@ -82,11 +87,11 @@ lst=Utils.to_list(k[0]) for pat in lst: if'..'in pat.split('/'): - Logs.error("In ant_glob pattern %r: '..' means 'two dots', not 'parent directory'"%k[0]) + Logs.error("In ant_glob pattern %r: '..' means 'two dots', not 'parent directory'",k[0]) if kw.get('remove',True): try: if self.is_child_of(self.ctx.bldnode)and not kw.get('quiet',False): - Logs.error('Using ant_glob on the build folder (%r) is dangerous (quiet=True to disable this warning)'%self) + Logs.error('Using ant_glob on the build folder (%r) is dangerous (quiet=True to disable this warning)',self) except AttributeError: pass return self.old_ant_glob(*k,**kw) @@ -96,7 +101,7 @@ def is_before(t1,t2): ret=old(t1,t2) if ret and old(t2,t1): - Logs.error('Contradictory order constraints in classes %r %r'%(t1,t2)) + Logs.error('Contradictory order constraints in classes %r %r',t1,t2) return ret Task.is_before=is_before def check_err_features(self): @@ -105,18 +110,18 @@ Logs.error('feature shlib -> cshlib, dshlib or cxxshlib') for x in('c','cxx','d','fc'): if not x in lst and lst and lst[0]in[x+y for y in('program','shlib','stlib')]: - Logs.error('%r features is probably missing %r'%(self,x)) + Logs.error('%r features is probably missing %r',self,x) TaskGen.feature('*')(check_err_features) def check_err_order(self): - if not hasattr(self,'rule'): + if not hasattr(self,'rule')and not'subst'in Utils.to_list(self.features): for x in('before','after','ext_in','ext_out'): if hasattr(self,x): - Logs.warn('Erroneous order constraint %r on non-rule based task generator %r'%(x,self)) + Logs.warn('Erroneous order constraint %r on non-rule based task generator %r',x,self) else: for x in('before','after'): for y in self.to_list(getattr(self,x,[])): if not Task.classes.get(y,None): - Logs.error('Erroneous order constraint %s=%r on %r'%(x,y,self)) + Logs.error('Erroneous order constraint %s=%r on %r (no such class)',x,y,self) TaskGen.feature('*')(check_err_order) def check_compile(self): check_invalid_constraints(self) @@ -145,7 +150,7 @@ self.orig_use_rec(name,**kw) TaskGen.task_gen.orig_use_rec=TaskGen.task_gen.use_rec TaskGen.task_gen.use_rec=use_rec - def getattri(self,name,default=None): + def _getattr(self,name,default=None): if name=='append'or name=='add': raise Errors.WafError('env.append and env.add do not exist: use env.append_value/env.append_unique') elif name=='prepend': @@ -154,8 +159,6 @@ return object.__getattr__(self,name,default) else: return self[name] - ConfigSet.ConfigSet.__getattr__=getattri + ConfigSet.ConfigSet.__getattr__=_getattr def options(opt): enhance_lib() -def configure(conf): - pass diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc_config.py glmark2-2021.02/waflib/Tools/fc_config.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc_config.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/fc_config.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,35 +1,44 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import re,shutil,os,sys,string,shlex +import re,os,sys,shlex from waflib.Configure import conf -from waflib.TaskGen import feature,after_method,before_method -from waflib import Build,Utils +from waflib.TaskGen import feature,before_method FC_FRAGMENT=' program main\n end program main\n' FC_FRAGMENT2=' PROGRAM MAIN\n END\n' +@conf def fc_flags(conf): v=conf.env - v['FC_SRC_F']=[] - v['FC_TGT_F']=['-c','-o'] - v['FCINCPATH_ST']='-I%s' - v['FCDEFINES_ST']='-D%s' - if not v['LINK_FC']:v['LINK_FC']=v['FC'] - v['FCLNK_SRC_F']=[] - v['FCLNK_TGT_F']=['-o'] - v['FCFLAGS_fcshlib']=['-fpic'] - v['LINKFLAGS_fcshlib']=['-shared'] - v['fcshlib_PATTERN']='lib%s.so' - v['fcstlib_PATTERN']='lib%s.a' - v['FCLIB_ST']='-l%s' - v['FCLIBPATH_ST']='-L%s' - v['FCSTLIB_ST']='-l%s' - v['FCSTLIBPATH_ST']='-L%s' - v['FCSTLIB_MARKER']='-Wl,-Bstatic' - v['FCSHLIB_MARKER']='-Wl,-Bdynamic' - v['SONAME_ST']='-Wl,-h,%s' + v.FC_SRC_F=[] + v.FC_TGT_F=['-c','-o'] + v.FCINCPATH_ST='-I%s' + v.FCDEFINES_ST='-D%s' + if not v.LINK_FC: + v.LINK_FC=v.FC + v.FCLNK_SRC_F=[] + v.FCLNK_TGT_F=['-o'] + v.FCFLAGS_fcshlib=['-fpic'] + v.LINKFLAGS_fcshlib=['-shared'] + v.fcshlib_PATTERN='lib%s.so' + v.fcstlib_PATTERN='lib%s.a' + v.FCLIB_ST='-l%s' + v.FCLIBPATH_ST='-L%s' + v.FCSTLIB_ST='-l%s' + v.FCSTLIBPATH_ST='-L%s' + v.FCSTLIB_MARKER='-Wl,-Bstatic' + v.FCSHLIB_MARKER='-Wl,-Bdynamic' + v.SONAME_ST='-Wl,-h,%s' +@conf +def fc_add_flags(conf): + conf.add_os_flags('FCPPFLAGS',dup=False) + conf.add_os_flags('FCFLAGS',dup=False) + conf.add_os_flags('LINKFLAGS',dup=False) + conf.add_os_flags('LDFLAGS',dup=False) +@conf def check_fortran(self,*k,**kw): self.check_cc(fragment=FC_FRAGMENT,compile_filename='test.f',features='fc fcprogram',msg='Compiling a simple fortran app') +@conf def check_fc(self,*k,**kw): kw['compiler']='fc' if not'compile_mode'in kw: @@ -41,32 +50,35 @@ if not'code'in kw: kw['code']=FC_FRAGMENT return self.check(*k,**kw) +@conf def fortran_modifier_darwin(conf): v=conf.env - v['FCFLAGS_fcshlib']=['-fPIC','-compatibility_version','1','-current_version','1'] - v['LINKFLAGS_fcshlib']=['-dynamiclib'] - v['fcshlib_PATTERN']='lib%s.dylib' - v['FRAMEWORKPATH_ST']='-F%s' - v['FRAMEWORK_ST']='-framework %s' - v['LINKFLAGS_fcstlib']=[] - v['FCSHLIB_MARKER']='' - v['FCSTLIB_MARKER']='' - v['SONAME_ST']='' + v.FCFLAGS_fcshlib=['-fPIC'] + v.LINKFLAGS_fcshlib=['-dynamiclib'] + v.fcshlib_PATTERN='lib%s.dylib' + v.FRAMEWORKPATH_ST='-F%s' + v.FRAMEWORK_ST='-framework %s' + v.LINKFLAGS_fcstlib=[] + v.FCSHLIB_MARKER='' + v.FCSTLIB_MARKER='' + v.SONAME_ST='' +@conf def fortran_modifier_win32(conf): v=conf.env - v['fcprogram_PATTERN']=v['fcprogram_test_PATTERN']='%s.exe' - v['fcshlib_PATTERN']='%s.dll' - v['implib_PATTERN']='lib%s.dll.a' - v['IMPLIB_ST']='-Wl,--out-implib,%s' - v['FCFLAGS_fcshlib']=[] - v.append_value('FCFLAGS_fcshlib',['-DDLL_EXPORT']) + v.fcprogram_PATTERN=v.fcprogram_test_PATTERN='%s.exe' + v.fcshlib_PATTERN='%s.dll' + v.implib_PATTERN='lib%s.dll.a' + v.IMPLIB_ST='-Wl,--out-implib,%s' + v.FCFLAGS_fcshlib=[] v.append_value('LINKFLAGS',['-Wl,--enable-auto-import']) +@conf def fortran_modifier_cygwin(conf): fortran_modifier_win32(conf) v=conf.env - v['fcshlib_PATTERN']='cyg%s.dll' + v.fcshlib_PATTERN='cyg%s.dll' v.append_value('LINKFLAGS_fcshlib',['-Wl,--enable-auto-image-base']) - v['FCFLAGS_fcshlib']=[] + v.FCFLAGS_fcshlib=[] +@conf def check_fortran_dummy_main(self,*k,**kw): if not self.env.CC: self.fatal('A c compiler is required for check_fortran_dummy_main') @@ -93,6 +105,7 @@ GCC_DRIVER_LINE=re.compile('^Driving:') POSIX_STATIC_EXT=re.compile('\S+\.a') POSIX_LIB_FLAGS=re.compile('-l\S+') +@conf def is_link_verbose(self,txt): assert isinstance(txt,str) for line in txt.splitlines(): @@ -100,9 +113,10 @@ if POSIX_STATIC_EXT.search(line)or POSIX_LIB_FLAGS.search(line): return True return False +@conf def check_fortran_verbose_flag(self,*k,**kw): self.start_msg('fortran link verbose flag') - for x in['-v','--verbose','-verbose','-V']: + for x in('-v','--verbose','-verbose','-V'): try: self.check_cc(features='fc fcprogram_test',fragment=FC_FRAGMENT2,compile_filename='test.f',linkflags=[x],mandatory=True) except self.errors.ConfigurationError: @@ -135,41 +149,42 @@ return final_flags SPACE_OPTS=re.compile('^-[LRuYz]$') NOSPACE_OPTS=re.compile('^-[RL]') +def _parse_flink_token(lexer,token,tmp_flags): + if _match_ignore(token): + pass + elif token.startswith('-lkernel32')and sys.platform=='cygwin': + tmp_flags.append(token) + elif SPACE_OPTS.match(token): + t=lexer.get_token() + if t.startswith('P,'): + t=t[2:] + for opt in t.split(os.pathsep): + tmp_flags.append('-L%s'%opt) + elif NOSPACE_OPTS.match(token): + tmp_flags.append(token) + elif POSIX_LIB_FLAGS.match(token): + tmp_flags.append(token) + else: + pass + t=lexer.get_token() + return t def _parse_flink_line(line,final_flags): lexer=shlex.shlex(line,posix=True) lexer.whitespace_split=True t=lexer.get_token() tmp_flags=[] while t: - def parse(token): - if _match_ignore(token): - pass - elif token.startswith('-lkernel32')and sys.platform=='cygwin': - tmp_flags.append(token) - elif SPACE_OPTS.match(token): - t=lexer.get_token() - if t.startswith('P,'): - t=t[2:] - for opt in t.split(os.pathsep): - tmp_flags.append('-L%s'%opt) - elif NOSPACE_OPTS.match(token): - tmp_flags.append(token) - elif POSIX_LIB_FLAGS.match(token): - tmp_flags.append(token) - else: - pass - t=lexer.get_token() - return t - t=parse(t) + t=_parse_flink_token(lexer,t,tmp_flags) final_flags.extend(tmp_flags) return final_flags +@conf def check_fortran_clib(self,autoadd=True,*k,**kw): if not self.env.FC_VERBOSE_FLAG: self.fatal('env.FC_VERBOSE_FLAG is not set: execute check_fortran_verbose_flag?') self.start_msg('Getting fortran runtime link flags') try: self.check_cc(fragment=FC_FRAGMENT2,compile_filename='test.f',features='fc fcprogram_test',linkflags=[self.env.FC_VERBOSE_FLAG]) - except: + except Exception: self.end_msg(False) if kw.get('mandatory',True): conf.fatal('Could not find the c library flags') @@ -181,24 +196,24 @@ return flags return[] def getoutput(conf,cmd,stdin=False): - if stdin: - stdin=Utils.subprocess.PIPE + from waflib import Errors + if conf.env.env: + env=conf.env.env else: - stdin=None - env=conf.env.env or None + env=dict(os.environ) + env['LANG']='C' + input=stdin and'\n'.encode()or None try: - p=Utils.subprocess.Popen(cmd,stdin=stdin,stdout=Utils.subprocess.PIPE,stderr=Utils.subprocess.PIPE,env=env) - if stdin: - p.stdin.write('\n') - stdout,stderr=p.communicate() - except: + out,err=conf.cmd_and_log(cmd,env=env,output=0,input=input) + except Errors.WafError as e: + if not(hasattr(e,'stderr')and hasattr(e,'stdout')): + raise e + else: + out=e.stdout + err=e.stderr + except Exception: conf.fatal('could not determine the compiler version %r'%cmd) - else: - if not isinstance(stdout,str): - stdout=stdout.decode(sys.stdout.encoding) - if not isinstance(stderr,str): - stderr=stderr.decode(sys.stdout.encoding) - return stdout,stderr + return(out,err) ROUTINES_CODE="""\ subroutine foobar() return @@ -216,6 +231,8 @@ return 0; } """ +@feature('link_main_routines_func') +@before_method('process_source') def link_main_routines_tg_method(self): def write_test_file(task): task.outputs[0].write(task.generator.code) @@ -225,12 +242,13 @@ bld(features='fc fcstlib',source='test.f',target='test') bld(features='c fcprogram',source='main.c',target='app',use='test') def mangling_schemes(): - for u in['_','']: - for du in['','_']: - for c in["lower","upper"]: + for u in('_',''): + for du in('','_'): + for c in("lower","upper"): yield(u,du,c) def mangle_name(u,du,c,name): return getattr(name,c)()+u+(name.find('_')!=-1 and du or'') +@conf def check_fortran_mangling(self,*k,**kw): if not self.env.CC: self.fatal('A c compiler is required for link_main_routines') @@ -241,7 +259,7 @@ self.start_msg('Getting fortran mangling scheme') for(u,du,c)in mangling_schemes(): try: - self.check_cc(compile_filename=[],features='link_main_routines_func',msg='nomsg',errmsg='nomsg',mandatory=True,dummy_func_nounder=mangle_name(u,du,c,"foobar"),dummy_func_under=mangle_name(u,du,c,"foo_bar"),main_func_name=self.env.FC_MAIN) + self.check_cc(compile_filename=[],features='link_main_routines_func',msg='nomsg',errmsg='nomsg',dummy_func_nounder=mangle_name(u,du,c,'foobar'),dummy_func_under=mangle_name(u,du,c,'foo_bar'),main_func_name=self.env.FC_MAIN) except self.errors.ConfigurationError: pass else: @@ -252,10 +270,13 @@ self.end_msg(False) self.fatal('mangler not found') return(u,du,c) +@feature('pyext') +@before_method('propagate_uselib_vars','apply_link') def set_lib_pat(self): - self.env['fcshlib_PATTERN']=self.env['pyext_PATTERN'] + self.env.fcshlib_PATTERN=self.env.pyext_PATTERN +@conf def detect_openmp(self): - for x in['-fopenmp','-openmp','-mp','-xopenmp','-omp','-qsmp=omp']: + for x in('-fopenmp','-openmp','-mp','-xopenmp','-omp','-qsmp=omp'): try: self.check_fc(msg='Checking for OpenMP flag %s'%x,fragment='program main\n call omp_get_num_threads()\nend program main',fcflags=x,linkflags=x,uselib_store='OPENMP') except self.errors.ConfigurationError: @@ -264,20 +285,3 @@ break else: self.fatal('Could not find OpenMP') - -conf(fc_flags) -conf(check_fortran) -conf(check_fc) -conf(fortran_modifier_darwin) -conf(fortran_modifier_win32) -conf(fortran_modifier_cygwin) -conf(check_fortran_dummy_main) -conf(is_link_verbose) -conf(check_fortran_verbose_flag) -conf(check_fortran_clib) -feature('link_main_routines_func')(link_main_routines_tg_method) -before_method('process_source')(link_main_routines_tg_method) -conf(check_fortran_mangling) -feature('pyext')(set_lib_pat) -before_method('propagate_uselib_vars','apply_link')(set_lib_pat) -conf(detect_openmp) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc.py glmark2-2021.02/waflib/Tools/fc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/fc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,22 +1,19 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import re -from waflib import Utils,Task,TaskGen,Logs +from waflib import Utils,Task from waflib.Tools import ccroot,fc_config,fc_scan -from waflib.TaskGen import feature,before_method,after_method,extension +from waflib.TaskGen import extension from waflib.Configure import conf -ccroot.USELIB_VARS['fc']=set(['FCFLAGS','DEFINES','INCLUDES']) +ccroot.USELIB_VARS['fc']=set(['FCFLAGS','DEFINES','INCLUDES','FCPPFLAGS']) ccroot.USELIB_VARS['fcprogram_test']=ccroot.USELIB_VARS['fcprogram']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS']) ccroot.USELIB_VARS['fcshlib']=set(['LIB','STLIB','LIBPATH','STLIBPATH','LINKFLAGS','RPATH','LINKDEPS']) ccroot.USELIB_VARS['fcstlib']=set(['ARFLAGS','LINKDEPS']) -def dummy(self): - pass +@extension('.f','.f90','.F','.F90','.for','.FOR') def fc_hook(self,node): return self.create_compiled_task('fc',node) +@conf def modfile(conf,name): return{'lower':name.lower()+'.mod','lower.MOD':name.upper()+'.MOD','UPPER.mod':name.upper()+'.mod','UPPER':name.upper()+'.MOD'}[conf.env.FC_MOD_CAPITALIZATION or'lower'] def get_fortran_tasks(tsk): @@ -25,14 +22,12 @@ return[x for x in tasks if isinstance(x,fc)and not getattr(x,'nomod',None)and not getattr(x,'mod_fortran_done',None)] class fc(Task.Task): color='GREEN' - run_str='${FC} ${FCFLAGS} ${FCINCPATH_ST:INCPATHS} ${FCDEFINES_ST:DEFINES} ${_FCMODOUTFLAGS} ${FC_TGT_F}${TGT[0].abspath()} ${FC_SRC_F}${SRC[0].abspath()}' + run_str='${FC} ${FCFLAGS} ${FCINCPATH_ST:INCPATHS} ${FCDEFINES_ST:DEFINES} ${_FCMODOUTFLAGS} ${FC_TGT_F}${TGT[0].abspath()} ${FC_SRC_F}${SRC[0].abspath()} ${FCPPFLAGS}' vars=["FORTRANMODPATHFLAG"] def scan(self): tmp=fc_scan.fortran_parser(self.generator.includes_nodes) tmp.task=self tmp.start(self.inputs[0]) - if Logs.verbose: - Logs.debug('deps: deps for %r: %r; unresolved %r'%(self.inputs,tmp.nodes,tmp.names)) return(tmp.nodes,tmp.names) def runnable_status(self): if getattr(self,'mod_fortran_done',None): @@ -74,10 +69,7 @@ for t in outs[k]: tmp.extend(t.outputs) a.dep_nodes.extend(tmp) - try: - a.dep_nodes.sort(key=lambda x:x.abspath()) - except: - a.dep_nodes.sort(lambda x,y:cmp(x.abspath(),y.abspath())) + a.dep_nodes.sort(key=lambda x:x.abspath()) for tsk in lst: try: delattr(tsk,'cache_sig') @@ -86,14 +78,13 @@ return super(fc,self).runnable_status() class fcprogram(ccroot.link_task): color='YELLOW' - run_str='${FC} ${LINKFLAGS} ${FCLNK_SRC_F}${SRC} ${FCLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FCSTLIB_MARKER} ${FCSTLIBPATH_ST:STLIBPATH} ${FCSTLIB_ST:STLIB} ${FCSHLIB_MARKER} ${FCLIBPATH_ST:LIBPATH} ${FCLIB_ST:LIB}' + run_str='${FC} ${LINKFLAGS} ${FCLNK_SRC_F}${SRC} ${FCLNK_TGT_F}${TGT[0].abspath()} ${RPATH_ST:RPATH} ${FCSTLIB_MARKER} ${FCSTLIBPATH_ST:STLIBPATH} ${FCSTLIB_ST:STLIB} ${FCSHLIB_MARKER} ${FCLIBPATH_ST:LIBPATH} ${FCLIB_ST:LIB} ${LDFLAGS}' inst_to='${BINDIR}' - chmod=Utils.O755 class fcshlib(fcprogram): inst_to='${LIBDIR}' +class fcstlib(ccroot.stlink_task): + pass class fcprogram_test(fcprogram): - def can_retrieve_cache(self): - return False def runnable_status(self): ret=super(fcprogram_test,self).runnable_status() if ret==Task.SKIP_ME: @@ -103,21 +94,15 @@ bld=self.generator.bld kw['shell']=isinstance(cmd,str) kw['stdout']=kw['stderr']=Utils.subprocess.PIPE - kw['cwd']=bld.variant_dir + kw['cwd']=self.get_cwd() bld.out=bld.err='' bld.to_log('command: %s\n'%cmd) kw['output']=0 try: (bld.out,bld.err)=bld.cmd_and_log(cmd,**kw) - except Exception ,e: + except Exception: return-1 if bld.out: - bld.to_log("out: %s\n"%bld.out) + bld.to_log('out: %s\n'%bld.out) if bld.err: - bld.to_log("err: %s\n"%bld.err) -class fcstlib(ccroot.stlink_task): - pass - -feature('fcprogram','fcshlib','fcstlib','fcprogram_test')(dummy) -extension('.f','.f90','.F','.F90','.for','.FOR')(fc_hook) -conf(modfile) \ No newline at end of file + bld.to_log('err: %s\n'%bld.err) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc_scan.py glmark2-2021.02/waflib/Tools/fc_scan.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/fc_scan.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/fc_scan.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,9 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import re -from waflib import Utils,Task,TaskGen,Logs -from waflib.TaskGen import feature,before_method,after_method,extension -from waflib.Configure import conf -INC_REGEX="""(?:^|['">]\s*;)\s*INCLUDE\s+(?:\w+_)?[<"'](.+?)(?=["'>])""" +INC_REGEX="""(?:^|['">]\s*;)\s*(?:|#\s*)INCLUDE\s+(?:\w+_)?[<"'](.+?)(?=["'>])""" USE_REGEX="""(?:^|;)\s*USE(?:\s+|(?:(?:\s*,\s*(?:NON_)?INTRINSIC)?\s*::))\s*(\w+)""" MOD_REGEX="""(?:^|;)\s*MODULE(?!\s*PROCEDURE)(?:\s+|(?:(?:\s*,\s*(?:NON_)?INTRINSIC)?\s*::))\s*(\w+)""" re_inc=re.compile(INC_REGEX,re.I) @@ -40,7 +37,6 @@ nd=self.waiting.pop(0) self.iter(nd) def iter(self,node): - path=node.abspath() incs,uses,mods=self.find_deps(node) for x in incs: if x in self.seen: diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/flex.py glmark2-2021.02/waflib/Tools/flex.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/flex.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/flex.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,8 +1,10 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import waflib.TaskGen +import os,re +from waflib import Task,TaskGen +from waflib.Tools import ccroot def decide_ext(self,node): if'cxx'in self.features: return['.lex.cc'] @@ -15,13 +17,21 @@ if isinstance(xx,str):return[xx] return xx tsk.last_cmd=lst=[] - lst.extend(to_list(env['FLEX'])) - lst.extend(to_list(env['FLEXFLAGS'])) - lst.extend([a.path_from(bld.bldnode)for a in tsk.inputs]) + lst.extend(to_list(env.FLEX)) + lst.extend(to_list(env.FLEXFLAGS)) + inputs=[a.path_from(tsk.get_cwd())for a in tsk.inputs] + if env.FLEX_MSYS: + inputs=[x.replace(os.sep,'/')for x in inputs] + lst.extend(inputs) lst=[x for x in lst if x] txt=bld.cmd_and_log(lst,cwd=wd,env=env.env or None,quiet=0) - tsk.outputs[0].write(txt) -waflib.TaskGen.declare_chain(name='flex',rule=flexfun,ext_in='.l',decider=decide_ext,) + tsk.outputs[0].write(txt.replace('\r\n','\n').replace('\r','\n')) +TaskGen.declare_chain(name='flex',rule=flexfun,ext_in='.l',decider=decide_ext,) +Task.classes['flex'].vars=['FLEXFLAGS','FLEX'] +ccroot.USELIB_VARS['c'].add('FLEXFLAGS') +ccroot.USELIB_VARS['cxx'].add('FLEXFLAGS') def configure(conf): conf.find_program('flex',var='FLEX') conf.env.FLEXFLAGS=['-t'] + if re.search(r"\\msys\\[0-9.]+\\bin\\flex.exe$",conf.env.FLEX[0]): + conf.env.FLEX_MSYS=True diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/g95.py glmark2-2021.02/waflib/Tools/g95.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/g95.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/g95.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,32 +1,38 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import re from waflib import Utils -from waflib.Tools import fc,fc_config,fc_scan +from waflib.Tools import fc,fc_config,fc_scan,ar from waflib.Configure import conf +@conf def find_g95(conf): fc=conf.find_program('g95',var='FC') - fc=conf.cmd_to_list(fc) conf.get_g95_version(fc) conf.env.FC_NAME='G95' +@conf def g95_flags(conf): v=conf.env - v['FCFLAGS_fcshlib']=['-fPIC'] - v['FORTRANMODFLAG']=['-fmod=',''] - v['FCFLAGS_DEBUG']=['-Werror'] + v.FCFLAGS_fcshlib=['-fPIC'] + v.FORTRANMODFLAG=['-fmod=',''] + v.FCFLAGS_DEBUG=['-Werror'] +@conf def g95_modifier_win32(conf): fc_config.fortran_modifier_win32(conf) +@conf def g95_modifier_cygwin(conf): fc_config.fortran_modifier_cygwin(conf) +@conf def g95_modifier_darwin(conf): fc_config.fortran_modifier_darwin(conf) +@conf def g95_modifier_platform(conf): - dest_os=conf.env['DEST_OS']or Utils.unversioned_sys_platform() + dest_os=conf.env.DEST_OS or Utils.unversioned_sys_platform() g95_modifier_func=getattr(conf,'g95_modifier_'+dest_os,None) if g95_modifier_func: g95_modifier_func() +@conf def get_g95_version(conf,fc): version_re=re.compile(r"g95\s*(?P\d*)\.(?P\d*)").search cmd=fc+['--version'] @@ -38,18 +44,11 @@ if not match: conf.fatal('cannot determine g95 version') k=match.groupdict() - conf.env['FC_VERSION']=(k['major'],k['minor']) + conf.env.FC_VERSION=(k['major'],k['minor']) def configure(conf): conf.find_g95() conf.find_ar() conf.fc_flags() + conf.fc_add_flags() conf.g95_flags() conf.g95_modifier_platform() - -conf(find_g95) -conf(g95_flags) -conf(g95_modifier_win32) -conf(g95_modifier_cygwin) -conf(g95_modifier_darwin) -conf(g95_modifier_platform) -conf(get_g95_version) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gas.py glmark2-2021.02/waflib/Tools/gas.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gas.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/gas.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,12 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import waflib.Tools.asm from waflib.Tools import ar def configure(conf): - conf.find_program(['gas','as','gcc'],var='AS') - conf.env.AS_TGT_F=['-o'] + conf.find_program(['gas','gcc'],var='AS') + conf.env.AS_TGT_F=['-c','-o'] conf.env.ASLNK_TGT_F=['-o'] conf.find_ar() + conf.load('asm') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gcc.py glmark2-2021.02/waflib/Tools/gcc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gcc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/gcc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,80 +1,94 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys -from waflib import Configure,Options,Utils from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_gcc(conf): cc=conf.find_program(['gcc','cc'],var='CC') - cc=conf.cmd_to_list(cc) conf.get_cc_version(cc,gcc=True) conf.env.CC_NAME='gcc' - conf.env.CC=cc +@conf def gcc_common_flags(conf): v=conf.env - v['CC_SRC_F']=[] - v['CC_TGT_F']=['-c','-o'] - if not v['LINK_CC']:v['LINK_CC']=v['CC'] - v['CCLNK_SRC_F']=[] - v['CCLNK_TGT_F']=['-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['RPATH_ST']='-Wl,-rpath,%s' - v['SONAME_ST']='-Wl,-h,%s' - v['SHLIB_MARKER']='-Wl,-Bdynamic' - v['STLIB_MARKER']='-Wl,-Bstatic' - v['cprogram_PATTERN']='%s' - v['CFLAGS_cshlib']=['-fPIC'] - v['LINKFLAGS_cshlib']=['-shared'] - v['cshlib_PATTERN']='lib%s.so' - v['LINKFLAGS_cstlib']=['-Wl,-Bstatic'] - v['cstlib_PATTERN']='lib%s.a' - v['LINKFLAGS_MACBUNDLE']=['-bundle','-undefined','dynamic_lookup'] - v['CFLAGS_MACBUNDLE']=['-fPIC'] - v['macbundle_PATTERN']='%s.bundle' + v.CC_SRC_F=[] + v.CC_TGT_F=['-c','-o'] + if not v.LINK_CC: + v.LINK_CC=v.CC + v.CCLNK_SRC_F=[] + v.CCLNK_TGT_F=['-o'] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.RPATH_ST='-Wl,-rpath,%s' + v.SONAME_ST='-Wl,-h,%s' + v.SHLIB_MARKER='-Wl,-Bdynamic' + v.STLIB_MARKER='-Wl,-Bstatic' + v.cprogram_PATTERN='%s' + v.CFLAGS_cshlib=['-fPIC'] + v.LINKFLAGS_cshlib=['-shared'] + v.cshlib_PATTERN='lib%s.so' + v.LINKFLAGS_cstlib=['-Wl,-Bstatic'] + v.cstlib_PATTERN='lib%s.a' + v.LINKFLAGS_MACBUNDLE=['-bundle','-undefined','dynamic_lookup'] + v.CFLAGS_MACBUNDLE=['-fPIC'] + v.macbundle_PATTERN='%s.bundle' +@conf def gcc_modifier_win32(conf): v=conf.env - v['cprogram_PATTERN']='%s.exe' - v['cshlib_PATTERN']='%s.dll' - v['implib_PATTERN']='lib%s.dll.a' - v['IMPLIB_ST']='-Wl,--out-implib,%s' - v['CFLAGS_cshlib']=[] - v.append_value('CFLAGS_cshlib',['-DDLL_EXPORT']) + v.cprogram_PATTERN='%s.exe' + v.cshlib_PATTERN='%s.dll' + v.implib_PATTERN='lib%s.dll.a' + v.IMPLIB_ST='-Wl,--out-implib,%s' + v.CFLAGS_cshlib=[] v.append_value('LINKFLAGS',['-Wl,--enable-auto-import']) +@conf def gcc_modifier_cygwin(conf): gcc_modifier_win32(conf) v=conf.env - v['cshlib_PATTERN']='cyg%s.dll' + v.cshlib_PATTERN='cyg%s.dll' v.append_value('LINKFLAGS_cshlib',['-Wl,--enable-auto-image-base']) - v['CFLAGS_cshlib']=[] + v.CFLAGS_cshlib=[] +@conf def gcc_modifier_darwin(conf): v=conf.env - v['CFLAGS_cshlib']=['-fPIC','-compatibility_version','1','-current_version','1'] - v['LINKFLAGS_cshlib']=['-dynamiclib'] - v['cshlib_PATTERN']='lib%s.dylib' - v['FRAMEWORKPATH_ST']='-F%s' - v['FRAMEWORK_ST']=['-framework'] - v['ARCH_ST']=['-arch'] - v['LINKFLAGS_cstlib']=[] - v['SHLIB_MARKER']=[] - v['STLIB_MARKER']=[] - v['SONAME_ST']=[] + v.CFLAGS_cshlib=['-fPIC'] + v.LINKFLAGS_cshlib=['-dynamiclib'] + v.cshlib_PATTERN='lib%s.dylib' + v.FRAMEWORKPATH_ST='-F%s' + v.FRAMEWORK_ST=['-framework'] + v.ARCH_ST=['-arch'] + v.LINKFLAGS_cstlib=[] + v.SHLIB_MARKER=[] + v.STLIB_MARKER=[] + v.SONAME_ST=[] +@conf def gcc_modifier_aix(conf): v=conf.env - v['LINKFLAGS_cprogram']=['-Wl,-brtl'] - v['LINKFLAGS_cshlib']=['-shared','-Wl,-brtl,-bexpfull'] - v['SHLIB_MARKER']=[] + v.LINKFLAGS_cprogram=['-Wl,-brtl'] + v.LINKFLAGS_cshlib=['-shared','-Wl,-brtl,-bexpfull'] + v.SHLIB_MARKER=[] +@conf def gcc_modifier_hpux(conf): v=conf.env - v['SHLIB_MARKER']=[] - v['CFLAGS_cshlib']=['-fPIC','-DPIC'] - v['cshlib_PATTERN']='lib%s.sl' + v.SHLIB_MARKER=[] + v.STLIB_MARKER=[] + v.CFLAGS_cshlib=['-fPIC','-DPIC'] + v.cshlib_PATTERN='lib%s.sl' +@conf +def gcc_modifier_openbsd(conf): + conf.env.SONAME_ST=[] +@conf +def gcc_modifier_osf1V(conf): + v=conf.env + v.SHLIB_MARKER=[] + v.STLIB_MARKER=[] + v.SONAME_ST=[] +@conf def gcc_modifier_platform(conf): gcc_modifier_func=getattr(conf,'gcc_modifier_'+conf.env.DEST_OS,None) if gcc_modifier_func: @@ -87,12 +101,3 @@ conf.cc_load_tools() conf.cc_add_flags() conf.link_add_flags() - -conf(find_gcc) -conf(gcc_common_flags) -conf(gcc_modifier_win32) -conf(gcc_modifier_cygwin) -conf(gcc_modifier_darwin) -conf(gcc_modifier_aix) -conf(gcc_modifier_hpux) -conf(gcc_modifier_platform) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gdc.py glmark2-2021.02/waflib/Tools/gdc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gdc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/gdc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,34 +1,35 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys from waflib.Tools import ar,d from waflib.Configure import conf +@conf def find_gdc(conf): conf.find_program('gdc',var='D') + out=conf.cmd_and_log(conf.env.D+['--version']) + if out.find("gdc")==-1: + conf.fatal("detected compiler is not gdc") +@conf def common_flags_gdc(conf): v=conf.env - v['DFLAGS']=[] - v['D_SRC_F']=['-c'] - v['D_TGT_F']='-o%s' - v['D_LINKER']=v['D'] - v['DLNK_SRC_F']='' - v['DLNK_TGT_F']='-o%s' - v['DINC_ST']='-I%s' - v['DSHLIB_MARKER']=v['DSTLIB_MARKER']='' - v['DSTLIB_ST']=v['DSHLIB_ST']='-l%s' - v['DSTLIBPATH_ST']=v['DLIBPATH_ST']='-L%s' - v['LINKFLAGS_dshlib']=['-shared'] - v['DHEADER_ext']='.di' + v.DFLAGS=[] + v.D_SRC_F=['-c'] + v.D_TGT_F='-o%s' + v.D_LINKER=v.D + v.DLNK_SRC_F='' + v.DLNK_TGT_F='-o%s' + v.DINC_ST='-I%s' + v.DSHLIB_MARKER=v.DSTLIB_MARKER='' + v.DSTLIB_ST=v.DSHLIB_ST='-l%s' + v.DSTLIBPATH_ST=v.DLIBPATH_ST='-L%s' + v.LINKFLAGS_dshlib=['-shared'] + v.DHEADER_ext='.di' v.DFLAGS_d_with_header='-fintfc' - v['D_HDR_F']='-fintfc-file=%s' + v.D_HDR_F='-fintfc-file=%s' def configure(conf): conf.find_gdc() conf.load('ar') conf.load('d') conf.common_flags_gdc() conf.d_platform_flags() - -conf(find_gdc) -conf(common_flags_gdc) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gfortran.py glmark2-2021.02/waflib/Tools/gfortran.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gfortran.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/gfortran.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,32 +1,38 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import re from waflib import Utils -from waflib.Tools import fc,fc_config,fc_scan +from waflib.Tools import fc,fc_config,fc_scan,ar from waflib.Configure import conf +@conf def find_gfortran(conf): fc=conf.find_program(['gfortran','g77'],var='FC') - fc=conf.cmd_to_list(fc) conf.get_gfortran_version(fc) conf.env.FC_NAME='GFORTRAN' +@conf def gfortran_flags(conf): v=conf.env - v['FCFLAGS_fcshlib']=['-fPIC'] - v['FORTRANMODFLAG']=['-J',''] - v['FCFLAGS_DEBUG']=['-Werror'] + v.FCFLAGS_fcshlib=['-fPIC'] + v.FORTRANMODFLAG=['-J',''] + v.FCFLAGS_DEBUG=['-Werror'] +@conf def gfortran_modifier_win32(conf): fc_config.fortran_modifier_win32(conf) +@conf def gfortran_modifier_cygwin(conf): fc_config.fortran_modifier_cygwin(conf) +@conf def gfortran_modifier_darwin(conf): fc_config.fortran_modifier_darwin(conf) +@conf def gfortran_modifier_platform(conf): - dest_os=conf.env['DEST_OS']or Utils.unversioned_sys_platform() + dest_os=conf.env.DEST_OS or Utils.unversioned_sys_platform() gfortran_modifier_func=getattr(conf,'gfortran_modifier_'+dest_os,None) if gfortran_modifier_func: gfortran_modifier_func() +@conf def get_gfortran_version(conf,fc): version_re=re.compile(r"GNU\s*Fortran",re.I).search cmd=fc+['--version'] @@ -40,7 +46,7 @@ if out.find('__GNUC__')<0: conf.fatal('Could not determine the compiler type') k={} - out=out.split('\n') + out=out.splitlines() import shlex for line in out: lst=shlex.split(line) @@ -52,18 +58,11 @@ return var in k def isT(var): return var in k and k[var]!='0' - conf.env['FC_VERSION']=(k['__GNUC__'],k['__GNUC_MINOR__'],k['__GNUC_PATCHLEVEL__']) + conf.env.FC_VERSION=(k['__GNUC__'],k['__GNUC_MINOR__'],k['__GNUC_PATCHLEVEL__']) def configure(conf): conf.find_gfortran() conf.find_ar() conf.fc_flags() + conf.fc_add_flags() conf.gfortran_flags() conf.gfortran_modifier_platform() - -conf(find_gfortran) -conf(gfortran_flags) -conf(gfortran_modifier_win32) -conf(gfortran_modifier_cygwin) -conf(gfortran_modifier_darwin) -conf(gfortran_modifier_platform) -conf(get_gfortran_version) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/glib2.py glmark2-2021.02/waflib/Tools/glib2.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/glib2.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/glib2.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,18 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os -from waflib import Task,Utils,Options,Errors,Logs -from waflib.TaskGen import taskgen_method,before_method,after_method,feature +from waflib import Context,Task,Utils,Options,Errors,Logs +from waflib.TaskGen import taskgen_method,before_method,feature,extension +from waflib.Configure import conf +@taskgen_method def add_marshal_file(self,filename,prefix): if not hasattr(self,'marshal_list'): self.marshal_list=[] self.meths.append('process_marshal') self.marshal_list.append((filename,prefix)) +@before_method('process_source') def process_marshal(self): for f,prefix in getattr(self,'marshal_list',[]): node=self.path.find_resource(f) @@ -22,8 +25,11 @@ self.source=self.to_nodes(getattr(self,'source',[])) self.source.append(c_node) class glib_genmarshal(Task.Task): + vars=['GLIB_GENMARSHAL_PREFIX','GLIB_GENMARSHAL'] + color='BLUE' + ext_out=['.h'] def run(self): - bld=self.inputs[0].__class__.ctx + bld=self.generator.bld get=self.env.get_flat cmd1="%s %s --prefix=%s --header > %s"%(get('GLIB_GENMARSHAL'),self.inputs[0].srcpath(),get('GLIB_GENMARSHAL_PREFIX'),self.outputs[0].abspath()) ret=bld.exec_command(cmd1) @@ -32,19 +38,19 @@ self.outputs[1].write(c) cmd2="%s %s --prefix=%s --body >> %s"%(get('GLIB_GENMARSHAL'),self.inputs[0].srcpath(),get('GLIB_GENMARSHAL_PREFIX'),self.outputs[1].abspath()) return bld.exec_command(cmd2) - vars=['GLIB_GENMARSHAL_PREFIX','GLIB_GENMARSHAL'] - color='BLUE' - ext_out=['.h'] +@taskgen_method def add_enums_from_template(self,source='',target='',template='',comments=''): if not hasattr(self,'enums_list'): self.enums_list=[] self.meths.append('process_enums') self.enums_list.append({'source':source,'target':target,'template':template,'file-head':'','file-prod':'','file-tail':'','enum-prod':'','value-head':'','value-prod':'','value-tail':'','comments':comments}) +@taskgen_method def add_enums(self,source='',target='',file_head='',file_prod='',file_tail='',enum_prod='',value_head='',value_prod='',value_tail='',comments=''): if not hasattr(self,'enums_list'): self.enums_list=[] self.meths.append('process_enums') self.enums_list.append({'source':source,'template':'','target':target,'file-head':file_head,'file-prod':file_prod,'file-tail':file_tail,'enum-prod':enum_prod,'value-head':value_head,'value-prod':value_prod,'value-tail':value_tail,'comments':comments}) +@before_method('process_source') def process_enums(self): for enum in getattr(self,'enums_list',[]): task=self.create_task('glib_mkenums') @@ -55,13 +61,13 @@ raise Errors.WafError('missing source '+str(enum)) source_list=[self.path.find_resource(k)for k in source_list] inputs+=source_list - env['GLIB_MKENUMS_SOURCE']=[k.abspath()for k in source_list] + env.GLIB_MKENUMS_SOURCE=[k.abspath()for k in source_list] if not enum['target']: raise Errors.WafError('missing target '+str(enum)) tgt_node=self.path.find_or_declare(enum['target']) if tgt_node.name.endswith('.c'): self.source.append(tgt_node) - env['GLIB_MKENUMS_TARGET']=tgt_node.abspath() + env.GLIB_MKENUMS_TARGET=tgt_node.abspath() options=[] if enum['template']: template_node=self.path.find_resource(enum['template']) @@ -71,104 +77,154 @@ for param,option in params.items(): if enum[param]: options.append('%s %r'%(option,enum[param])) - env['GLIB_MKENUMS_OPTIONS']=' '.join(options) + env.GLIB_MKENUMS_OPTIONS=' '.join(options) task.set_inputs(inputs) task.set_outputs(tgt_node) class glib_mkenums(Task.Task): run_str='${GLIB_MKENUMS} ${GLIB_MKENUMS_OPTIONS} ${GLIB_MKENUMS_SOURCE} > ${GLIB_MKENUMS_TARGET}' color='PINK' ext_out=['.h'] +@taskgen_method def add_settings_schemas(self,filename_list): if not hasattr(self,'settings_schema_files'): self.settings_schema_files=[] if not isinstance(filename_list,list): filename_list=[filename_list] self.settings_schema_files.extend(filename_list) +@taskgen_method def add_settings_enums(self,namespace,filename_list): if hasattr(self,'settings_enum_namespace'): - raise Errors.WafError("Tried to add gsettings enums to '%s' more than once"%self.name) + raise Errors.WafError("Tried to add gsettings enums to %r more than once"%self.name) self.settings_enum_namespace=namespace if type(filename_list)!='list': filename_list=[filename_list] self.settings_enum_files=filename_list -def r_change_ext(self,ext): - name=self.name - k=name.rfind('.') - if k>=0: - name=name[:k]+ext - else: - name=name+ext - return self.parent.find_or_declare([name]) +@feature('glib2') def process_settings(self): enums_tgt_node=[] install_files=[] settings_schema_files=getattr(self,'settings_schema_files',[]) - if settings_schema_files and not self.env['GLIB_COMPILE_SCHEMAS']: + if settings_schema_files and not self.env.GLIB_COMPILE_SCHEMAS: raise Errors.WafError("Unable to process GSettings schemas - glib-compile-schemas was not found during configure") if hasattr(self,'settings_enum_files'): enums_task=self.create_task('glib_mkenums') source_list=self.settings_enum_files source_list=[self.path.find_resource(k)for k in source_list] enums_task.set_inputs(source_list) - enums_task.env['GLIB_MKENUMS_SOURCE']=[k.abspath()for k in source_list] + enums_task.env.GLIB_MKENUMS_SOURCE=[k.abspath()for k in source_list] target=self.settings_enum_namespace+'.enums.xml' tgt_node=self.path.find_or_declare(target) enums_task.set_outputs(tgt_node) - enums_task.env['GLIB_MKENUMS_TARGET']=tgt_node.abspath() + enums_task.env.GLIB_MKENUMS_TARGET=tgt_node.abspath() enums_tgt_node=[tgt_node] install_files.append(tgt_node) options='--comments "" --fhead "" --vhead " <@type@ id=\\"%s.@EnumName@\\">" --vprod " " --vtail " " --ftail "" '%(self.settings_enum_namespace) - enums_task.env['GLIB_MKENUMS_OPTIONS']=options + enums_task.env.GLIB_MKENUMS_OPTIONS=options for schema in settings_schema_files: schema_task=self.create_task('glib_validate_schema') schema_node=self.path.find_resource(schema) if not schema_node: - raise Errors.WafError("Cannot find the schema file '%s'"%schema) + raise Errors.WafError("Cannot find the schema file %r"%schema) install_files.append(schema_node) source_list=enums_tgt_node+[schema_node] schema_task.set_inputs(source_list) - schema_task.env['GLIB_COMPILE_SCHEMAS_OPTIONS']=[("--schema-file="+k.abspath())for k in source_list] - target_node=r_change_ext(schema_node,'.xml.valid') + schema_task.env.GLIB_COMPILE_SCHEMAS_OPTIONS=[("--schema-file="+k.abspath())for k in source_list] + target_node=schema_node.change_ext('.xml.valid') schema_task.set_outputs(target_node) - schema_task.env['GLIB_VALIDATE_SCHEMA_OUTPUT']=target_node.abspath() + schema_task.env.GLIB_VALIDATE_SCHEMA_OUTPUT=target_node.abspath() def compile_schemas_callback(bld): if not bld.is_install:return Logs.pprint('YELLOW','Updating GSettings schema cache') command=Utils.subst_vars("${GLIB_COMPILE_SCHEMAS} ${GSETTINGSSCHEMADIR}",bld.env) - ret=self.bld.exec_command(command) + self.bld.exec_command(command) if self.bld.is_install: - if not self.env['GSETTINGSSCHEMADIR']: + if not self.env.GSETTINGSSCHEMADIR: raise Errors.WafError('GSETTINGSSCHEMADIR not defined (should have been set up automatically during configure)') if install_files: - self.bld.install_files(self.env['GSETTINGSSCHEMADIR'],install_files) + self.add_install_files(install_to=self.env.GSETTINGSSCHEMADIR,install_from=install_files) if not hasattr(self.bld,'_compile_schemas_registered'): self.bld.add_post_fun(compile_schemas_callback) self.bld._compile_schemas_registered=True class glib_validate_schema(Task.Task): run_str='rm -f ${GLIB_VALIDATE_SCHEMA_OUTPUT} && ${GLIB_COMPILE_SCHEMAS} --dry-run ${GLIB_COMPILE_SCHEMAS_OPTIONS} && touch ${GLIB_VALIDATE_SCHEMA_OUTPUT}' color='PINK' -def configure(conf): +@extension('.gresource.xml') +def process_gresource_source(self,node): + if not self.env.GLIB_COMPILE_RESOURCES: + raise Errors.WafError("Unable to process GResource file - glib-compile-resources was not found during configure") + if'gresource'in self.features: + return + h_node=node.change_ext('_xml.h') + c_node=node.change_ext('_xml.c') + self.create_task('glib_gresource_source',node,[h_node,c_node]) + self.source.append(c_node) +@feature('gresource') +def process_gresource_bundle(self): + for i in self.to_list(self.source): + node=self.path.find_resource(i) + task=self.create_task('glib_gresource_bundle',node,node.change_ext('')) + inst_to=getattr(self,'install_path',None) + if inst_to: + self.add_install_files(install_to=inst_to,install_from=task.outputs) +class glib_gresource_base(Task.Task): + color='BLUE' + base_cmd='${GLIB_COMPILE_RESOURCES} --sourcedir=${SRC[0].parent.srcpath()} --sourcedir=${SRC[0].bld_dir()}' + def scan(self): + bld=self.generator.bld + kw={} + kw['cwd']=self.get_cwd() + kw['quiet']=Context.BOTH + cmd=Utils.subst_vars('${GLIB_COMPILE_RESOURCES} --sourcedir=%s --sourcedir=%s --generate-dependencies %s'%(self.inputs[0].parent.srcpath(),self.inputs[0].bld_dir(),self.inputs[0].bldpath()),self.env) + output=bld.cmd_and_log(cmd,**kw) + nodes=[] + names=[] + for dep in output.splitlines(): + if dep: + node=bld.bldnode.find_node(dep) + if node: + nodes.append(node) + else: + names.append(dep) + return(nodes,names) +class glib_gresource_source(glib_gresource_base): + vars=['GLIB_COMPILE_RESOURCES'] + fun_h=Task.compile_fun_shell(glib_gresource_base.base_cmd+' --target=${TGT[0].abspath()} --generate-header ${SRC}') + fun_c=Task.compile_fun_shell(glib_gresource_base.base_cmd+' --target=${TGT[1].abspath()} --generate-source ${SRC}') + ext_out=['.h'] + def run(self): + return self.fun_h[0](self)or self.fun_c[0](self) +class glib_gresource_bundle(glib_gresource_base): + run_str=glib_gresource_base.base_cmd+' --target=${TGT} ${SRC}' + shell=True +@conf +def find_glib_genmarshal(conf): conf.find_program('glib-genmarshal',var='GLIB_GENMARSHAL') - conf.find_perl_program('glib-mkenums',var='GLIB_MKENUMS') - conf.find_program('glib-compile-schemas',var='GLIB_COMPILE_SCHEMAS',mandatory=False) +@conf +def find_glib_mkenums(conf): + if not conf.env.PERL: + conf.find_program('perl',var='PERL') + conf.find_program('glib-mkenums',interpreter='PERL',var='GLIB_MKENUMS') +@conf +def find_glib_compile_schemas(conf): + conf.find_program('glib-compile-schemas',var='GLIB_COMPILE_SCHEMAS') def getstr(varname): return getattr(Options.options,varname,getattr(conf.env,varname,'')) gsettingsschemadir=getstr('GSETTINGSSCHEMADIR') if not gsettingsschemadir: datadir=getstr('DATADIR') if not datadir: - prefix=conf.env['PREFIX'] + prefix=conf.env.PREFIX datadir=os.path.join(prefix,'share') gsettingsschemadir=os.path.join(datadir,'glib-2.0','schemas') - conf.env['GSETTINGSSCHEMADIR']=gsettingsschemadir + conf.env.GSETTINGSSCHEMADIR=gsettingsschemadir +@conf +def find_glib_compile_resources(conf): + conf.find_program('glib-compile-resources',var='GLIB_COMPILE_RESOURCES') +def configure(conf): + conf.find_glib_genmarshal() + conf.find_glib_mkenums() + conf.find_glib_compile_schemas(mandatory=False) + conf.find_glib_compile_resources(mandatory=False) def options(opt): - opt.add_option('--gsettingsschemadir',help='GSettings schema location [Default: ${datadir}/glib-2.0/schemas]',default='',dest='GSETTINGSSCHEMADIR') - -taskgen_method(add_marshal_file) -before_method('process_source')(process_marshal) -taskgen_method(add_enums_from_template) -taskgen_method(add_enums) -before_method('process_source')(process_enums) -taskgen_method(add_settings_schemas) -taskgen_method(add_settings_enums) -feature('glib2')(process_settings) \ No newline at end of file + gr=opt.add_option_group('Installation directories') + gr.add_option('--gsettingsschemadir',help='GSettings schema location [DATADIR/glib-2.0/schemas]',default='',dest='GSETTINGSSCHEMADIR') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gnu_dirs.py glmark2-2021.02/waflib/Tools/gnu_dirs.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/gnu_dirs.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/gnu_dirs.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,37 +1,38 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os +import os,re from waflib import Utils,Options,Context -_options=[x.split(', ')for x in''' -bindir, user executables, ${EXEC_PREFIX}/bin -sbindir, system admin executables, ${EXEC_PREFIX}/sbin -libexecdir, program executables, ${EXEC_PREFIX}/libexec -sysconfdir, read-only single-machine data, ${PREFIX}/etc -sharedstatedir, modifiable architecture-independent data, ${PREFIX}/com -localstatedir, modifiable single-machine data, ${PREFIX}/var -libdir, object code libraries, ${EXEC_PREFIX}/lib -includedir, C header files, ${PREFIX}/include -oldincludedir, C header files for non-gcc, /usr/include -datarootdir, read-only arch.-independent data root, ${PREFIX}/share -datadir, read-only architecture-independent data, ${DATAROOTDIR} -infodir, info documentation, ${DATAROOTDIR}/info +gnuopts=''' +bindir, user commands, ${EXEC_PREFIX}/bin +sbindir, system binaries, ${EXEC_PREFIX}/sbin +libexecdir, program-specific binaries, ${EXEC_PREFIX}/libexec +sysconfdir, host-specific configuration, ${PREFIX}/etc +sharedstatedir, architecture-independent variable data, ${PREFIX}/com +localstatedir, variable data, ${PREFIX}/var +libdir, object code libraries, ${EXEC_PREFIX}/lib%s +includedir, header files, ${PREFIX}/include +oldincludedir, header files for non-GCC compilers, /usr/include +datarootdir, architecture-independent data root, ${PREFIX}/share +datadir, architecture-independent data, ${DATAROOTDIR} +infodir, GNU "info" documentation, ${DATAROOTDIR}/info localedir, locale-dependent data, ${DATAROOTDIR}/locale -mandir, man documentation, ${DATAROOTDIR}/man +mandir, manual pages, ${DATAROOTDIR}/man docdir, documentation root, ${DATAROOTDIR}/doc/${PACKAGE} -htmldir, html documentation, ${DOCDIR} -dvidir, dvi documentation, ${DOCDIR} -pdfdir, pdf documentation, ${DOCDIR} -psdir, ps documentation, ${DOCDIR} -'''.split('\n')if x] +htmldir, HTML documentation, ${DOCDIR} +dvidir, DVI documentation, ${DOCDIR} +pdfdir, PDF documentation, ${DOCDIR} +psdir, PostScript documentation, ${DOCDIR} +'''%Utils.lib64() +_options=[x.split(', ')for x in gnuopts.splitlines()if x] def configure(conf): def get_param(varname,default): return getattr(Options.options,varname,'')or default env=conf.env - conf.env.LIBDIR=conf.env.BINDIR=[] - env['EXEC_PREFIX']=get_param('EXEC_PREFIX',env['PREFIX']) - env['PACKAGE']=getattr(Context.g_module,'APPNAME',None)or env['PACKAGE'] + env.LIBDIR=env.BINDIR=[] + env.EXEC_PREFIX=get_param('EXEC_PREFIX',env.PREFIX) + env.PACKAGE=getattr(Context.g_module,'APPNAME',None)or env.PACKAGE complete=False iter=0 while not complete and iter\d*)\.(?P\d*)",re.I).search - cmd=fc+['--version'] - out,err=fc_config.getoutput(conf,cmd,stdin=False) - if out: - match=version_re(out) + version_re=re.compile(r"\bIntel\b.*\bVersion\s*(?P\d*)\.(?P\d*)",re.I).search + if Utils.is_win32: + cmd=fc else: - match=version_re(err) + cmd=fc+['-logo'] + out,err=fc_config.getoutput(conf,cmd,stdin=False) + match=version_re(out)or version_re(err) if not match: conf.fatal('cannot determine ifort version.') k=match.groupdict() - conf.env['FC_VERSION']=(k['major'],k['minor']) + conf.env.FC_VERSION=(k['major'],k['minor']) def configure(conf): - conf.find_ifort() - conf.find_program('xiar',var='AR') - conf.env.ARFLAGS='rcs' - conf.fc_flags() - conf.ifort_modifier_platform() - -conf(find_ifort) -conf(ifort_modifier_cygwin) -conf(ifort_modifier_win32) -conf(ifort_modifier_darwin) -conf(ifort_modifier_platform) -conf(get_ifort_version) \ No newline at end of file + if Utils.is_win32: + compiler,version,path,includes,libdirs,arch=conf.detect_ifort(True) + v=conf.env + v.DEST_CPU=arch + v.PATH=path + v.INCLUDES=includes + v.LIBPATH=libdirs + v.MSVC_COMPILER=compiler + try: + v.MSVC_VERSION=float(version) + except Exception: + raise + v.MSVC_VERSION=float(version[:-3]) + conf.find_ifort_win32() + conf.ifort_modifier_win32() + else: + conf.find_ifort() + conf.find_program('xiar',var='AR') + conf.find_ar() + conf.fc_flags() + conf.fc_add_flags() + conf.ifort_modifier_platform() +all_ifort_platforms=[('intel64','amd64'),('em64t','amd64'),('ia32','x86'),('Itanium','ia64')] +@conf +def gather_ifort_versions(conf,versions): + version_pattern=re.compile('^...?.?\....?.?') + try: + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Intel\\Compilers\\Fortran') + except WindowsError: + try: + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Intel\\Compilers\\Fortran') + except WindowsError: + return + index=0 + while 1: + try: + version=Utils.winreg.EnumKey(all_versions,index) + except WindowsError: + break + index+=1 + if not version_pattern.match(version): + continue + targets={} + for target,arch in all_ifort_platforms: + if target=='intel64':targetDir='EM64T_NATIVE' + else:targetDir=target + try: + Utils.winreg.OpenKey(all_versions,version+'\\'+targetDir) + icl_version=Utils.winreg.OpenKey(all_versions,version) + path,type=Utils.winreg.QueryValueEx(icl_version,'ProductDir') + except WindowsError: + pass + else: + batch_file=os.path.join(path,'bin','iclvars.bat') + if os.path.isfile(batch_file): + targets[target]=target_compiler(conf,'intel',arch,version,target,batch_file) + for target,arch in all_ifort_platforms: + try: + icl_version=Utils.winreg.OpenKey(all_versions,version+'\\'+target) + path,type=Utils.winreg.QueryValueEx(icl_version,'ProductDir') + except WindowsError: + continue + else: + batch_file=os.path.join(path,'bin','iclvars.bat') + if os.path.isfile(batch_file): + targets[target]=target_compiler(conf,'intel',arch,version,target,batch_file) + major=version[0:2] + versions['intel '+major]=targets +@conf +def setup_ifort(conf,versiondict): + platforms=Utils.to_list(conf.env.MSVC_TARGETS)or[i for i,j in all_ifort_platforms] + desired_versions=conf.env.MSVC_VERSIONS or list(reversed(list(versiondict.keys()))) + for version in desired_versions: + try: + targets=versiondict[version] + except KeyError: + continue + for arch in platforms: + try: + cfg=targets[arch] + except KeyError: + continue + cfg.evaluate() + if cfg.is_valid: + compiler,revision=version.rsplit(' ',1) + return compiler,revision,cfg.bindirs,cfg.incdirs,cfg.libdirs,cfg.cpu + conf.fatal('ifort: Impossible to find a valid architecture for building %r - %r'%(desired_versions,list(versiondict.keys()))) +@conf +def get_ifort_version_win32(conf,compiler,version,target,vcvars): + try: + conf.msvc_cnt+=1 + except AttributeError: + conf.msvc_cnt=1 + batfile=conf.bldnode.make_node('waf-print-msvc-%d.bat'%conf.msvc_cnt) + batfile.write("""@echo off +set INCLUDE= +set LIB= +call "%s" %s +echo PATH=%%PATH%% +echo INCLUDE=%%INCLUDE%% +echo LIB=%%LIB%%;%%LIBPATH%% +"""%(vcvars,target)) + sout=conf.cmd_and_log(['cmd.exe','/E:on','/V:on','/C',batfile.abspath()]) + batfile.delete() + lines=sout.splitlines() + if not lines[0]: + lines.pop(0) + MSVC_PATH=MSVC_INCDIR=MSVC_LIBDIR=None + for line in lines: + if line.startswith('PATH='): + path=line[5:] + MSVC_PATH=path.split(';') + elif line.startswith('INCLUDE='): + MSVC_INCDIR=[i for i in line[8:].split(';')if i] + elif line.startswith('LIB='): + MSVC_LIBDIR=[i for i in line[4:].split(';')if i] + if None in(MSVC_PATH,MSVC_INCDIR,MSVC_LIBDIR): + conf.fatal('ifort: Could not find a valid architecture for building (get_ifort_version_win32)') + env=dict(os.environ) + env.update(PATH=path) + compiler_name,linker_name,lib_name=_get_prog_names(conf,compiler) + fc=conf.find_program(compiler_name,path_list=MSVC_PATH) + if'CL'in env: + del(env['CL']) + try: + conf.cmd_and_log(fc+['/help'],env=env) + except UnicodeError: + st=Utils.ex_stack() + if conf.logger: + conf.logger.error(st) + conf.fatal('ifort: Unicode error - check the code page?') + except Exception as e: + Logs.debug('ifort: get_ifort_version: %r %r %r -> failure %s',compiler,version,target,str(e)) + conf.fatal('ifort: cannot run the compiler in get_ifort_version (run with -v to display errors)') + else: + Logs.debug('ifort: get_ifort_version: %r %r %r -> OK',compiler,version,target) + finally: + conf.env[compiler_name]='' + return(MSVC_PATH,MSVC_INCDIR,MSVC_LIBDIR) +class target_compiler(object): + def __init__(self,ctx,compiler,cpu,version,bat_target,bat,callback=None): + self.conf=ctx + self.name=None + self.is_valid=False + self.is_done=False + self.compiler=compiler + self.cpu=cpu + self.version=version + self.bat_target=bat_target + self.bat=bat + self.callback=callback + def evaluate(self): + if self.is_done: + return + self.is_done=True + try: + vs=self.conf.get_msvc_version(self.compiler,self.version,self.bat_target,self.bat) + except Errors.ConfigurationError: + self.is_valid=False + return + if self.callback: + vs=self.callback(self,vs) + self.is_valid=True + (self.bindirs,self.incdirs,self.libdirs)=vs + def __str__(self): + return str((self.bindirs,self.incdirs,self.libdirs)) + def __repr__(self): + return repr((self.bindirs,self.incdirs,self.libdirs)) +@conf +def detect_ifort(self): + return self.setup_ifort(self.get_ifort_versions(False)) +@conf +def get_ifort_versions(self,eval_and_save=True): + dct={} + self.gather_ifort_versions(dct) + return dct +def _get_prog_names(self,compiler): + if compiler=='intel': + compiler_name='ifort' + linker_name='XILINK' + lib_name='XILIB' + else: + compiler_name='CL' + linker_name='LINK' + lib_name='LIB' + return compiler_name,linker_name,lib_name +@conf +def find_ifort_win32(conf): + v=conf.env + path=v.PATH + compiler=v.MSVC_COMPILER + version=v.MSVC_VERSION + compiler_name,linker_name,lib_name=_get_prog_names(conf,compiler) + v.IFORT_MANIFEST=(compiler=='intel'and version>=11) + fc=conf.find_program(compiler_name,var='FC',path_list=path) + env=dict(conf.environ) + if path:env.update(PATH=';'.join(path)) + if not conf.cmd_and_log(fc+['/nologo','/help'],env=env): + conf.fatal('not intel fortran compiler could not be identified') + v.FC_NAME='IFORT' + if not v.LINK_FC: + conf.find_program(linker_name,var='LINK_FC',path_list=path,mandatory=True) + if not v.AR: + conf.find_program(lib_name,path_list=path,var='AR',mandatory=True) + v.ARFLAGS=['/nologo'] + if v.IFORT_MANIFEST: + conf.find_program('MT',path_list=path,var='MT') + v.MTFLAGS=['/nologo'] + try: + conf.load('winres') + except Errors.WafError: + Logs.warn('Resource compiler not found. Compiling resource file is disabled') +@after_method('apply_link') +@feature('fc') +def apply_flags_ifort(self): + if not self.env.IFORT_WIN32 or not getattr(self,'link_task',None): + return + is_static=isinstance(self.link_task,ccroot.stlink_task) + subsystem=getattr(self,'subsystem','') + if subsystem: + subsystem='/subsystem:%s'%subsystem + flags=is_static and'ARFLAGS'or'LINKFLAGS' + self.env.append_value(flags,subsystem) + if not is_static: + for f in self.env.LINKFLAGS: + d=f.lower() + if d[1:]=='debug': + pdbnode=self.link_task.outputs[0].change_ext('.pdb') + self.link_task.outputs.append(pdbnode) + if getattr(self,'install_task',None): + self.pdb_install_task=self.add_install_files(install_to=self.install_task.install_to,install_from=pdbnode) + break +@feature('fcprogram','fcshlib','fcprogram_test') +@after_method('apply_link') +def apply_manifest_ifort(self): + if self.env.IFORT_WIN32 and getattr(self,'link_task',None): + self.link_task.env.FC=self.env.LINK_FC + if self.env.IFORT_WIN32 and self.env.IFORT_MANIFEST and getattr(self,'link_task',None): + out_node=self.link_task.outputs[0] + man_node=out_node.parent.find_or_declare(out_node.name+'.manifest') + self.link_task.outputs.append(man_node) + self.env.DO_MANIFEST=True diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/__init__.py glmark2-2021.02/waflib/Tools/__init__.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/__init__.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/__init__.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,4 +1,4 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/intltool.py glmark2-2021.02/waflib/Tools/intltool.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/intltool.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/intltool.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,39 +1,58 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os,re -from waflib import Configure,TaskGen,Task,Utils,Runner,Options,Build,Logs +from waflib import Context,Task,Utils,Logs import waflib.Tools.ccroot -from waflib.TaskGen import feature,before_method +from waflib.TaskGen import feature,before_method,taskgen_method from waflib.Logs import error +from waflib.Configure import conf +_style_flags={'ba':'-b','desktop':'-d','keys':'-k','quoted':'--quoted-style','quotedxml':'--quotedxml-style','rfc822deb':'-r','schemas':'-s','xml':'-x',} +@taskgen_method +def ensure_localedir(self): + if not self.env.LOCALEDIR: + if self.env.DATAROOTDIR: + self.env.LOCALEDIR=os.path.join(self.env.DATAROOTDIR,'locale') + else: + self.env.LOCALEDIR=os.path.join(self.env.PREFIX,'share','locale') +@before_method('process_source') +@feature('intltool_in') def apply_intltool_in_f(self): try:self.meths.remove('process_source') except ValueError:pass - if not self.env.LOCALEDIR: - self.env.LOCALEDIR=self.env.PREFIX+'/share/locale' + self.ensure_localedir() + podir=getattr(self,'podir','.') + podirnode=self.path.find_dir(podir) + if not podirnode: + error("could not find the podir %r"%podir) + return + cache=getattr(self,'intlcache','.intlcache') + self.env.INTLCACHE=[os.path.join(str(self.path.get_bld()),podir,cache)] + self.env.INTLPODIR=podirnode.bldpath() + self.env.append_value('INTLFLAGS',getattr(self,'flags',self.env.INTLFLAGS_DEFAULT)) + if'-c'in self.env.INTLFLAGS: + self.bld.fatal('Redundant -c flag in intltool task %r'%self) + style=getattr(self,'style',None) + if style: + try: + style_flag=_style_flags[style] + except KeyError: + self.bld.fatal('intltool_in style "%s" is not valid'%style) + self.env.append_unique('INTLFLAGS',[style_flag]) for i in self.to_list(self.source): node=self.path.find_resource(i) - podir=getattr(self,'podir','po') - podirnode=self.path.find_dir(podir) - if not podirnode: - error("could not find the podir %r"%podir) - continue - cache=getattr(self,'intlcache','.intlcache') - self.env['INTLCACHE']=os.path.join(self.path.bldpath(),podir,cache) - self.env['INTLPODIR']=podirnode.bldpath() - self.env['INTLFLAGS']=getattr(self,'flags',['-q','-u','-c']) task=self.create_task('intltool',node,node.change_ext('')) - inst=getattr(self,'install_path','${LOCALEDIR}') + inst=getattr(self,'install_path',None) if inst: - self.bld.install_files(inst,task.outputs) + self.add_install_files(install_to=inst,install_from=task.outputs) +@feature('intltool_po') def apply_intltool_po(self): try:self.meths.remove('process_source') except ValueError:pass - if not self.env.LOCALEDIR: - self.env.LOCALEDIR=self.env.PREFIX+'/share/locale' - appname=getattr(self,'appname','set_your_app_name') - podir=getattr(self,'podir','') + self.ensure_localedir() + appname=getattr(self,'appname',getattr(Context.g_module,Context.APPNAME,'set_your_app_name')) + podir=getattr(self,'podir','.') inst=getattr(self,'install_path','${LOCALEDIR}') linguas=self.path.find_node(os.path.join(podir,'LINGUAS')) if linguas: @@ -52,27 +71,27 @@ filename=task.outputs[0].name (langname,ext)=os.path.splitext(filename) inst_file=inst+os.sep+langname+os.sep+'LC_MESSAGES'+os.sep+appname+'.mo' - self.bld.install_as(inst_file,task.outputs[0],chmod=getattr(self,'chmod',Utils.O644),env=task.env) + self.add_install_as(install_to=inst_file,install_from=task.outputs[0],chmod=getattr(self,'chmod',Utils.O644)) else: Logs.pprint('RED',"Error no LINGUAS file found in po directory") class po(Task.Task): run_str='${MSGFMT} -o ${TGT} ${SRC}' color='BLUE' class intltool(Task.Task): - run_str='${INTLTOOL} ${INTLFLAGS} ${INTLCACHE} ${INTLPODIR} ${SRC} ${TGT}' + run_str='${INTLTOOL} ${INTLFLAGS} ${INTLCACHE_ST:INTLCACHE} ${INTLPODIR} ${SRC} ${TGT}' color='BLUE' -def configure(conf): +@conf +def find_msgfmt(conf): conf.find_program('msgfmt',var='MSGFMT') - conf.find_perl_program('intltool-merge',var='INTLTOOL') - prefix=conf.env.PREFIX - datadir=conf.env.DATADIR - if not datadir: - datadir=os.path.join(prefix,'share') - conf.define('LOCALEDIR',os.path.join(datadir,'locale').replace('\\','\\\\')) - conf.define('DATADIR',datadir.replace('\\','\\\\')) +@conf +def find_intltool_merge(conf): + if not conf.env.PERL: + conf.find_program('perl',var='PERL') + conf.env.INTLCACHE_ST='--cache=%s' + conf.env.INTLFLAGS_DEFAULT=['-q','-u'] + conf.find_program('intltool-merge',interpreter='PERL',var='INTLTOOL') +def configure(conf): + conf.find_msgfmt() + conf.find_intltool_merge() if conf.env.CC or conf.env.CXX: conf.check(header_name='locale.h') - -before_method('process_source')(apply_intltool_in_f) -feature('intltool_in')(apply_intltool_in_f) -feature('intltool_po')(apply_intltool_po) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/irixcc.py glmark2-2021.02/waflib/Tools/irixcc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/irixcc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/irixcc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,41 +1,45 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os -from waflib import Utils from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_irixcc(conf): v=conf.env cc=None - if v['CC']:cc=v['CC'] - elif'CC'in conf.environ:cc=conf.environ['CC'] - if not cc:cc=conf.find_program('cc',var='CC') - if not cc:conf.fatal('irixcc was not found') - cc=conf.cmd_to_list(cc) + if v.CC: + cc=v.CC + elif'CC'in conf.environ: + cc=conf.environ['CC'] + if not cc: + cc=conf.find_program('cc',var='CC') + if not cc: + conf.fatal('irixcc was not found') try: conf.cmd_and_log(cc+['-version']) - except: + except Exception: conf.fatal('%r -version could not be executed'%cc) - v['CC']=cc - v['CC_NAME']='irix' + v.CC=cc + v.CC_NAME='irix' +@conf def irixcc_common_flags(conf): v=conf.env - v['CC_SRC_F']='' - v['CC_TGT_F']=['-c','-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - if not v['LINK_CC']:v['LINK_CC']=v['CC'] - v['CCLNK_SRC_F']='' - v['CCLNK_TGT_F']=['-o'] - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['cprogram_PATTERN']='%s' - v['cshlib_PATTERN']='lib%s.so' - v['cstlib_PATTERN']='lib%s.a' + v.CC_SRC_F='' + v.CC_TGT_F=['-c','-o'] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + if not v.LINK_CC: + v.LINK_CC=v.CC + v.CCLNK_SRC_F='' + v.CCLNK_TGT_F=['-o'] + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.cprogram_PATTERN='%s' + v.cshlib_PATTERN='lib%s.so' + v.cstlib_PATTERN='lib%s.a' def configure(conf): conf.find_irixcc() conf.find_cpp() @@ -44,6 +48,3 @@ conf.cc_load_tools() conf.cc_add_flags() conf.link_add_flags() - -conf(find_irixcc) -conf(irixcc_common_flags) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/javaw.py glmark2-2021.02/waflib/Tools/javaw.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/javaw.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/javaw.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,12 +1,10 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -import os,re,tempfile,shutil +import os,shutil +from waflib import Task,Utils,Errors,Node from waflib.Configure import conf -from waflib import TaskGen,Task,Utils,Options,Build,Errors,Node,Logs from waflib.TaskGen import feature,before_method,after_method from waflib.Tools import ccroot ccroot.USELIB_VARS['javac']=set(['CLASSPATH','JAVACFLAGS']) @@ -31,9 +29,10 @@ } } ''' +@feature('javac') +@before_method('process_source') def apply_java(self): Utils.def_attrs(self,jarname='',classpath='',sourcepath='.',srcdir='.',jar_mf_attributes={},jar_mf_classpath=[]) - nodes_lst=[] outdir=getattr(self,'outdir',None) if outdir: if not isinstance(outdir,Node.Node): @@ -42,7 +41,7 @@ outdir=self.path.get_bld() outdir.mkdir() self.outdir=outdir - self.env['OUTDIR']=outdir.abspath() + self.env.OUTDIR=outdir.abspath() self.javac_task=tsk=self.create_task('javac') tmp=[] srcdir=getattr(self,'srcdir','') @@ -66,6 +65,8 @@ names=[x.srcpath()for x in tsk.srcdir] if names: tsk.env.append_value('JAVACFLAGS',['-sourcepath',names]) +@feature('javac') +@after_method('apply_java') def use_javac_files(self): lst=[] self.uselib=self.to_list(getattr(self,'uselib',[])) @@ -74,18 +75,26 @@ for x in names: try: y=get(x) - except: + except Errors.WafError: self.uselib.append(x) else: y.post() - lst.append(y.jar_task.outputs[0].abspath()) - self.javac_task.set_run_after(y.jar_task) - if lst: - self.env.append_value('CLASSPATH',lst) + if hasattr(y,'jar_task'): + lst.append(y.jar_task.outputs[0].abspath()) + self.javac_task.set_run_after(y.jar_task) + else: + for tsk in y.tasks: + self.javac_task.set_run_after(tsk) + self.env.append_value('CLASSPATH',lst) +@feature('javac') +@after_method('apply_java','propagate_uselib_vars','use_javac_files') def set_classpath(self): self.env.append_value('CLASSPATH',getattr(self,'classpath',[])) for x in self.tasks: x.env.CLASSPATH=os.pathsep.join(self.env.CLASSPATH)+os.pathsep +@feature('jar') +@after_method('apply_java','use_javac_files') +@before_method('process_source') def jar_files(self): destfile=getattr(self,'destfile','test.jar') jaropts=getattr(self,'jaropts',[]) @@ -101,7 +110,10 @@ self.jar_task=tsk=self.create_task('jar_create') if manifest: jarcreate=getattr(self,'jarcreate','cfm') - node=self.path.find_node(manifest) + if not isinstance(manifest,Node.Node): + node=self.path.find_or_declare(manifest) + else: + node=manifest tsk.dep_nodes.append(node) jaropts.insert(0,node.abspath()) else: @@ -115,24 +127,35 @@ jaropts.append('-C') jaropts.append(basedir.bldpath()) jaropts.append('.') - tsk.env['JAROPTS']=jaropts - tsk.env['JARCREATE']=jarcreate + tsk.env.JAROPTS=jaropts + tsk.env.JARCREATE=jarcreate if getattr(self,'javac_task',None): tsk.set_run_after(self.javac_task) +@feature('jar') +@after_method('jar_files') def use_jar_files(self): - lst=[] self.uselib=self.to_list(getattr(self,'uselib',[])) names=self.to_list(getattr(self,'use',[])) get=self.bld.get_tgen_by_name for x in names: try: y=get(x) - except: + except Errors.WafError: self.uselib.append(x) else: y.post() self.jar_task.run_after.update(y.tasks) -class jar_create(Task.Task): +class JTask(Task.Task): + def split_argfile(self,cmd): + inline=[cmd[0]] + infile=[] + for x in cmd[1:]: + if x.startswith('-J'): + inline.append(x) + else: + infile.append(self.quote_flag(x)) + return(inline,infile) +class jar_create(JTask): color='GREEN' run_str='${JAR} ${JARCREATE} ${TGT} ${JAROPTS}' def runnable_status(self): @@ -143,13 +166,18 @@ global JAR_RE try: self.inputs=[x for x in self.basedir.ant_glob(JAR_RE,remove=False)if id(x)!=id(self.outputs[0])] - except: + except Exception: raise Errors.WafError('Could not find the basedir %r for %r'%(self.basedir,self)) return super(jar_create,self).runnable_status() -class javac(Task.Task): +class javac(JTask): color='BLUE' - nocache=True + run_str='${JAVAC} -classpath ${CLASSPATH} -d ${OUTDIR} ${JAVACFLAGS} ${SRC}' vars=['CLASSPATH','JAVACFLAGS','JAVAC','OUTDIR'] + def uid(self): + lst=[self.__class__.__name__,self.generator.outdir.abspath()] + for x in self.srcdir: + lst.append(x.abspath()) + return Utils.h_list(lst) def runnable_status(self): for t in self.run_after: if not t.hasrun: @@ -160,87 +188,92 @@ for x in self.srcdir: self.inputs.extend(x.ant_glob(SOURCE_RE,remove=False)) return super(javac,self).runnable_status() + def post_run(self): + for node in self.generator.outdir.ant_glob('**/*.class'): + self.generator.bld.node_sigs[node]=self.uid() + self.generator.bld.task_sigs[self.uid()]=self.cache_sig +@feature('javadoc') +@after_method('process_rule') +def create_javadoc(self): + tsk=self.create_task('javadoc') + tsk.classpath=getattr(self,'classpath',[]) + self.javadoc_package=Utils.to_list(self.javadoc_package) + if not isinstance(self.javadoc_output,Node.Node): + self.javadoc_output=self.bld.path.find_or_declare(self.javadoc_output) +class javadoc(Task.Task): + color='BLUE' + def __str__(self): + return'%s: %s -> %s\n'%(self.__class__.__name__,self.generator.srcdir,self.generator.javadoc_output) def run(self): env=self.env - gen=self.generator - bld=gen.bld - wd=bld.bldnode.abspath() - def to_list(xx): - if isinstance(xx,str):return[xx] - return xx - cmd=[] - cmd.extend(to_list(env['JAVAC'])) - cmd.extend(['-classpath']) - cmd.extend(to_list(env['CLASSPATH'])) - cmd.extend(['-d']) - cmd.extend(to_list(env['OUTDIR'])) - cmd.extend(to_list(env['JAVACFLAGS'])) - files=[a.path_from(bld.bldnode)for a in self.inputs] - tmp=None - try: - if len(str(files))+len(str(cmd))>8192: - (fd,tmp)=tempfile.mkstemp(dir=bld.bldnode.abspath()) - try: - os.write(fd,'\n'.join(files)) - finally: - if tmp: - os.close(fd) - if Logs.verbose: - Logs.debug('runner: %r'%(cmd+files)) - cmd.append('@'+tmp) - else: - cmd+=files - ret=self.exec_command(cmd,cwd=wd,env=env.env or None) - finally: - if tmp: - os.unlink(tmp) - return ret + bld=self.generator.bld + wd=bld.bldnode + srcpath=self.generator.path.abspath()+os.sep+self.generator.srcdir + srcpath+=os.pathsep + srcpath+=self.generator.path.get_bld().abspath()+os.sep+self.generator.srcdir + classpath=env.CLASSPATH + classpath+=os.pathsep + classpath+=os.pathsep.join(self.classpath) + classpath="".join(classpath) + self.last_cmd=lst=[] + lst.extend(Utils.to_list(env.JAVADOC)) + lst.extend(['-d',self.generator.javadoc_output.abspath()]) + lst.extend(['-sourcepath',srcpath]) + lst.extend(['-classpath',classpath]) + lst.extend(['-subpackages']) + lst.extend(self.generator.javadoc_package) + lst=[x for x in lst if x] + self.generator.bld.cmd_and_log(lst,cwd=wd,env=env.env or None,quiet=0) def post_run(self): - for n in self.generator.outdir.ant_glob('**/*.class'): - n.sig=Utils.h_file(n.abspath()) + nodes=self.generator.javadoc_output.ant_glob('**') + for node in nodes: + self.generator.bld.node_sigs[node]=self.uid() self.generator.bld.task_sigs[self.uid()]=self.cache_sig def configure(self): java_path=self.environ['PATH'].split(os.pathsep) v=self.env if'JAVA_HOME'in self.environ: java_path=[os.path.join(self.environ['JAVA_HOME'],'bin')]+java_path - self.env['JAVA_HOME']=[self.environ['JAVA_HOME']] - for x in'javac java jar'.split(): + self.env.JAVA_HOME=[self.environ['JAVA_HOME']] + for x in'javac java jar javadoc'.split(): self.find_program(x,var=x.upper(),path_list=java_path) - self.env[x.upper()]=self.cmd_to_list(self.env[x.upper()]) if'CLASSPATH'in self.environ: - v['CLASSPATH']=self.environ['CLASSPATH'] - if not v['JAR']:self.fatal('jar is required for making java packages') - if not v['JAVAC']:self.fatal('javac is required for compiling java classes') - v['JARCREATE']='cf' - v['JAVACFLAGS']=[] + v.CLASSPATH=self.environ['CLASSPATH'] + if not v.JAR: + self.fatal('jar is required for making java packages') + if not v.JAVAC: + self.fatal('javac is required for compiling java classes') + v.JARCREATE='cf' + v.JAVACFLAGS=[] +@conf def check_java_class(self,classname,with_classpath=None): javatestdir='.waf-javatest' classpath=javatestdir - if self.env['CLASSPATH']: - classpath+=os.pathsep+self.env['CLASSPATH'] + if self.env.CLASSPATH: + classpath+=os.pathsep+self.env.CLASSPATH if isinstance(with_classpath,str): classpath+=os.pathsep+with_classpath shutil.rmtree(javatestdir,True) os.mkdir(javatestdir) - java_file=open(os.path.join(javatestdir,'Test.java'),'w') - java_file.write(class_check_source) - java_file.close() - self.exec_command(self.env['JAVAC']+[os.path.join(javatestdir,'Test.java')],shell=False) - cmd=self.env['JAVA']+['-cp',classpath,'Test',classname] + Utils.writef(os.path.join(javatestdir,'Test.java'),class_check_source) + self.exec_command(self.env.JAVAC+[os.path.join(javatestdir,'Test.java')],shell=False) + cmd=self.env.JAVA+['-cp',classpath,'Test',classname] self.to_log("%s\n"%str(cmd)) found=self.exec_command(cmd,shell=False) self.msg('Checking for java class %s'%classname,not found) shutil.rmtree(javatestdir,True) return found +@conf def check_jni_headers(conf): if not conf.env.CC_NAME and not conf.env.CXX_NAME: conf.fatal('load a compiler first (gcc, g++, ..)') if not conf.env.JAVA_HOME: conf.fatal('set JAVA_HOME in the system environment') - javaHome=conf.env['JAVA_HOME'][0] + javaHome=conf.env.JAVA_HOME[0] dir=conf.root.find_dir(conf.env.JAVA_HOME[0]+'/include') if dir is None: + dir=conf.root.find_dir(conf.env.JAVA_HOME[0]+'/../Headers') + if dir is None: conf.fatal('JAVA_HOME does not seem to be set properly') f=dir.ant_glob('**/(jni|jni_md).h') incDirs=[x.parent.abspath()for x in f] @@ -250,26 +283,14 @@ f=dir.ant_glob('**/*jvm.(lib)') if f: libDirs=[[x,y.parent.abspath()]for x in libDirs for y in f] + if conf.env.DEST_OS=='freebsd': + conf.env.append_unique('LINKFLAGS_JAVA','-pthread') for d in libDirs: try: conf.check(header_name='jni.h',define_name='HAVE_JNI_H',lib='jvm',libpath=d,includes=incDirs,uselib_store='JAVA',uselib='JAVA') - except: + except Exception: pass else: break else: conf.fatal('could not find lib jvm in %r (see config.log)'%libDirs) - -feature('javac')(apply_java) -before_method('process_source')(apply_java) -feature('javac')(use_javac_files) -after_method('apply_java')(use_javac_files) -feature('javac')(set_classpath) -after_method('apply_java','propagate_uselib_vars','use_javac_files')(set_classpath) -feature('jar')(jar_files) -after_method('apply_java','use_javac_files')(jar_files) -before_method('process_source')(jar_files) -feature('jar')(use_jar_files) -after_method('jar_files')(use_jar_files) -conf(check_java_class) -conf(check_jni_headers) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/kde4.py glmark2-2021.02/waflib/Tools/kde4.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/kde4.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/kde4.py 1970-01-01 08:00:00.000000000 +0800 @@ -1,49 +0,0 @@ -#! /usr/bin/env python -# encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file - -import os,sys,re -from waflib import Options,TaskGen,Task,Utils -from waflib.TaskGen import feature,after_method -def apply_msgfmt(self): - for lang in self.to_list(self.langs): - node=self.path.find_resource(lang+'.po') - task=self.create_task('msgfmt',node,node.change_ext('.mo')) - langname=lang.split('/') - langname=langname[-1] - inst=getattr(self,'install_path','${KDE4_LOCALE_INSTALL_DIR}') - self.bld.install_as(inst+os.sep+langname+os.sep+'LC_MESSAGES'+os.sep+getattr(self,'appname','set_your_appname')+'.mo',task.outputs[0],chmod=getattr(self,'chmod',Utils.O644)) -class msgfmt(Task.Task): - color='BLUE' - run_str='${MSGFMT} ${SRC} -o ${TGT}' -def configure(self): - kdeconfig=self.find_program('kde4-config') - prefix=self.cmd_and_log('%s --prefix'%kdeconfig).strip() - fname='%s/share/apps/cmake/modules/KDELibsDependencies.cmake'%prefix - try:os.stat(fname) - except OSError: - fname='%s/share/kde4/apps/cmake/modules/KDELibsDependencies.cmake'%prefix - try:os.stat(fname) - except OSError:self.fatal('could not open %s'%fname) - try: - txt=Utils.readf(fname) - except(OSError,IOError): - self.fatal('could not read %s'%fname) - txt=txt.replace('\\\n','\n') - fu=re.compile('#(.*)\n') - txt=fu.sub('',txt) - setregexp=re.compile('([sS][eE][tT]\s*\()\s*([^\s]+)\s+\"([^"]+)\"\)') - found=setregexp.findall(txt) - for(_,key,val)in found: - self.env[key]=val - self.env['LIB_KDECORE']=['kdecore'] - self.env['LIB_KDEUI']=['kdeui'] - self.env['LIB_KIO']=['kio'] - self.env['LIB_KHTML']=['khtml'] - self.env['LIB_KPARTS']=['kparts'] - self.env['LIBPATH_KDECORE']=[self.env['KDE4_LIB_INSTALL_DIR']] - self.env['INCLUDES_KDECORE']=[self.env['KDE4_INCLUDE_INSTALL_DIR']] - self.env.append_value('INCLUDES_KDECORE',[self.env['KDE4_INCLUDE_INSTALL_DIR']+os.sep+'KDE']) - self.find_program('msgfmt',var='MSGFMT') - -feature('msgfmt')(apply_msgfmt) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ldc2.py glmark2-2021.02/waflib/Tools/ldc2.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ldc2.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/ldc2.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,36 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +from waflib.Tools import ar,d +from waflib.Configure import conf +@conf +def find_ldc2(conf): + conf.find_program(['ldc2'],var='D') + out=conf.cmd_and_log(conf.env.D+['-version']) + if out.find("based on DMD v2.")==-1: + conf.fatal("detected compiler is not ldc2") +@conf +def common_flags_ldc2(conf): + v=conf.env + v.D_SRC_F=['-c'] + v.D_TGT_F='-of%s' + v.D_LINKER=v.D + v.DLNK_SRC_F='' + v.DLNK_TGT_F='-of%s' + v.DINC_ST='-I%s' + v.DSHLIB_MARKER=v.DSTLIB_MARKER='' + v.DSTLIB_ST=v.DSHLIB_ST='-L-l%s' + v.DSTLIBPATH_ST=v.DLIBPATH_ST='-L-L%s' + v.LINKFLAGS_dshlib=['-L-shared'] + v.DHEADER_ext='.di' + v.DFLAGS_d_with_header=['-H','-Hf'] + v.D_HDR_F='%s' + v.LINKFLAGS=[] + v.DFLAGS_dshlib=['-relocation-model=pic'] +def configure(conf): + conf.find_ldc2() + conf.load('ar') + conf.load('d') + conf.common_flags_ldc2() + conf.d_platform_flags() diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/lua.py glmark2-2021.02/waflib/Tools/lua.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/lua.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/lua.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,19 +1,18 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib.TaskGen import extension -from waflib import Task,Utils +from waflib import Task +@extension('.lua') def add_lua(self,node): tsk=self.create_task('luac',node,node.change_ext('.luac')) inst_to=getattr(self,'install_path',self.env.LUADIR and'${LUADIR}'or None) if inst_to: - self.bld.install_files(inst_to,tsk.outputs) + self.add_install_files(install_to=inst_to,install_from=tsk.outputs) return tsk class luac(Task.Task): run_str='${LUAC} -s -o ${TGT} ${SRC}' color='PINK' def configure(conf): conf.find_program('luac',var='LUAC') - -extension('.lua')(add_lua) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/md5_tstamp.py glmark2-2021.02/waflib/Tools/md5_tstamp.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/md5_tstamp.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/md5_tstamp.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,25 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +import os,stat +from waflib import Utils,Build,Node +STRONGEST=True +Build.SAVED_ATTRS.append('hashes_md5_tstamp') +def h_file(self): + filename=self.abspath() + st=os.stat(filename) + cache=self.ctx.hashes_md5_tstamp + if filename in cache and cache[filename][0]==st.st_mtime: + return cache[filename][1] + global STRONGEST + if STRONGEST: + ret=Utils.h_file(filename) + else: + if stat.S_ISDIR(st[stat.ST_MODE]): + raise IOError('Not a file') + ret=Utils.md5(str((st.st_mtime,st.st_size)).encode()).digest() + cache[filename]=(st.st_mtime,ret) + return ret +h_file.__doc__=Node.Node.h_file.__doc__ +Node.Node.h_file=h_file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/msvc.py glmark2-2021.02/waflib/Tools/msvc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/msvc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/msvc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,20 +1,12 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,re,tempfile -try: - import _winreg -except: - try: - import winreg as _winreg - except: - _winreg=None -from waflib import Utils,TaskGen,Runner,Configure,Task,Options -from waflib.Logs import debug,info,warn,error -from waflib.TaskGen import after_method,before_method,feature +import os,sys,re +from waflib import Utils,Logs,Options,Errors +from waflib.TaskGen import after_method,feature from waflib.Configure import conf -from waflib.Tools import ccroot,c,cxx,ar,winres +from waflib.Tools import ccroot,c,cxx,ar g_msvc_systemlibs=''' aclui activeds ad1 adptif adsiid advapi32 asycfilt authz bhsupp bits bufferoverflowu cabinet cap certadm certidl ciuuid clusapi comctl32 comdlg32 comsupp comsuppd comsuppw comsuppwd comsvcs @@ -35,52 +27,69 @@ version vfw32 wbemuuid webpost wiaguid wininet winmm winscard winspool winstrm wintrust wldap32 wmiutils wow32 ws2_32 wsnmp32 wsock32 wst wtsapi32 xaswitch xolehlp '''.split() -all_msvc_platforms=[('x64','amd64'),('x86','x86'),('ia64','ia64'),('x86_amd64','amd64'),('x86_ia64','ia64')] +all_msvc_platforms=[('x64','amd64'),('x86','x86'),('ia64','ia64'),('x86_amd64','amd64'),('x86_ia64','ia64'),('x86_arm','arm'),('amd64_x86','x86'),('amd64_arm','arm')] all_wince_platforms=[('armv4','arm'),('armv4i','arm'),('mipsii','mips'),('mipsii_fp','mips'),('mipsiv','mips'),('mipsiv_fp','mips'),('sh4','sh'),('x86','cex86')] all_icl_platforms=[('intel64','amd64'),('em64t','amd64'),('ia32','x86'),('Itanium','ia64')] def options(opt): opt.add_option('--msvc_version',type='string',help='msvc version, eg: "msvc 10.0,msvc 9.0"',default='') opt.add_option('--msvc_targets',type='string',help='msvc targets, eg: "x64,arm"',default='') -def setup_msvc(conf,versions): + opt.add_option('--no-msvc-lazy',action='store_false',help='lazily check msvc target environments',default=True,dest='msvc_lazy') +@conf +def setup_msvc(conf,versiondict): platforms=getattr(Options.options,'msvc_targets','').split(',') if platforms==['']: - platforms=Utils.to_list(conf.env['MSVC_TARGETS'])or[i for i,j in all_msvc_platforms+all_icl_platforms+all_wince_platforms] + platforms=Utils.to_list(conf.env.MSVC_TARGETS)or[i for i,j in all_msvc_platforms+all_icl_platforms+all_wince_platforms] desired_versions=getattr(Options.options,'msvc_version','').split(',') if desired_versions==['']: - desired_versions=conf.env['MSVC_VERSIONS']or[v for v,_ in versions][::-1] - versiondict=dict(versions) + desired_versions=conf.env.MSVC_VERSIONS or list(reversed(list(versiondict.keys()))) + lazy_detect=getattr(Options.options,'msvc_lazy',True) + if conf.env.MSVC_LAZY_AUTODETECT is False: + lazy_detect=False + if not lazy_detect: + for val in versiondict.values(): + for arch in list(val.keys()): + cfg=val[arch] + cfg.evaluate() + if not cfg.is_valid: + del val[arch] + conf.env.MSVC_INSTALLED_VERSIONS=versiondict for version in desired_versions: try: - targets=dict(versiondict[version]) - for target in platforms: - try: - arch,(p1,p2,p3)=targets[target] - compiler,revision=version.rsplit(' ',1) - return compiler,revision,p1,p2,p3 - except KeyError:continue - except KeyError:continue - conf.fatal('msvc: Impossible to find a valid architecture for building (in setup_msvc)') + targets=versiondict[version] + except KeyError: + continue + for arch in platforms: + try: + cfg=targets[arch] + except KeyError: + continue + cfg.evaluate() + if cfg.is_valid: + compiler,revision=version.rsplit(' ',1) + return compiler,revision,cfg.bindirs,cfg.incdirs,cfg.libdirs,cfg.cpu + conf.fatal('msvc: Impossible to find a valid architecture for building %r - %r'%(desired_versions,list(versiondict.keys()))) +@conf def get_msvc_version(conf,compiler,version,target,vcvars): - debug('msvc: get_msvc_version: %r %r %r',compiler,version,target) - batfile=conf.bldnode.make_node('waf-print-msvc.bat') + Logs.debug('msvc: get_msvc_version: %r %r %r',compiler,version,target) + try: + conf.msvc_cnt+=1 + except AttributeError: + conf.msvc_cnt=1 + batfile=conf.bldnode.make_node('waf-print-msvc-%d.bat'%conf.msvc_cnt) batfile.write("""@echo off set INCLUDE= set LIB= call "%s" %s echo PATH=%%PATH%% echo INCLUDE=%%INCLUDE%% -echo LIB=%%LIB%% +echo LIB=%%LIB%%;%%LIBPATH%% """%(vcvars,target)) - sout=conf.cmd_and_log(['cmd','/E:on','/V:on','/C',batfile.abspath()]) + sout=conf.cmd_and_log(['cmd.exe','/E:on','/V:on','/C',batfile.abspath()]) lines=sout.splitlines() - if not lines[0]:lines=lines[1:] - for x in('Setting environment','Setting SDK environment','Intel(R) C++ Compiler','Intel Parallel Studio'): - if lines[0].find(x)!=-1: - break - else: - debug('msvc: get_msvc_version: %r %r %r -> not found',compiler,version,target) - conf.fatal('msvc: Impossible to find a valid architecture for building (in get_msvc_version)') - for line in lines[1:]: + if not lines[0]: + lines.pop(0) + MSVC_PATH=MSVC_INCDIR=MSVC_LIBDIR=None + for line in lines: if line.startswith('PATH='): path=line[5:] MSVC_PATH=path.split(';') @@ -88,177 +97,206 @@ MSVC_INCDIR=[i for i in line[8:].split(';')if i] elif line.startswith('LIB='): MSVC_LIBDIR=[i for i in line[4:].split(';')if i] - env={} - env.update(os.environ) + if None in(MSVC_PATH,MSVC_INCDIR,MSVC_LIBDIR): + conf.fatal('msvc: Could not find a valid architecture for building (get_msvc_version_3)') + env=dict(os.environ) env.update(PATH=path) compiler_name,linker_name,lib_name=_get_prog_names(conf,compiler) cxx=conf.find_program(compiler_name,path_list=MSVC_PATH) - cxx=conf.cmd_to_list(cxx) if'CL'in env: del(env['CL']) try: - try: - conf.cmd_and_log(cxx+['/help'],env=env) - except Exception ,e: - debug('msvc: get_msvc_version: %r %r %r -> failure'%(compiler,version,target)) - debug(str(e)) - conf.fatal('msvc: cannot run the compiler (in get_msvc_version)') - else: - debug('msvc: get_msvc_version: %r %r %r -> OK',compiler,version,target) + conf.cmd_and_log(cxx+['/help'],env=env) + except UnicodeError: + st=Utils.ex_stack() + if conf.logger: + conf.logger.error(st) + conf.fatal('msvc: Unicode error - check the code page?') + except Exception as e: + Logs.debug('msvc: get_msvc_version: %r %r %r -> failure %s',compiler,version,target,str(e)) + conf.fatal('msvc: cannot run the compiler in get_msvc_version (run with -v to display errors)') + else: + Logs.debug('msvc: get_msvc_version: %r %r %r -> OK',compiler,version,target) finally: conf.env[compiler_name]='' return(MSVC_PATH,MSVC_INCDIR,MSVC_LIBDIR) +@conf def gather_wsdk_versions(conf,versions): version_pattern=re.compile('^v..?.?\...?.?') try: - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Microsoft\\Microsoft SDKs\\Windows') + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Microsoft\\Microsoft SDKs\\Windows') except WindowsError: try: - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows') + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows') except WindowsError: return index=0 while 1: try: - version=_winreg.EnumKey(all_versions,index) + version=Utils.winreg.EnumKey(all_versions,index) except WindowsError: break - index=index+1 + index+=1 if not version_pattern.match(version): continue try: - msvc_version=_winreg.OpenKey(all_versions,version) - path,type=_winreg.QueryValueEx(msvc_version,'InstallationFolder') + msvc_version=Utils.winreg.OpenKey(all_versions,version) + path,type=Utils.winreg.QueryValueEx(msvc_version,'InstallationFolder') except WindowsError: continue - if os.path.isfile(os.path.join(path,'bin','SetEnv.cmd')): - targets=[] + if path and os.path.isfile(os.path.join(path,'bin','SetEnv.cmd')): + targets={} for target,arch in all_msvc_platforms: - try: - targets.append((target,(arch,conf.get_msvc_version('wsdk',version,'/'+target,os.path.join(path,'bin','SetEnv.cmd'))))) - except conf.errors.ConfigurationError: - pass - versions.append(('wsdk '+version[1:],targets)) + targets[target]=target_compiler(conf,'wsdk',arch,version,'/'+target,os.path.join(path,'bin','SetEnv.cmd')) + versions['wsdk '+version[1:]]=targets def gather_wince_supported_platforms(): supported_wince_platforms=[] try: - ce_sdk=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Microsoft\\Windows CE Tools\\SDKs') + ce_sdk=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Microsoft\\Windows CE Tools\\SDKs') except WindowsError: try: - ce_sdk=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Microsoft\\Windows CE Tools\\SDKs') + ce_sdk=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Microsoft\\Windows CE Tools\\SDKs') except WindowsError: ce_sdk='' if not ce_sdk: return supported_wince_platforms - ce_index=0 + index=0 while 1: try: - sdk_device=_winreg.EnumKey(ce_sdk,ce_index) + sdk_device=Utils.winreg.EnumKey(ce_sdk,index) + sdk=Utils.winreg.OpenKey(ce_sdk,sdk_device) except WindowsError: break - ce_index=ce_index+1 - sdk=_winreg.OpenKey(ce_sdk,sdk_device) + index+=1 try: - path,type=_winreg.QueryValueEx(sdk,'SDKRootDir') + path,type=Utils.winreg.QueryValueEx(sdk,'SDKRootDir') except WindowsError: try: - path,type=_winreg.QueryValueEx(sdk,'SDKInformation') - path,xml=os.path.split(path) + path,type=Utils.winreg.QueryValueEx(sdk,'SDKInformation') except WindowsError: continue + path,xml=os.path.split(path) path=str(path) path,device=os.path.split(path) if not device: path,device=os.path.split(path) + platforms=[] for arch,compiler in all_wince_platforms: - platforms=[] if os.path.isdir(os.path.join(path,device,'Lib',arch)): platforms.append((arch,compiler,os.path.join(path,device,'Include',arch),os.path.join(path,device,'Lib',arch))) - if platforms: - supported_wince_platforms.append((device,platforms)) + if platforms: + supported_wince_platforms.append((device,platforms)) return supported_wince_platforms def gather_msvc_detected_versions(): version_pattern=re.compile('^(\d\d?\.\d\d?)(Exp)?$') detected_versions=[] - for vcver,vcvar in[('VCExpress','Exp'),('VisualStudio','')]: + for vcver,vcvar in(('VCExpress','Exp'),('VisualStudio','')): + prefix='SOFTWARE\\Wow6432node\\Microsoft\\'+vcver try: - prefix='SOFTWARE\\Wow6432node\\Microsoft\\'+vcver - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,prefix) + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,prefix) except WindowsError: + prefix='SOFTWARE\\Microsoft\\'+vcver try: - prefix='SOFTWARE\\Microsoft\\'+vcver - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,prefix) + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,prefix) except WindowsError: continue index=0 while 1: try: - version=_winreg.EnumKey(all_versions,index) + version=Utils.winreg.EnumKey(all_versions,index) except WindowsError: break - index=index+1 + index+=1 match=version_pattern.match(version) - if not match: - continue - else: + if match: versionnumber=float(match.group(1)) - detected_versions.append((versionnumber,version+vcvar,prefix+"\\"+version)) + else: + continue + detected_versions.append((versionnumber,version+vcvar,prefix+'\\'+version)) def fun(tup): return tup[0] - try: - detected_versions.sort(key=fun) - except: - detected_versions.sort(lambda x,y:cmp(x[0],y[0])) + detected_versions.sort(key=fun) return detected_versions +class target_compiler(object): + def __init__(self,ctx,compiler,cpu,version,bat_target,bat,callback=None): + self.conf=ctx + self.name=None + self.is_valid=False + self.is_done=False + self.compiler=compiler + self.cpu=cpu + self.version=version + self.bat_target=bat_target + self.bat=bat + self.callback=callback + def evaluate(self): + if self.is_done: + return + self.is_done=True + try: + vs=self.conf.get_msvc_version(self.compiler,self.version,self.bat_target,self.bat) + except Errors.ConfigurationError: + self.is_valid=False + return + if self.callback: + vs=self.callback(self,vs) + self.is_valid=True + (self.bindirs,self.incdirs,self.libdirs)=vs + def __str__(self): + return str((self.bindirs,self.incdirs,self.libdirs)) + def __repr__(self): + return repr((self.bindirs,self.incdirs,self.libdirs)) +@conf def gather_msvc_targets(conf,versions,version,vc_path): - targets=[] + targets={} if os.path.isfile(os.path.join(vc_path,'vcvarsall.bat')): for target,realtarget in all_msvc_platforms[::-1]: - try: - targets.append((target,(realtarget,conf.get_msvc_version('msvc',version,target,os.path.join(vc_path,'vcvarsall.bat'))))) - except conf.errors.ConfigurationError: - pass + targets[target]=target_compiler(conf,'msvc',realtarget,version,target,os.path.join(vc_path,'vcvarsall.bat')) elif os.path.isfile(os.path.join(vc_path,'Common7','Tools','vsvars32.bat')): - try: - targets.append(('x86',('x86',conf.get_msvc_version('msvc',version,'x86',os.path.join(vc_path,'Common7','Tools','vsvars32.bat'))))) - except conf.errors.ConfigurationError: - pass + targets['x86']=target_compiler(conf,'msvc','x86',version,'x86',os.path.join(vc_path,'Common7','Tools','vsvars32.bat')) elif os.path.isfile(os.path.join(vc_path,'Bin','vcvars32.bat')): - try: - targets.append(('x86',('x86',conf.get_msvc_version('msvc',version,'',os.path.join(vc_path,'Bin','vcvars32.bat'))))) - except conf.errors.ConfigurationError: - pass - versions.append(('msvc '+version,targets)) + targets['x86']=target_compiler(conf,'msvc','x86',version,'',os.path.join(vc_path,'Bin','vcvars32.bat')) + if targets: + versions['msvc '+version]=targets +@conf def gather_wince_targets(conf,versions,version,vc_path,vsvars,supported_platforms): for device,platforms in supported_platforms: - cetargets=[] + targets={} for platform,compiler,include,lib in platforms: winCEpath=os.path.join(vc_path,'ce') if not os.path.isdir(winCEpath): continue - try: - common_bindirs,_1,_2=conf.get_msvc_version('msvc',version,'x86',vsvars) - except conf.errors.ConfigurationError: - continue if os.path.isdir(os.path.join(winCEpath,'lib',platform)): - bindirs=[os.path.join(winCEpath,'bin',compiler),os.path.join(winCEpath,'bin','x86_'+compiler)]+common_bindirs + bindirs=[os.path.join(winCEpath,'bin',compiler),os.path.join(winCEpath,'bin','x86_'+compiler)] incdirs=[os.path.join(winCEpath,'include'),os.path.join(winCEpath,'atlmfc','include'),include] libdirs=[os.path.join(winCEpath,'lib',platform),os.path.join(winCEpath,'atlmfc','lib',platform),lib] - cetargets.append((platform,(platform,(bindirs,incdirs,libdirs)))) - if cetargets: - versions.append((device+' '+version,cetargets)) + def combine_common(obj,compiler_env): + (common_bindirs,_1,_2)=compiler_env + return(bindirs+common_bindirs,incdirs,libdirs) + targets[platform]=target_compiler(conf,'msvc',platform,version,'x86',vsvars,combine_common) + if targets: + versions[device+' '+version]=targets +@conf +def gather_winphone_targets(conf,versions,version,vc_path,vsvars): + targets={} + for target,realtarget in all_msvc_platforms[::-1]: + targets[target]=target_compiler(conf,'winphone',realtarget,version,target,vsvars) + if targets: + versions['winphone '+version]=targets +@conf def gather_msvc_versions(conf,versions): vc_paths=[] for(v,version,reg)in gather_msvc_detected_versions(): try: try: - msvc_version=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,reg+"\\Setup\\VC") + msvc_version=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,reg+"\\Setup\\VC") except WindowsError: - msvc_version=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,reg+"\\Setup\\Microsoft Visual C++") - path,type=_winreg.QueryValueEx(msvc_version,'ProductDir') - vc_paths.append((version,os.path.abspath(str(path)))) + msvc_version=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,reg+"\\Setup\\Microsoft Visual C++") + path,type=Utils.winreg.QueryValueEx(msvc_version,'ProductDir') except WindowsError: continue + else: + vc_paths.append((version,os.path.abspath(str(path)))) wince_supported_platforms=gather_wince_supported_platforms() for version,vc_path in vc_paths: vs_path=os.path.dirname(vc_path) @@ -267,72 +305,127 @@ conf.gather_wince_targets(versions,version,vc_path,vsvars,wince_supported_platforms) for version,vc_path in vc_paths: vs_path=os.path.dirname(vc_path) + vsvars=os.path.join(vs_path,'VC','WPSDK','WP80','vcvarsphoneall.bat') + if os.path.isfile(vsvars): + conf.gather_winphone_targets(versions,'8.0',vc_path,vsvars) + break + for version,vc_path in vc_paths: + vs_path=os.path.dirname(vc_path) conf.gather_msvc_targets(versions,version,vc_path) +@conf def gather_icl_versions(conf,versions): version_pattern=re.compile('^...?.?\....?.?') try: - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Intel\\Compilers\\C++') + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Intel\\Compilers\\C++') except WindowsError: try: - all_versions=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Intel\\Compilers\\C++') + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Intel\\Compilers\\C++') except WindowsError: return index=0 while 1: try: - version=_winreg.EnumKey(all_versions,index) + version=Utils.winreg.EnumKey(all_versions,index) except WindowsError: break - index=index+1 + index+=1 if not version_pattern.match(version): continue - targets=[] + targets={} for target,arch in all_icl_platforms: + if target=='intel64':targetDir='EM64T_NATIVE' + else:targetDir=target try: - if target=='intel64':targetDir='EM64T_NATIVE' - else:targetDir=target - _winreg.OpenKey(all_versions,version+'\\'+targetDir) - icl_version=_winreg.OpenKey(all_versions,version) - path,type=_winreg.QueryValueEx(icl_version,'ProductDir') - if os.path.isfile(os.path.join(path,'bin','iclvars.bat')): - try: - targets.append((target,(arch,conf.get_msvc_version('intel',version,target,os.path.join(path,'bin','iclvars.bat'))))) - except conf.errors.ConfigurationError: - pass + Utils.winreg.OpenKey(all_versions,version+'\\'+targetDir) + icl_version=Utils.winreg.OpenKey(all_versions,version) + path,type=Utils.winreg.QueryValueEx(icl_version,'ProductDir') except WindowsError: pass + else: + batch_file=os.path.join(path,'bin','iclvars.bat') + if os.path.isfile(batch_file): + targets[target]=target_compiler(conf,'intel',arch,version,target,batch_file) for target,arch in all_icl_platforms: try: - icl_version=_winreg.OpenKey(all_versions,version+'\\'+target) - path,type=_winreg.QueryValueEx(icl_version,'ProductDir') - if os.path.isfile(os.path.join(path,'bin','iclvars.bat')): - try: - targets.append((target,(arch,conf.get_msvc_version('intel',version,target,os.path.join(path,'bin','iclvars.bat'))))) - except conf.errors.ConfigurationError: - pass + icl_version=Utils.winreg.OpenKey(all_versions,version+'\\'+target) + path,type=Utils.winreg.QueryValueEx(icl_version,'ProductDir') except WindowsError: continue + else: + batch_file=os.path.join(path,'bin','iclvars.bat') + if os.path.isfile(batch_file): + targets[target]=target_compiler(conf,'intel',arch,version,target,batch_file) major=version[0:2] - versions.append(('intel '+major,targets)) -def get_msvc_versions(conf): - if not conf.env['MSVC_INSTALLED_VERSIONS']: - lst=[] - conf.gather_icl_versions(lst) - conf.gather_wsdk_versions(lst) - conf.gather_msvc_versions(lst) - conf.env['MSVC_INSTALLED_VERSIONS']=lst - return conf.env['MSVC_INSTALLED_VERSIONS'] -def print_all_msvc_detected(conf): - for version,targets in conf.env['MSVC_INSTALLED_VERSIONS']: - info(version) - for target,l in targets: - info("\t"+target) -def detect_msvc(conf): - versions=get_msvc_versions(conf) - return setup_msvc(conf,versions) + versions['intel '+major]=targets +@conf +def gather_intel_composer_versions(conf,versions): + version_pattern=re.compile('^...?.?\...?.?.?') + try: + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Wow6432node\\Intel\\Suites') + except WindowsError: + try: + all_versions=Utils.winreg.OpenKey(Utils.winreg.HKEY_LOCAL_MACHINE,'SOFTWARE\\Intel\\Suites') + except WindowsError: + return + index=0 + while 1: + try: + version=Utils.winreg.EnumKey(all_versions,index) + except WindowsError: + break + index+=1 + if not version_pattern.match(version): + continue + targets={} + for target,arch in all_icl_platforms: + if target=='intel64':targetDir='EM64T_NATIVE' + else:targetDir=target + try: + try: + defaults=Utils.winreg.OpenKey(all_versions,version+'\\Defaults\\C++\\'+targetDir) + except WindowsError: + if targetDir=='EM64T_NATIVE': + defaults=Utils.winreg.OpenKey(all_versions,version+'\\Defaults\\C++\\EM64T') + else: + raise + uid,type=Utils.winreg.QueryValueEx(defaults,'SubKey') + Utils.winreg.OpenKey(all_versions,version+'\\'+uid+'\\C++\\'+targetDir) + icl_version=Utils.winreg.OpenKey(all_versions,version+'\\'+uid+'\\C++') + path,type=Utils.winreg.QueryValueEx(icl_version,'ProductDir') + except WindowsError: + pass + else: + batch_file=os.path.join(path,'bin','iclvars.bat') + if os.path.isfile(batch_file): + targets[target]=target_compiler(conf,'intel',arch,version,target,batch_file) + compilervars_warning_attr='_compilervars_warning_key' + if version[0:2]=='13'and getattr(conf,compilervars_warning_attr,True): + setattr(conf,compilervars_warning_attr,False) + patch_url='http://software.intel.com/en-us/forums/topic/328487' + compilervars_arch=os.path.join(path,'bin','compilervars_arch.bat') + for vscomntool in('VS110COMNTOOLS','VS100COMNTOOLS'): + if vscomntool in os.environ: + vs_express_path=os.environ[vscomntool]+r'..\IDE\VSWinExpress.exe' + dev_env_path=os.environ[vscomntool]+r'..\IDE\devenv.exe' + if(r'if exist "%VS110COMNTOOLS%..\IDE\VSWinExpress.exe"'in Utils.readf(compilervars_arch)and not os.path.exists(vs_express_path)and not os.path.exists(dev_env_path)): + Logs.warn(('The Intel compilervar_arch.bat only checks for one Visual Studio SKU ''(VSWinExpress.exe) but it does not seem to be installed at %r. ''The intel command line set up will fail to configure unless the file %r''is patched. See: %s')%(vs_express_path,compilervars_arch,patch_url)) + major=version[0:2] + versions['intel '+major]=targets +@conf +def detect_msvc(self): + return self.setup_msvc(self.get_msvc_versions()) +@conf +def get_msvc_versions(self): + dct={} + self.gather_icl_versions(dct) + self.gather_intel_composer_versions(dct) + self.gather_wsdk_versions(dct) + self.gather_msvc_versions(dct) + return dct +@conf def find_lt_names_msvc(self,libname,is_static=False): lt_names=['lib%s.la'%libname,'%s.la'%libname,] - for path in self.env['LIBPATH']: + for path in self.env.LIBPATH: for la in lt_names: laf=os.path.join(path,la) dll=None @@ -357,6 +450,7 @@ else: raise self.errors.WafError('invalid libtool object file: %s'%laf) return(None,None,None) +@conf def libname_msvc(self,libname,is_static=False): lib=libname.lower() lib=re.sub('\.lib$','',lib) @@ -370,9 +464,9 @@ if lt_static==True: return os.path.join(lt_path,lt_libname) if lt_path!=None: - _libpaths=[lt_path]+self.env['LIBPATH'] + _libpaths=[lt_path]+self.env.LIBPATH else: - _libpaths=self.env['LIBPATH'] + _libpaths=self.env.LIBPATH static_libs=['lib%ss.lib'%lib,'lib%s.lib'%lib,'%ss.lib'%lib,'%s.lib'%lib,] dynamic_libs=['lib%s.dll.lib'%lib,'lib%s.dll.a'%lib,'%s.dll.lib'%lib,'%s.dll.a'%lib,'lib%s_d.lib'%lib,'%s_d.lib'%lib,'%s.lib'%lib,] libnames=static_libs @@ -381,10 +475,11 @@ for path in _libpaths: for libn in libnames: if os.path.exists(os.path.join(path,libn)): - debug('msvc: lib found: %s'%os.path.join(path,libn)) + Logs.debug('msvc: lib found: %s',os.path.join(path,libn)) return re.sub('\.lib$','',libn) - self.fatal("The library %r could not be found"%libname) + self.fatal('The library %r could not be found'%libname) return re.sub('\.lib$','',libname) +@conf def check_lib_msvc(self,libname,is_static=False,uselib_store=None): libn=self.libname_msvc(libname,is_static) if not uselib_store: @@ -393,11 +488,12 @@ self.env['STLIB_'+uselib_store]=[libn] else: self.env['LIB_'+uselib_store]=[libn] +@conf def check_libs_msvc(self,libnames,is_static=False): for libname in Utils.to_list(libnames): self.check_lib_msvc(libname,is_static) def configure(conf): - conf.autodetect() + conf.autodetect(True) conf.find_msvc() conf.msvc_common_flags() conf.cc_load_tools() @@ -406,22 +502,26 @@ conf.cxx_add_flags() conf.link_add_flags() conf.visual_studio_add_flags() +@conf def no_autodetect(conf): conf.env.NO_MSVC_DETECT=1 configure(conf) -def autodetect(conf): +@conf +def autodetect(conf,arch=False): v=conf.env if v.NO_MSVC_DETECT: return - compiler,version,path,includes,libdirs=conf.detect_msvc() - v['PATH']=path - v['INCLUDES']=includes - v['LIBPATH']=libdirs - v['MSVC_COMPILER']=compiler + compiler,version,path,includes,libdirs,cpu=conf.detect_msvc() + if arch: + v.DEST_CPU=cpu + v.PATH=path + v.INCLUDES=includes + v.LIBPATH=libdirs + v.MSVC_COMPILER=compiler try: - v['MSVC_VERSION']=float(version) - except: - v['MSVC_VERSION']=float(version[:-3]) + v.MSVC_VERSION=float(version) + except TypeError: + v.MSVC_VERSION=float(version[:-3]) def _get_prog_names(conf,compiler): if compiler=='intel': compiler_name='ICL' @@ -432,90 +532,89 @@ linker_name='LINK' lib_name='LIB' return compiler_name,linker_name,lib_name +@conf def find_msvc(conf): if sys.platform=='cygwin': conf.fatal('MSVC module does not work under cygwin Python!') v=conf.env - path=v['PATH'] - compiler=v['MSVC_COMPILER'] - version=v['MSVC_VERSION'] + path=v.PATH + compiler=v.MSVC_COMPILER + version=v.MSVC_VERSION compiler_name,linker_name,lib_name=_get_prog_names(conf,compiler) v.MSVC_MANIFEST=(compiler=='msvc'and version>=8)or(compiler=='wsdk'and version>=6)or(compiler=='intel'and version>=11) - cxx=None - if v['CXX']:cxx=v['CXX'] - elif'CXX'in conf.environ:cxx=conf.environ['CXX'] cxx=conf.find_program(compiler_name,var='CXX',path_list=path) - cxx=conf.cmd_to_list(cxx) env=dict(conf.environ) if path:env.update(PATH=';'.join(path)) if not conf.cmd_and_log(cxx+['/nologo','/help'],env=env): conf.fatal('the msvc compiler could not be identified') - v['CC']=v['CXX']=cxx - v['CC_NAME']=v['CXX_NAME']='msvc' - if not v['LINK_CXX']: - link=conf.find_program(linker_name,path_list=path) - if link:v['LINK_CXX']=link - else:conf.fatal('%s was not found (linker)'%linker_name) - v['LINK']=link - if not v['LINK_CC']: - v['LINK_CC']=v['LINK_CXX'] - if not v['AR']: + v.CC=v.CXX=cxx + v.CC_NAME=v.CXX_NAME='msvc' + if not v.LINK_CXX: + v.LINK_CXX=conf.find_program(linker_name,path_list=path,errmsg='%s was not found (linker)'%linker_name) + if not v.LINK_CC: + v.LINK_CC=v.LINK_CXX + if not v.AR: stliblink=conf.find_program(lib_name,path_list=path,var='AR') - if not stliblink:return - v['ARFLAGS']=['/NOLOGO'] + if not stliblink: + return + v.ARFLAGS=['/nologo'] if v.MSVC_MANIFEST: - mt=conf.find_program('MT',path_list=path,var='MT') - v['MTFLAGS']=['/NOLOGO'] - conf.load('winres') - if not conf.env['WINRC']: - warn('Resource compiler not found. Compiling resource file is disabled') + conf.find_program('MT',path_list=path,var='MT') + v.MTFLAGS=['/nologo'] + try: + conf.load('winres') + except Errors.ConfigurationError: + Logs.warn('Resource compiler not found. Compiling resource file is disabled') +@conf def visual_studio_add_flags(self): v=self.env - try:v.prepend_value('INCLUDES',self.environ['INCLUDE'].split(';')) - except:pass - try:v.prepend_value('LIBPATH',self.environ['LIB'].split(';')) - except:pass + if self.environ.get('INCLUDE'): + v.prepend_value('INCLUDES',[x for x in self.environ['INCLUDE'].split(';')if x]) + if self.environ.get('LIB'): + v.prepend_value('LIBPATH',[x for x in self.environ['LIB'].split(';')if x]) +@conf def msvc_common_flags(conf): v=conf.env - v['DEST_BINFMT']='pe' + v.DEST_BINFMT='pe' v.append_value('CFLAGS',['/nologo']) v.append_value('CXXFLAGS',['/nologo']) - v['DEFINES_ST']='/D%s' - v['CC_SRC_F']='' - v['CC_TGT_F']=['/c','/Fo'] - if v['MSVC_VERSION']>=8: - v['CC_TGT_F']=['/FC']+v['CC_TGT_F'] - v['CXX_SRC_F']='' - v['CXX_TGT_F']=['/c','/Fo'] - if v['MSVC_VERSION']>=8: - v['CXX_TGT_F']=['/FC']+v['CXX_TGT_F'] - v['CPPPATH_ST']='/I%s' - v['AR_TGT_F']=v['CCLNK_TGT_F']=v['CXXLNK_TGT_F']='/OUT:' - v['CFLAGS_CONSOLE']=v['CXXFLAGS_CONSOLE']=['/SUBSYSTEM:CONSOLE'] - v['CFLAGS_NATIVE']=v['CXXFLAGS_NATIVE']=['/SUBSYSTEM:NATIVE'] - v['CFLAGS_POSIX']=v['CXXFLAGS_POSIX']=['/SUBSYSTEM:POSIX'] - v['CFLAGS_WINDOWS']=v['CXXFLAGS_WINDOWS']=['/SUBSYSTEM:WINDOWS'] - v['CFLAGS_WINDOWSCE']=v['CXXFLAGS_WINDOWSCE']=['/SUBSYSTEM:WINDOWSCE'] - v['CFLAGS_CRT_MULTITHREADED']=v['CXXFLAGS_CRT_MULTITHREADED']=['/MT'] - v['CFLAGS_CRT_MULTITHREADED_DLL']=v['CXXFLAGS_CRT_MULTITHREADED_DLL']=['/MD'] - v['CFLAGS_CRT_MULTITHREADED_DBG']=v['CXXFLAGS_CRT_MULTITHREADED_DBG']=['/MTd'] - v['CFLAGS_CRT_MULTITHREADED_DLL_DBG']=v['CXXFLAGS_CRT_MULTITHREADED_DLL_DBG']=['/MDd'] - v['LIB_ST']='%s.lib' - v['LIBPATH_ST']='/LIBPATH:%s' - v['STLIB_ST']='lib%s.lib' - v['STLIBPATH_ST']='/LIBPATH:%s' - v.append_value('LINKFLAGS',['/NOLOGO']) - if v['MSVC_MANIFEST']: + v.append_value('LINKFLAGS',['/nologo']) + v.DEFINES_ST='/D%s' + v.CC_SRC_F='' + v.CC_TGT_F=['/c','/Fo'] + v.CXX_SRC_F='' + v.CXX_TGT_F=['/c','/Fo'] + if(v.MSVC_COMPILER=='msvc'and v.MSVC_VERSION>=8)or(v.MSVC_COMPILER=='wsdk'and v.MSVC_VERSION>=6): + v.CC_TGT_F=['/FC']+v.CC_TGT_F + v.CXX_TGT_F=['/FC']+v.CXX_TGT_F + v.CPPPATH_ST='/I%s' + v.AR_TGT_F=v.CCLNK_TGT_F=v.CXXLNK_TGT_F='/OUT:' + v.CFLAGS_CONSOLE=v.CXXFLAGS_CONSOLE=['/SUBSYSTEM:CONSOLE'] + v.CFLAGS_NATIVE=v.CXXFLAGS_NATIVE=['/SUBSYSTEM:NATIVE'] + v.CFLAGS_POSIX=v.CXXFLAGS_POSIX=['/SUBSYSTEM:POSIX'] + v.CFLAGS_WINDOWS=v.CXXFLAGS_WINDOWS=['/SUBSYSTEM:WINDOWS'] + v.CFLAGS_WINDOWSCE=v.CXXFLAGS_WINDOWSCE=['/SUBSYSTEM:WINDOWSCE'] + v.CFLAGS_CRT_MULTITHREADED=v.CXXFLAGS_CRT_MULTITHREADED=['/MT'] + v.CFLAGS_CRT_MULTITHREADED_DLL=v.CXXFLAGS_CRT_MULTITHREADED_DLL=['/MD'] + v.CFLAGS_CRT_MULTITHREADED_DBG=v.CXXFLAGS_CRT_MULTITHREADED_DBG=['/MTd'] + v.CFLAGS_CRT_MULTITHREADED_DLL_DBG=v.CXXFLAGS_CRT_MULTITHREADED_DLL_DBG=['/MDd'] + v.LIB_ST='%s.lib' + v.LIBPATH_ST='/LIBPATH:%s' + v.STLIB_ST='%s.lib' + v.STLIBPATH_ST='/LIBPATH:%s' + if v.MSVC_MANIFEST: v.append_value('LINKFLAGS',['/MANIFEST']) - v['CFLAGS_cshlib']=[] - v['CXXFLAGS_cxxshlib']=[] - v['LINKFLAGS_cshlib']=v['LINKFLAGS_cxxshlib']=['/DLL'] - v['cshlib_PATTERN']=v['cxxshlib_PATTERN']='%s.dll' - v['implib_PATTERN']='%s.lib' - v['IMPLIB_ST']='/IMPLIB:%s' - v['LINKFLAGS_cstlib']=[] - v['cstlib_PATTERN']=v['cxxstlib_PATTERN']='lib%s.lib' - v['cprogram_PATTERN']=v['cxxprogram_PATTERN']='%s.exe' + v.CFLAGS_cshlib=[] + v.CXXFLAGS_cxxshlib=[] + v.LINKFLAGS_cshlib=v.LINKFLAGS_cxxshlib=['/DLL'] + v.cshlib_PATTERN=v.cxxshlib_PATTERN='%s.dll' + v.implib_PATTERN='%s.lib' + v.IMPLIB_ST='/IMPLIB:%s' + v.LINKFLAGS_cstlib=[] + v.cstlib_PATTERN=v.cxxstlib_PATTERN='%s.lib' + v.cprogram_PATTERN=v.cxxprogram_PATTERN='%s.exe' +@after_method('apply_link') +@feature('c','cxx') def apply_flags_msvc(self): if self.env.CC_NAME!='msvc'or not getattr(self,'link_task',None): return @@ -531,124 +630,31 @@ if d[1:]=='debug': pdbnode=self.link_task.outputs[0].change_ext('.pdb') self.link_task.outputs.append(pdbnode) - try: - self.install_task.source.append(pdbnode) - except AttributeError: - pass + if getattr(self,'install_task',None): + self.pdb_install_task=self.add_install_files(install_to=self.install_task.install_to,install_from=pdbnode) break +@feature('cprogram','cshlib','cxxprogram','cxxshlib') +@after_method('apply_link') def apply_manifest(self): if self.env.CC_NAME=='msvc'and self.env.MSVC_MANIFEST and getattr(self,'link_task',None): out_node=self.link_task.outputs[0] man_node=out_node.parent.find_or_declare(out_node.name+'.manifest') self.link_task.outputs.append(man_node) - self.link_task.do_manifest=True -def exec_mf(self): - env=self.env - mtool=env['MT'] - if not mtool: - return 0 - self.do_manifest=False - outfile=self.outputs[0].abspath() - manifest=None - for out_node in self.outputs: - if out_node.name.endswith('.manifest'): - manifest=out_node.abspath() - break - if manifest is None: - return 0 - mode='' - if'cprogram'in self.generator.features or'cxxprogram'in self.generator.features: - mode='1' - elif'cshlib'in self.generator.features or'cxxshlib'in self.generator.features: - mode='2' - debug('msvc: embedding manifest in mode %r'%mode) - lst=[] - lst.append(env['MT']) - lst.extend(Utils.to_list(env['MTFLAGS'])) - lst.extend(['-manifest',manifest]) - lst.append('-outputresource:%s;%s'%(outfile,mode)) - lst=[lst] - return self.exec_command(*lst) -def quote_response_command(self,flag): - if flag.find(' ')>-1: - for x in('/LIBPATH:','/IMPLIB:','/OUT:','/I'): - if flag.startswith(x): - flag='%s"%s"'%(x,flag[len(x):]) - break - else: - flag='"%s"'%flag - return flag -def exec_response_command(self,cmd,**kw): - try: - tmp=None - if sys.platform.startswith('win')and isinstance(cmd,list)and len(' '.join(cmd))>=8192: - program=cmd[0] - cmd=[self.quote_response_command(x)for x in cmd] - (fd,tmp)=tempfile.mkstemp() - os.write(fd,'\r\n'.join(i.replace('\\','\\\\')for i in cmd[1:])) - os.close(fd) - cmd=[program,'@'+tmp] - ret=self.generator.bld.exec_command(cmd,**kw) - finally: - if tmp: - try: - os.remove(tmp) - except: - pass - return ret -def exec_command_msvc(self,*k,**kw): - if self.env['CC_NAME']=='msvc': - if isinstance(k[0],list): - lst=[] - carry='' - for a in k[0]: - if a=='/Fo'or a=='/doc'or a[-1]==':': - carry=a - else: - lst.append(carry+a) - carry='' - k=[lst] - if self.env['PATH']: - env=dict(os.environ) - env.update(PATH=';'.join(self.env['PATH'])) - kw['env']=env - bld=self.generator.bld - try: - if not kw.get('cwd',None): - kw['cwd']=bld.cwd - except AttributeError: - bld.cwd=kw['cwd']=bld.variant_dir - ret=self.exec_response_command(k[0],**kw) - if not ret and getattr(self,'do_manifest',None): - ret=self.exec_mf() - return ret -for k in'c cxx cprogram cxxprogram cshlib cxxshlib cstlib cxxstlib'.split(): - cls=Task.classes.get(k,None) - if cls: - cls.exec_command=exec_command_msvc - cls.exec_response_command=exec_response_command - cls.quote_response_command=quote_response_command - cls.exec_mf=exec_mf - -conf(get_msvc_version) -conf(gather_wsdk_versions) -conf(gather_msvc_targets) -conf(gather_wince_targets) -conf(gather_msvc_versions) -conf(gather_icl_versions) -conf(get_msvc_versions) -conf(print_all_msvc_detected) -conf(detect_msvc) -conf(find_lt_names_msvc) -conf(libname_msvc) -conf(check_lib_msvc) -conf(check_libs_msvc) -conf(no_autodetect) -conf(autodetect) -conf(find_msvc) -conf(visual_studio_add_flags) -conf(msvc_common_flags) -after_method('apply_link')(apply_flags_msvc) -feature('c','cxx')(apply_flags_msvc) -feature('cprogram','cshlib','cxxprogram','cxxshlib')(apply_manifest) -after_method('apply_link')(apply_manifest) \ No newline at end of file + self.env.DO_MANIFEST=True +def make_winapp(self,family): + append=self.env.append_unique + append('DEFINES','WINAPI_FAMILY=%s'%family) + append('CXXFLAGS',['/ZW','/TP']) + for lib_path in self.env.LIBPATH: + append('CXXFLAGS','/AI%s'%lib_path) +@feature('winphoneapp') +@after_method('process_use') +@after_method('propagate_uselib_vars') +def make_winphone_app(self): + make_winapp(self,'WINAPI_FAMILY_PHONE_APP') + conf.env.append_unique('LINKFLAGS',['/NODEFAULTLIB:ole32.lib','PhoneAppModelHost.lib']) +@feature('winapp') +@after_method('process_use') +@after_method('propagate_uselib_vars') +def make_windows_app(self): + make_winapp(self,'WINAPI_FAMILY_DESKTOP_APP') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/nasm.py glmark2-2021.02/waflib/Tools/nasm.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/nasm.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/nasm.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,14 +1,16 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file +import os import waflib.Tools.asm from waflib.TaskGen import feature +@feature('asm') def apply_nasm_vars(self): self.env.append_value('ASFLAGS',self.to_list(getattr(self,'nasm_flags',[]))) def configure(conf): - nasm=conf.find_program(['nasm','yasm'],var='AS') + conf.find_program(['nasm','yasm'],var='AS') conf.env.AS_TGT_F=['-o'] conf.env.ASLNK_TGT_F=['-o'] - -feature('asm')(apply_nasm_vars) \ No newline at end of file + conf.load('asm') + conf.env.ASMPATH_ST='-I%s'+os.sep diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/nobuild.py glmark2-2021.02/waflib/Tools/nobuild.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/nobuild.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/nobuild.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,11 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +from waflib import Task +def build(bld): + def run(self): + for x in self.outputs: + x.write('') + for(name,cls)in Task.classes.items(): + cls.run=run diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/perl.py glmark2-2021.02/waflib/Tools/perl.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/perl.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/perl.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,15 +1,18 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os from waflib import Task,Options,Utils from waflib.Configure import conf from waflib.TaskGen import extension,feature,before_method +@before_method('apply_incpaths','apply_link','propagate_uselib_vars') +@feature('perlext') def init_perlext(self): self.uselib=self.to_list(getattr(self,'uselib',[])) if not'PERLEXT'in self.uselib:self.uselib.append('PERLEXT') - self.env['cshlib_PATTERN']=self.env['cxxshlib_PATTERN']=self.env['perlext_PATTERN'] + self.env.cshlib_PATTERN=self.env.cxxshlib_PATTERN=self.env.perlext_PATTERN +@extension('.xs') def xsubpp_file(self,node): outnode=node.change_ext('.c') self.create_task('xsubpp',node,outnode) @@ -18,6 +21,7 @@ run_str='${PERL} ${XSUBPP} -noprototypes -typemap ${EXTUTILS_TYPEMAP} ${SRC} > ${TGT}' color='BLUE' ext_out=['.h'] +@conf def check_perl_version(self,minver=None): res=True if minver: @@ -25,14 +29,8 @@ else: cver='' self.start_msg('Checking for minimum perl version %s'%cver) - perl=getattr(Options.options,'perlbinary',None) - if not perl: - perl=self.find_program('perl',var='PERL') - if not perl: - self.end_msg("Perl not found",color="YELLOW") - return False - self.env['PERL']=perl - version=self.cmd_and_log([perl,"-e",'printf \"%vd\", $^V']) + perl=self.find_program('perl',var='PERL',value=getattr(Options.options,'perlbinary',None)) + version=self.cmd_and_log(perl+["-e",'printf \"%vd\", $^V']) if not version: res=False version="Unknown" @@ -40,42 +38,47 @@ ver=tuple(map(int,version.split("."))) if ver0: - try: - st1=os.stat(path) - except: - error('The python file is missing, this should not happen') - for x in['c','o']: - do_inst=self.env['PY'+x.upper()] - try: - st2=os.stat(path+x) - except OSError: - pass - else: - if st1.st_mtime<=st2.st_mtime: - do_inst=False - if do_inst: - lst=(x=='o')and[self.env['PYFLAGS_OPT']]or[] - (a,b,c)=(path,path+x,tsk.get_install_path(destdir=False)+x) - argv=self.env['PYTHON']+lst+['-c',INST,a,b,c] - info('+ byte compiling %r'%(path+x)) - env=self.env.env or None - ret=Utils.subprocess.Popen(argv,env=env).wait() - if ret: - raise Errors.WafError('py%s compilation failed %r'%(x,path)) +@before_method('process_source') +@feature('py') def feature_py(self): - pass + self.install_path=getattr(self,'install_path','${PYTHONDIR}') + install_from=getattr(self,'install_from',None) + if install_from and not isinstance(install_from,Node.Node): + install_from=self.path.find_dir(install_from) + self.install_from=install_from + ver=self.env.PYTHON_VERSION + if not ver: + self.bld.fatal('Installing python files requires PYTHON_VERSION, try conf.check_python_version') + if int(ver.replace('.',''))>31: + self.install_32=True +@extension('.py') +def process_py(self,node): + assert(getattr(self,'install_path')),'add features="py"' + if self.install_path: + if self.install_from: + self.add_install_files(install_to=self.install_path,install_from=node,cwd=self.install_from,relative_trick=True) + else: + self.add_install_files(install_to=self.install_path,install_from=node,relative_trick=True) + lst=[] + if self.env.PYC: + lst.append('pyc') + if self.env.PYO: + lst.append('pyo') + if self.install_path: + if self.install_from: + pyd=Utils.subst_vars("%s/%s"%(self.install_path,node.path_from(self.install_from)),self.env) + else: + pyd=Utils.subst_vars("%s/%s"%(self.install_path,node.path_from(self.path)),self.env) + else: + pyd=node.abspath() + for ext in lst: + if self.env.PYTAG: + name=node.name[:-3] + pyobj=node.parent.get_bld().make_node('__pycache__').make_node("%s.%s.%s"%(name,self.env.PYTAG,ext)) + pyobj.parent.mkdir() + else: + pyobj=node.change_ext(".%s"%ext) + tsk=self.create_task(ext,node,pyobj) + tsk.pyd=pyd + if self.install_path: + self.add_install_files(install_to=os.path.dirname(pyd),install_from=pyobj,cwd=node.parent.get_bld(),relative_trick=True) +class pyc(Task.Task): + color='PINK' + def run(self): + cmd=[Utils.subst_vars('${PYTHON}',self.env),'-c',INST,self.inputs[0].abspath(),self.outputs[0].abspath(),self.pyd] + ret=self.generator.bld.exec_command(cmd) + return ret +class pyo(Task.Task): + color='PINK' + def run(self): + cmd=[Utils.subst_vars('${PYTHON}',self.env),Utils.subst_vars('${PYFLAGS_OPT}',self.env),'-c',INST,self.inputs[0].abspath(),self.outputs[0].abspath(),self.pyd] + ret=self.generator.bld.exec_command(cmd) + return ret +@feature('pyext') +@before_method('propagate_uselib_vars','apply_link') +@after_method('apply_bundle') def init_pyext(self): + self.uselib=self.to_list(getattr(self,'uselib',[])) + if not'PYEXT'in self.uselib: + self.uselib.append('PYEXT') + self.env.cshlib_PATTERN=self.env.cxxshlib_PATTERN=self.env.macbundle_PATTERN=self.env.pyext_PATTERN + self.env.fcshlib_PATTERN=self.env.dshlib_PATTERN=self.env.pyext_PATTERN try: if not self.install_path: return except AttributeError: self.install_path='${PYTHONARCHDIR}' - self.uselib=self.to_list(getattr(self,'uselib',[])) - if not'PYEXT'in self.uselib: - self.uselib.append('PYEXT') - self.env['cshlib_PATTERN']=self.env['cxxshlib_PATTERN']=self.env['macbundle_PATTERN']=self.env['pyext_PATTERN'] +@feature('pyext') +@before_method('apply_link','apply_bundle') def set_bundle(self): if Utils.unversioned_sys_platform()=='darwin': self.mac_bundle=True +@before_method('propagate_uselib_vars') +@feature('pyembed') def init_pyembed(self): self.uselib=self.to_list(getattr(self,'uselib',[])) if not'PYEMBED'in self.uselib: self.uselib.append('PYEMBED') +@conf def get_python_variables(self,variables,imports=None): if not imports: try: @@ -118,48 +130,119 @@ out=self.cmd_and_log(self.env.PYTHON+['-c','\n'.join(program)],env=os_env) except Errors.WafError: self.fatal('The distutils module is unusable: install "python-devel"?') + self.to_log(out) return_values=[] - for s in out.split('\n'): + for s in out.splitlines(): s=s.strip() if not s: continue if s=='None': return_values.append(None) - elif s[0]=="'"and s[-1]=="'": - return_values.append(s[1:-1]) + elif(s[0]=="'"and s[-1]=="'")or(s[0]=='"'and s[-1]=='"'): + return_values.append(eval(s)) elif s[0].isdigit(): return_values.append(int(s)) else:break return return_values -def check_python_headers(conf): - if not conf.env['CC_NAME']and not conf.env['CXX_NAME']: +@conf +def test_pyembed(self,mode,msg='Testing pyembed configuration'): + self.check(header_name='Python.h',define_name='HAVE_PYEMBED',msg=msg,fragment=FRAG,errmsg='Could not build a python embedded interpreter',features='%s %sprogram pyembed'%(mode,mode)) +@conf +def test_pyext(self,mode,msg='Testing pyext configuration'): + self.check(header_name='Python.h',define_name='HAVE_PYEXT',msg=msg,fragment=FRAG,errmsg='Could not build python extensions',features='%s %sshlib pyext'%(mode,mode)) +@conf +def python_cross_compile(self,features='pyembed pyext'): + features=Utils.to_list(features) + if not('PYTHON_LDFLAGS'in self.environ or'PYTHON_PYEXT_LDFLAGS'in self.environ or'PYTHON_PYEMBED_LDFLAGS'in self.environ): + return False + for x in'PYTHON_VERSION PYTAG pyext_PATTERN'.split(): + if not x in self.environ: + self.fatal('Please set %s in the os environment'%x) + else: + self.env[x]=self.environ[x] + xx=self.env.CXX_NAME and'cxx'or'c' + if'pyext'in features: + flags=self.environ.get('PYTHON_PYEXT_LDFLAGS',self.environ.get('PYTHON_LDFLAGS')) + if flags is None: + self.fatal('No flags provided through PYTHON_PYEXT_LDFLAGS as required') + else: + self.parse_flags(flags,'PYEXT') + self.test_pyext(xx) + if'pyembed'in features: + flags=self.environ.get('PYTHON_PYEMBED_LDFLAGS',self.environ.get('PYTHON_LDFLAGS')) + if flags is None: + self.fatal('No flags provided through PYTHON_PYEMBED_LDFLAGS as required') + else: + self.parse_flags(flags,'PYEMBED') + self.test_pyembed(xx) + return True +@conf +def check_python_headers(conf,features='pyembed pyext'): + features=Utils.to_list(features) + assert('pyembed'in features)or('pyext'in features),"check_python_headers features must include 'pyembed' and/or 'pyext'" + env=conf.env + if not env.CC_NAME and not env.CXX_NAME: conf.fatal('load a compiler first (gcc, g++, ..)') - if not conf.env['PYTHON_VERSION']: + if conf.python_cross_compile(features): + return + if not env.PYTHON_VERSION: conf.check_python_version() - env=conf.env - pybin=conf.env.PYTHON + pybin=env.PYTHON if not pybin: - conf.fatal('could not find the python executable') - v='prefix SO LDFLAGS LIBDIR LIBPL INCLUDEPY Py_ENABLE_SHARED MACOSX_DEPLOYMENT_TARGET LDSHARED CFLAGS'.split() + conf.fatal('Could not find the python executable') + v='prefix SO LDFLAGS LIBDIR LIBPL INCLUDEPY Py_ENABLE_SHARED MACOSX_DEPLOYMENT_TARGET LDSHARED CFLAGS LDVERSION'.split() try: lst=conf.get_python_variables(["get_config_var('%s') or ''"%x for x in v]) except RuntimeError: conf.fatal("Python development headers not found (-v for details).") vals=['%s = %r'%(x,y)for(x,y)in zip(v,lst)] - conf.to_log("Configuration returned from %r:\n%r\n"%(pybin,'\n'.join(vals))) + conf.to_log("Configuration returned from %r:\n%s\n"%(pybin,'\n'.join(vals))) dct=dict(zip(v,lst)) x='MACOSX_DEPLOYMENT_TARGET' if dct[x]: - conf.env[x]=conf.environ[x]=dct[x] - env['pyext_PATTERN']='%s'+dct['SO'] + env[x]=conf.environ[x]=dct[x] + env.pyext_PATTERN='%s'+dct['SO'] + num='.'.join(env.PYTHON_VERSION.split('.')[:2]) + conf.find_program([''.join(pybin)+'-config','python%s-config'%num,'python-config-%s'%num,'python%sm-config'%num],var='PYTHON_CONFIG',msg="python-config",mandatory=False) + if env.PYTHON_CONFIG: + all_flags=[['--cflags','--libs','--ldflags']] + if sys.hexversion<0x2070000: + all_flags=[[k]for k in all_flags[0]] + xx=env.CXX_NAME and'cxx'or'c' + if'pyembed'in features: + for flags in all_flags: + conf.check_cfg(msg='Asking python-config for pyembed %r flags'%' '.join(flags),path=env.PYTHON_CONFIG,package='',uselib_store='PYEMBED',args=flags) + try: + conf.test_pyembed(xx) + except conf.errors.ConfigurationError: + if dct['Py_ENABLE_SHARED']and dct['LIBDIR']: + env.append_unique('LIBPATH_PYEMBED',[dct['LIBDIR']]) + conf.test_pyembed(xx) + else: + raise + if'pyext'in features: + for flags in all_flags: + conf.check_cfg(msg='Asking python-config for pyext %r flags'%' '.join(flags),path=env.PYTHON_CONFIG,package='',uselib_store='PYEXT',args=flags) + try: + conf.test_pyext(xx) + except conf.errors.ConfigurationError: + if dct['Py_ENABLE_SHARED']and dct['LIBDIR']: + env.append_unique('LIBPATH_PYEXT',[dct['LIBDIR']]) + conf.test_pyext(xx) + else: + raise + conf.define('HAVE_PYTHON_H',1) + return all_flags=dct['LDFLAGS']+' '+dct['CFLAGS'] conf.parse_flags(all_flags,'PYEMBED') all_flags=dct['LDFLAGS']+' '+dct['LDSHARED']+' '+dct['CFLAGS'] conf.parse_flags(all_flags,'PYEXT') result=None - for name in('python'+env['PYTHON_VERSION'],'python'+env['PYTHON_VERSION'].replace('.','')): - if not result and env['LIBPATH_PYEMBED']: - path=env['LIBPATH_PYEMBED'] + if not dct["LDVERSION"]: + dct["LDVERSION"]=env.PYTHON_VERSION + for name in('python'+dct['LDVERSION'],'python'+env.PYTHON_VERSION+'m','python'+env.PYTHON_VERSION.replace('.','')): + if not result and env.LIBPATH_PYEMBED: + path=env.LIBPATH_PYEMBED conf.to_log("\n\n# Trying default LIBPATH_PYEMBED: %r\n"%path) result=conf.check(lib=name,uselib='PYEMBED',libpath=path,mandatory=False,msg='Checking for library %s in LIBPATH_PYEMBED'%name) if not result and dct['LIBDIR']: @@ -177,33 +260,20 @@ if result: break if result: - env['LIBPATH_PYEMBED']=path + env.LIBPATH_PYEMBED=path env.append_value('LIB_PYEMBED',[name]) else: conf.to_log("\n\n### LIB NOT FOUND\n") - if(Utils.is_win32 or sys.platform.startswith('os2')or dct['Py_ENABLE_SHARED']): - env['LIBPATH_PYEXT']=env['LIBPATH_PYEMBED'] - env['LIB_PYEXT']=env['LIB_PYEMBED'] - num='.'.join(env['PYTHON_VERSION'].split('.')[:2]) - conf.find_program(['python%s-config'%num,'python-config-%s'%num,'python%sm-config'%num],var='PYTHON_CONFIG',mandatory=False) - includes=[] - if conf.env.PYTHON_CONFIG: - for incstr in conf.cmd_and_log([conf.env.PYTHON_CONFIG,'--includes']).strip().split(): - if(incstr.startswith('-I')or incstr.startswith('/I')): - incstr=incstr[2:] - if incstr not in includes: - includes.append(incstr) - conf.to_log("Include path for Python extensions (found via python-config --includes): %r\n"%(includes,)) - env['INCLUDES_PYEXT']=includes - env['INCLUDES_PYEMBED']=includes - else: - conf.to_log("Include path for Python extensions ""(found via distutils module): %r\n"%(dct['INCLUDEPY'],)) - env['INCLUDES_PYEXT']=[dct['INCLUDEPY']] - env['INCLUDES_PYEMBED']=[dct['INCLUDEPY']] - if env['CC_NAME']=='gcc': + if Utils.is_win32 or dct['Py_ENABLE_SHARED']: + env.LIBPATH_PYEXT=env.LIBPATH_PYEMBED + env.LIB_PYEXT=env.LIB_PYEMBED + conf.to_log("Include path for Python extensions (found via distutils module): %r\n"%(dct['INCLUDEPY'],)) + env.INCLUDES_PYEXT=[dct['INCLUDEPY']] + env.INCLUDES_PYEMBED=[dct['INCLUDEPY']] + if env.CC_NAME=='gcc': env.append_value('CFLAGS_PYEMBED',['-fno-strict-aliasing']) env.append_value('CFLAGS_PYEXT',['-fno-strict-aliasing']) - if env['CXX_NAME']=='gcc': + if env.CXX_NAME=='gcc': env.append_value('CXXFLAGS_PYEMBED',['-fno-strict-aliasing']) env.append_value('CXXFLAGS_PYEXT',['-fno-strict-aliasing']) if env.CC_NAME=="msvc": @@ -213,72 +283,74 @@ env.append_value('CFLAGS_PYEXT',dist_compiler.compile_options) env.append_value('CXXFLAGS_PYEXT',dist_compiler.compile_options) env.append_value('LINKFLAGS_PYEXT',dist_compiler.ldflags_shared) - try: - conf.check(header_name='Python.h',define_name='HAVE_PYTHON_H',uselib='PYEMBED',fragment=FRAG,errmsg='Could not find the python development headers') - except conf.errors.ConfigurationError: - conf.check_cfg(path=conf.env.PYTHON_CONFIG,package='',uselib_store='PYEMBED',args=['--cflags','--libs']) - conf.check(header_name='Python.h',define_name='HAVE_PYTHON_H',msg='Getting the python flags from python-config',uselib='PYEMBED',fragment=FRAG,errmsg='Could not find the python development headers elsewhere') + conf.check(header_name='Python.h',define_name='HAVE_PYTHON_H',uselib='PYEMBED',fragment=FRAG,errmsg='Distutils not installed? Broken python installation? Get python-config now!') +@conf def check_python_version(conf,minver=None): assert minver is None or isinstance(minver,tuple) - pybin=conf.env['PYTHON'] + pybin=conf.env.PYTHON if not pybin: conf.fatal('could not find the python executable') cmd=pybin+['-c','import sys\nfor x in sys.version_info: print(str(x))'] - debug('python: Running python command %r'%cmd) + Logs.debug('python: Running python command %r',cmd) lines=conf.cmd_and_log(cmd).split() - assert len(lines)==5,"found %i lines, expected 5: %r"%(len(lines),lines) + assert len(lines)==5,"found %r lines, expected 5: %r"%(len(lines),lines) pyver_tuple=(int(lines[0]),int(lines[1]),int(lines[2]),lines[3],int(lines[4])) result=(minver is None)or(pyver_tuple>=minver) if result: pyver='.'.join([str(x)for x in pyver_tuple[:2]]) - conf.env['PYTHON_VERSION']=pyver - if'PYTHONDIR'in conf.environ: + conf.env.PYTHON_VERSION=pyver + if'PYTHONDIR'in conf.env: + pydir=conf.env.PYTHONDIR + elif'PYTHONDIR'in conf.environ: pydir=conf.environ['PYTHONDIR'] else: if Utils.is_win32: - (python_LIBDEST,pydir)=conf.get_python_variables(["get_config_var('LIBDEST') or ''","get_python_lib(standard_lib=0, prefix=%r) or ''"%conf.env['PREFIX']]) + (python_LIBDEST,pydir)=conf.get_python_variables(["get_config_var('LIBDEST') or ''","get_python_lib(standard_lib=0) or ''"]) else: python_LIBDEST=None - (pydir,)=conf.get_python_variables(["get_python_lib(standard_lib=0, prefix=%r) or ''"%conf.env['PREFIX']]) + (pydir,)=conf.get_python_variables(["get_python_lib(standard_lib=0, prefix=%r) or ''"%conf.env.PREFIX]) if python_LIBDEST is None: - if conf.env['LIBDIR']: - python_LIBDEST=os.path.join(conf.env['LIBDIR'],"python"+pyver) + if conf.env.LIBDIR: + python_LIBDEST=os.path.join(conf.env.LIBDIR,'python'+pyver) else: - python_LIBDEST=os.path.join(conf.env['PREFIX'],"lib","python"+pyver) - if'PYTHONARCHDIR'in conf.environ: + python_LIBDEST=os.path.join(conf.env.PREFIX,'lib','python'+pyver) + if'PYTHONARCHDIR'in conf.env: + pyarchdir=conf.env.PYTHONARCHDIR + elif'PYTHONARCHDIR'in conf.environ: pyarchdir=conf.environ['PYTHONARCHDIR'] else: - (pyarchdir,)=conf.get_python_variables(["get_python_lib(plat_specific=1, standard_lib=0, prefix=%r) or ''"%conf.env['PREFIX']]) + (pyarchdir,)=conf.get_python_variables(["get_python_lib(plat_specific=1, standard_lib=0, prefix=%r) or ''"%conf.env.PREFIX]) if not pyarchdir: pyarchdir=pydir if hasattr(conf,'define'): conf.define('PYTHONDIR',pydir) conf.define('PYTHONARCHDIR',pyarchdir) - conf.env['PYTHONDIR']=pydir - conf.env['PYTHONARCHDIR']=pyarchdir + conf.env.PYTHONDIR=pydir + conf.env.PYTHONARCHDIR=pyarchdir pyver_full='.'.join(map(str,pyver_tuple[:3])) if minver is None: conf.msg('Checking for python version',pyver_full) else: minver_str='.'.join(map(str,minver)) - conf.msg('Checking for python version',pyver_tuple,">= %s"%(minver_str,)and'GREEN'or'YELLOW') + conf.msg('Checking for python version >= %s'%(minver_str,),pyver_full,color=result and'GREEN'or'YELLOW') if not result: conf.fatal('The python version is too old, expecting %r'%(minver,)) PYTHON_MODULE_TEMPLATE=''' import %s as current_module version = getattr(current_module, '__version__', None) if version is not None: - print(str(version)) + print(str(version)) else: - print('unknown version') + print('unknown version') ''' +@conf def check_python_module(conf,module_name,condition=''): - msg='Python module %s'%module_name + msg="Checking for python module %r"%module_name if condition: msg='%s (%s)'%(msg,condition) conf.start_msg(msg) try: - ret=conf.cmd_and_log(conf.env['PYTHON']+['-c',PYTHON_MODULE_TEMPLATE%module_name]) + ret=conf.cmd_and_log(conf.env.PYTHON+['-c',PYTHON_MODULE_TEMPLATE%module_name]) except Exception: conf.end_msg(False) conf.fatal('Could not find the python module %r'%module_name) @@ -303,34 +375,24 @@ else: conf.end_msg(ret) def configure(conf): - try: - conf.find_program('python',var='PYTHON') - except conf.errors.ConfigurationError: - warn("could not find a python executable, setting to sys.executable '%s'"%sys.executable) - conf.env.PYTHON=sys.executable - if conf.env.PYTHON!=sys.executable: - warn("python executable '%s' different from sys.executable '%s'"%(conf.env.PYTHON,sys.executable)) - conf.env.PYTHON=conf.cmd_to_list(conf.env.PYTHON) v=conf.env - v['PYCMD']='"import sys, py_compile;py_compile.compile(sys.argv[1], sys.argv[2])"' - v['PYFLAGS']='' - v['PYFLAGS_OPT']='-O' - v['PYC']=getattr(Options.options,'pyc',1) - v['PYO']=getattr(Options.options,'pyo',1) + if Options.options.pythondir: + v.PYTHONDIR=Options.options.pythondir + if Options.options.pythonarchdir: + v.PYTHONARCHDIR=Options.options.pythonarchdir + conf.find_program('python',var='PYTHON',value=Options.options.pythondir or sys.executable) + v.PYFLAGS='' + v.PYFLAGS_OPT='-O' + v.PYC=getattr(Options.options,'pyc',1) + v.PYO=getattr(Options.options,'pyo',1) + try: + v.PYTAG=conf.cmd_and_log(conf.env.PYTHON+['-c',"import imp;print(imp.get_tag())"]).strip() + except Errors.WafError: + pass def options(opt): - opt.add_option('--nopyc',action='store_false',default=1,help='Do not install bytecode compiled .pyc files (configuration) [Default:install]',dest='pyc') - opt.add_option('--nopyo',action='store_false',default=1,help='Do not install optimised compiled .pyo files (configuration) [Default:install]',dest='pyo') - -extension('.py')(process_py) -feature('py')(feature_py) -feature('pyext')(init_pyext) -before_method('propagate_uselib_vars','apply_link')(init_pyext) -after_method('apply_bundle')(init_pyext) -feature('pyext')(set_bundle) -before_method('apply_link','apply_bundle')(set_bundle) -before_method('propagate_uselib_vars')(init_pyembed) -feature('pyembed')(init_pyembed) -conf(get_python_variables) -conf(check_python_headers) -conf(check_python_version) -conf(check_python_module) \ No newline at end of file + pyopt=opt.add_option_group("Python Options") + pyopt.add_option('--nopyc',dest='pyc',action='store_false',default=1,help='Do not install bytecode compiled .pyc files (configuration) [Default:install]') + pyopt.add_option('--nopyo',dest='pyo',action='store_false',default=1,help='Do not install optimised compiled .pyo files (configuration) [Default:install]') + pyopt.add_option('--python',dest="python",help='python binary to be used [Default: %s]'%sys.executable) + pyopt.add_option('--pythondir',dest='pythondir',help='Installation path for python modules (py, platform-independent .py and .pyc files)') + pyopt.add_option('--pythonarchdir',dest='pythonarchdir',help='Installation path for python extension (pyext, platform-dependent .so or .dylib files)') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/qt4.py glmark2-2021.02/waflib/Tools/qt4.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/qt4.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/qt4.py 1970-01-01 08:00:00.000000000 +0800 @@ -1,434 +0,0 @@ -#! /usr/bin/env python -# encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file - -import sys -if sys.hexversion < 0x020400f0: from sets import Set as set -try: - from xml.sax import make_parser - from xml.sax.handler import ContentHandler -except ImportError: - has_xml=False - ContentHandler=object -else: - has_xml=True -import os,sys -from waflib.Tools import c_preproc,cxx -from waflib import Task,Utils,Options,Errors -from waflib.TaskGen import feature,after_method,extension -from waflib.Configure import conf -from waflib import Logs -MOC_H=['.h','.hpp','.hxx','.hh'] -EXT_RCC=['.qrc'] -EXT_UI=['.ui'] -EXT_QT4=['.cpp','.cc','.cxx','.C'] -QT4_LIBS="QtCore QtGui QtUiTools QtNetwork QtOpenGL QtSql QtSvg QtTest QtXml QtXmlPatterns QtWebKit Qt3Support QtHelp QtScript QtDeclarative" -class qxx(cxx.cxx): - def __init__(self,*k,**kw): - Task.Task.__init__(self,*k,**kw) - self.moc_done=0 - def scan(self): - (nodes,names)=c_preproc.scan(self) - for x in nodes: - if x.name.endswith('.moc'): - nodes.remove(x) - names.append(x.path_from(self.inputs[0].parent.get_bld())) - return(nodes,names) - def runnable_status(self): - if self.moc_done: - return Task.Task.runnable_status(self) - else: - for t in self.run_after: - if not t.hasrun: - return Task.ASK_LATER - self.add_moc_tasks() - return Task.Task.runnable_status(self) - def add_moc_tasks(self): - node=self.inputs[0] - bld=self.generator.bld - try: - self.signature() - except KeyError: - pass - else: - delattr(self,'cache_sig') - moctasks=[] - mocfiles=[] - try: - tmp_lst=bld.raw_deps[self.uid()] - bld.raw_deps[self.uid()]=[] - except KeyError: - tmp_lst=[] - for d in tmp_lst: - if not d.endswith('.moc'): - continue - if d in mocfiles: - Logs.error("paranoia owns") - continue - mocfiles.append(d) - h_node=None - try:ext=Options.options.qt_header_ext.split() - except AttributeError:pass - if not ext:ext=MOC_H - base2=d[:-4] - for x in[node.parent]+self.generator.includes_nodes: - for e in ext: - h_node=x.find_node(base2+e) - if h_node: - break - if h_node: - m_node=h_node.change_ext('.moc') - break - else: - for k in EXT_QT4: - if base2.endswith(k): - for x in[node.parent]+self.generator.includes_nodes: - h_node=x.find_node(base2) - if h_node: - break - if h_node: - m_node=h_node.change_ext(k+'.moc') - break - if not h_node: - raise Errors.WafError('no header found for %r which is a moc file'%d) - bld.node_deps[(self.inputs[0].parent.abspath(),m_node.name)]=h_node - task=Task.classes['moc'](env=self.env,generator=self.generator) - task.set_inputs(h_node) - task.set_outputs(m_node) - gen=bld.producer - gen.outstanding.insert(0,task) - gen.total+=1 - moctasks.append(task) - tmp_lst=bld.raw_deps[self.uid()]=mocfiles - lst=bld.node_deps.get(self.uid(),()) - for d in lst: - name=d.name - if name.endswith('.moc'): - task=Task.classes['moc'](env=self.env,generator=self.generator) - task.set_inputs(bld.node_deps[(self.inputs[0].parent.abspath(),name)]) - task.set_outputs(d) - gen=bld.producer - gen.outstanding.insert(0,task) - gen.total+=1 - moctasks.append(task) - self.run_after.update(set(moctasks)) - self.moc_done=1 - run=Task.classes['cxx'].__dict__['run'] -class trans_update(Task.Task): - run_str='${QT_LUPDATE} ${SRC} -ts ${TGT}' - color='BLUE' -Task.update_outputs(trans_update) -class XMLHandler(ContentHandler): - def __init__(self): - self.buf=[] - self.files=[] - def startElement(self,name,attrs): - if name=='file': - self.buf=[] - def endElement(self,name): - if name=='file': - self.files.append(str(''.join(self.buf))) - def characters(self,cars): - self.buf.append(cars) -def create_rcc_task(self,node): - rcnode=node.change_ext('_rc.cpp') - rcctask=self.create_task('rcc',node,rcnode) - cpptask=self.create_task('cxx',rcnode,rcnode.change_ext('.o')) - try: - self.compiled_tasks.append(cpptask) - except AttributeError: - self.compiled_tasks=[cpptask] - return cpptask -def create_uic_task(self,node): - uictask=self.create_task('ui4',node) - uictask.outputs=[self.path.find_or_declare(self.env['ui_PATTERN']%node.name[:-3])] -def add_lang(self,node): - self.lang=self.to_list(getattr(self,'lang',[]))+[node] -def apply_qt4(self): - if getattr(self,'lang',None): - qmtasks=[] - for x in self.to_list(self.lang): - if isinstance(x,str): - x=self.path.find_resource(x+'.ts') - qmtasks.append(self.create_task('ts2qm',x,x.change_ext('.qm'))) - if getattr(self,'update',None)and Options.options.trans_qt4: - cxxnodes=[a.inputs[0]for a in self.compiled_tasks]+[a.inputs[0]for a in self.tasks if getattr(a,'inputs',None)and a.inputs[0].name.endswith('.ui')] - for x in qmtasks: - self.create_task('trans_update',cxxnodes,x.inputs) - if getattr(self,'langname',None): - qmnodes=[x.outputs[0]for x in qmtasks] - rcnode=self.langname - if isinstance(rcnode,str): - rcnode=self.path.find_or_declare(rcnode+'.qrc') - t=self.create_task('qm2rcc',qmnodes,rcnode) - k=create_rcc_task(self,t.outputs[0]) - self.link_task.inputs.append(k.outputs[0]) - lst=[] - for flag in self.to_list(self.env['CXXFLAGS']): - if len(flag)<2:continue - f=flag[0:2] - if f in['-D','-I','/D','/I']: - if(f[0]=='/'): - lst.append('-'+flag[1:]) - else: - lst.append(flag) - self.env['MOC_FLAGS']=lst -def cxx_hook(self,node): - return self.create_compiled_task('qxx',node) -class rcc(Task.Task): - color='BLUE' - run_str='${QT_RCC} -name ${SRC[0].name} ${SRC[0].abspath()} ${RCC_ST} -o ${TGT}' - ext_out=['.h'] - def scan(self): - node=self.inputs[0] - if not has_xml: - Logs.error('no xml support was found, the rcc dependencies will be incomplete!') - return([],[]) - parser=make_parser() - curHandler=XMLHandler() - parser.setContentHandler(curHandler) - fi=open(self.inputs[0].abspath()) - parser.parse(fi) - fi.close() - nodes=[] - names=[] - root=self.inputs[0].parent - for x in curHandler.files: - nd=root.find_resource(x) - if nd:nodes.append(nd) - else:names.append(x) - return(nodes,names) -class moc(Task.Task): - color='BLUE' - run_str='${QT_MOC} ${MOC_FLAGS} ${MOCCPPPATH_ST:INCPATHS} ${MOCDEFINES_ST:DEFINES} ${SRC} ${MOC_ST} ${TGT}' -class ui4(Task.Task): - color='BLUE' - run_str='${QT_UIC} ${SRC} -o ${TGT}' - ext_out=['.h'] -class ts2qm(Task.Task): - color='BLUE' - run_str='${QT_LRELEASE} ${QT_LRELEASE_FLAGS} ${SRC} -qm ${TGT}' -class qm2rcc(Task.Task): - color='BLUE' - after='ts2qm' - def run(self): - txt='\n'.join(['%s'%k.path_from(self.outputs[0].parent)for k in self.inputs]) - code='\n\n%s\n\n'%txt - self.outputs[0].write(code) -def configure(self): - self.find_qt4_binaries() - self.set_qt4_libs_to_check() - self.find_qt4_libraries() - self.add_qt4_rpath() - self.simplify_qt4_libs() -def find_qt4_binaries(self): - env=self.env - opt=Options.options - qtdir=getattr(opt,'qtdir','') - qtbin=getattr(opt,'qtbin','') - paths=[] - if qtdir: - qtbin=os.path.join(qtdir,'bin') - if not qtdir: - qtdir=self.environ.get('QT4_ROOT','') - qtbin=os.path.join(qtdir,'bin') - if qtbin: - paths=[qtbin] - if not qtdir: - paths=os.environ.get('PATH','').split(os.pathsep) - paths.append('/usr/share/qt4/bin/') - try: - lst=Utils.listdir('/usr/local/Trolltech/') - except OSError: - pass - else: - if lst: - lst.sort() - lst.reverse() - qtdir='/usr/local/Trolltech/%s/'%lst[0] - qtbin=os.path.join(qtdir,'bin') - paths.append(qtbin) - cand=None - prev_ver=['4','0','0'] - for qmk in['qmake-qt4','qmake4','qmake']: - try: - qmake=self.find_program(qmk,path_list=paths) - except self.errors.ConfigurationError: - pass - else: - try: - version=self.cmd_and_log([qmake,'-query','QT_VERSION']).strip() - except self.errors.ConfigurationError: - pass - else: - if version: - new_ver=version.split('.') - if new_ver>prev_ver: - cand=qmake - prev_ver=new_ver - if cand: - self.env.QMAKE=cand - else: - self.fatal('Could not find qmake for qt4') - qtbin=self.cmd_and_log([self.env.QMAKE,'-query','QT_INSTALL_BINS']).strip()+os.sep - def find_bin(lst,var): - for f in lst: - try: - ret=self.find_program(f,path_list=paths) - except self.errors.ConfigurationError: - pass - else: - env[var]=ret - break - find_bin(['uic-qt3','uic3'],'QT_UIC3') - find_bin(['uic-qt4','uic'],'QT_UIC') - if not env['QT_UIC']: - self.fatal('cannot find the uic compiler for qt4') - try: - uicver=self.cmd_and_log(env['QT_UIC']+" -version 2>&1").strip() - except self.errors.ConfigurationError: - self.fatal('this uic compiler is for qt3, add uic for qt4 to your path') - uicver=uicver.replace('Qt User Interface Compiler ','').replace('User Interface Compiler for Qt','') - self.msg('Checking for uic version','%s'%uicver) - if uicver.find(' 3.')!=-1: - self.fatal('this uic compiler is for qt3, add uic for qt4 to your path') - find_bin(['moc-qt4','moc'],'QT_MOC') - find_bin(['rcc'],'QT_RCC') - find_bin(['lrelease-qt4','lrelease'],'QT_LRELEASE') - find_bin(['lupdate-qt4','lupdate'],'QT_LUPDATE') - env['UIC3_ST']='%s -o %s' - env['UIC_ST']='%s -o %s' - env['MOC_ST']='-o' - env['ui_PATTERN']='ui_%s.h' - env['QT_LRELEASE_FLAGS']=['-silent'] - env.MOCCPPPATH_ST='-I%s' - env.MOCDEFINES_ST='-D%s' -def find_qt4_libraries(self): - qtlibs=getattr(Options.options,'qtlibs','') - if not qtlibs: - try: - qtlibs=self.cmd_and_log([self.env.QMAKE,'-query','QT_INSTALL_LIBS']).strip() - except Errors.WafError: - qtdir=self.cmd_and_log([self.env.QMAKE,'-query','QT_INSTALL_PREFIX']).strip()+os.sep - qtlibs=os.path.join(qtdir,'lib') - self.msg('Found the Qt4 libraries in',qtlibs) - qtincludes=self.cmd_and_log([self.env.QMAKE,'-query','QT_INSTALL_HEADERS']).strip() - env=self.env - if not'PKG_CONFIG_PATH'in os.environ: - os.environ['PKG_CONFIG_PATH']='%s:%s/pkgconfig:/usr/lib/qt4/lib/pkgconfig:/opt/qt4/lib/pkgconfig:/usr/lib/qt4/lib:/opt/qt4/lib'%(qtlibs,qtlibs) - try: - self.check_cfg(atleast_pkgconfig_version='0.1') - except self.errors.ConfigurationError: - for i in self.qt4_vars: - uselib=i.upper() - if Utils.unversioned_sys_platform()=="darwin": - frameworkName=i+".framework" - qtDynamicLib=os.path.join(qtlibs,frameworkName,i) - if os.path.exists(qtDynamicLib): - env.append_unique('FRAMEWORK_'+uselib,i) - self.msg('Checking for %s'%i,qtDynamicLib,'GREEN') - else: - self.msg('Checking for %s'%i,False,'YELLOW') - env.append_unique('INCLUDES_'+uselib,os.path.join(qtlibs,frameworkName,'Headers')) - elif sys.platform!="win32": - qtDynamicLib=os.path.join(qtlibs,"lib"+i+".so") - qtStaticLib=os.path.join(qtlibs,"lib"+i+".a") - if os.path.exists(qtDynamicLib): - env.append_unique('LIB_'+uselib,i) - self.msg('Checking for %s'%i,qtDynamicLib,'GREEN') - elif os.path.exists(qtStaticLib): - env.append_unique('LIB_'+uselib,i) - self.msg('Checking for %s'%i,qtStaticLib,'GREEN') - else: - self.msg('Checking for %s'%i,False,'YELLOW') - env.append_unique('LIBPATH_'+uselib,qtlibs) - env.append_unique('INCLUDES_'+uselib,qtincludes) - env.append_unique('INCLUDES_'+uselib,os.path.join(qtincludes,i)) - else: - for k in("lib%s.a","lib%s4.a","%s.lib","%s4.lib"): - lib=os.path.join(qtlibs,k%i) - if os.path.exists(lib): - env.append_unique('LIB_'+uselib,i+k[k.find("%s")+2:k.find('.')]) - self.msg('Checking for %s'%i,lib,'GREEN') - break - else: - self.msg('Checking for %s'%i,False,'YELLOW') - env.append_unique('LIBPATH_'+uselib,qtlibs) - env.append_unique('INCLUDES_'+uselib,qtincludes) - env.append_unique('INCLUDES_'+uselib,os.path.join(qtincludes,i)) - uselib=i.upper()+"_debug" - for k in("lib%sd.a","lib%sd4.a","%sd.lib","%sd4.lib"): - lib=os.path.join(qtlibs,k%i) - if os.path.exists(lib): - env.append_unique('LIB_'+uselib,i+k[k.find("%s")+2:k.find('.')]) - self.msg('Checking for %s'%i,lib,'GREEN') - break - else: - self.msg('Checking for %s'%i,False,'YELLOW') - env.append_unique('LIBPATH_'+uselib,qtlibs) - env.append_unique('INCLUDES_'+uselib,qtincludes) - env.append_unique('INCLUDES_'+uselib,os.path.join(qtincludes,i)) - else: - for i in self.qt4_vars_debug+self.qt4_vars: - self.check_cfg(package=i,args='--cflags --libs',mandatory=False) -def simplify_qt4_libs(self): - env=self.env - def process_lib(vars_,coreval): - for d in vars_: - var=d.upper() - if var=='QTCORE': - continue - value=env['LIBPATH_'+var] - if value: - core=env[coreval] - accu=[] - for lib in value: - if lib in core: - continue - accu.append(lib) - env['LIBPATH_'+var]=accu - process_lib(self.qt4_vars,'LIBPATH_QTCORE') - process_lib(self.qt4_vars_debug,'LIBPATH_QTCORE_DEBUG') -def add_qt4_rpath(self): - env=self.env - if Options.options.want_rpath: - def process_rpath(vars_,coreval): - for d in vars_: - var=d.upper() - value=env['LIBPATH_'+var] - if value: - core=env[coreval] - accu=[] - for lib in value: - if var!='QTCORE': - if lib in core: - continue - accu.append('-Wl,--rpath='+lib) - env['RPATH_'+var]=accu - process_rpath(self.qt4_vars,'LIBPATH_QTCORE') - process_rpath(self.qt4_vars_debug,'LIBPATH_QTCORE_DEBUG') -def set_qt4_libs_to_check(self): - if not hasattr(self,'qt4_vars'): - self.qt4_vars=QT4_LIBS - self.qt4_vars=Utils.to_list(self.qt4_vars) - if not hasattr(self,'qt4_vars_debug'): - self.qt4_vars_debug=[a+'_debug'for a in self.qt4_vars] - self.qt4_vars_debug=Utils.to_list(self.qt4_vars_debug) -def options(opt): - opt.add_option('--want-rpath',action='store_true',default=False,dest='want_rpath',help='enable the rpath for qt libraries') - opt.add_option('--header-ext',type='string',default='',help='header extension for moc files',dest='qt_header_ext') - for i in'qtdir qtbin qtlibs'.split(): - opt.add_option('--'+i,type='string',default='',dest=i) - opt.add_option('--translate',action="store_true",help="collect translation strings",dest="trans_qt4",default=False) - -extension(*EXT_RCC)(create_rcc_task) -extension(*EXT_UI)(create_uic_task) -extension('.ts')(add_lang) -feature('qt4')(apply_qt4) -after_method('apply_link')(apply_qt4) -extension(*EXT_QT4)(cxx_hook) -conf(find_qt4_binaries) -conf(find_qt4_libraries) -conf(simplify_qt4_libs) -conf(add_qt4_rpath) -conf(set_qt4_libs_to_check) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/qt5.py glmark2-2021.02/waflib/Tools/qt5.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/qt5.py 1970-01-01 08:00:00.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/qt5.py 2021-04-25 01:47:22.000000000 +0800 @@ -0,0 +1,506 @@ +#! /usr/bin/env python +# encoding: utf-8 +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file + +try: + from xml.sax import make_parser + from xml.sax.handler import ContentHandler +except ImportError: + has_xml=False + ContentHandler=object +else: + has_xml=True +import os,sys +from waflib.Tools import cxx +from waflib import Task,Utils,Options,Errors,Context +from waflib.TaskGen import feature,after_method,extension,before_method +from waflib.Configure import conf +from waflib import Logs +MOC_H=['.h','.hpp','.hxx','.hh'] +EXT_RCC=['.qrc'] +EXT_UI=['.ui'] +EXT_QT5=['.cpp','.cc','.cxx','.C'] +QT5_LIBS=''' +qtmain +Qt5Bluetooth +Qt5CLucene +Qt5Concurrent +Qt5Core +Qt5DBus +Qt5Declarative +Qt5DesignerComponents +Qt5Designer +Qt5Gui +Qt5Help +Qt5MultimediaQuick_p +Qt5Multimedia +Qt5MultimediaWidgets +Qt5Network +Qt5Nfc +Qt5OpenGL +Qt5Positioning +Qt5PrintSupport +Qt5Qml +Qt5QuickParticles +Qt5Quick +Qt5QuickTest +Qt5Script +Qt5ScriptTools +Qt5Sensors +Qt5SerialPort +Qt5Sql +Qt5Svg +Qt5Test +Qt5WebKit +Qt5WebKitWidgets +Qt5Widgets +Qt5WinExtras +Qt5X11Extras +Qt5XmlPatterns +Qt5Xml''' +class qxx(Task.classes['cxx']): + def __init__(self,*k,**kw): + Task.Task.__init__(self,*k,**kw) + self.moc_done=0 + def runnable_status(self): + if self.moc_done: + return Task.Task.runnable_status(self) + else: + for t in self.run_after: + if not t.hasrun: + return Task.ASK_LATER + self.add_moc_tasks() + return Task.Task.runnable_status(self) + def create_moc_task(self,h_node,m_node): + try: + moc_cache=self.generator.bld.moc_cache + except AttributeError: + moc_cache=self.generator.bld.moc_cache={} + try: + return moc_cache[h_node] + except KeyError: + tsk=moc_cache[h_node]=Task.classes['moc'](env=self.env,generator=self.generator) + tsk.set_inputs(h_node) + tsk.set_outputs(m_node) + tsk.env.append_unique('MOC_FLAGS','-i') + if self.generator: + self.generator.tasks.append(tsk) + gen=self.generator.bld.producer + gen.outstanding.appendleft(tsk) + gen.total+=1 + return tsk + else: + delattr(self,'cache_sig') + def add_moc_tasks(self): + node=self.inputs[0] + bld=self.generator.bld + try: + self.signature() + except KeyError: + pass + else: + delattr(self,'cache_sig') + include_nodes=[node.parent]+self.generator.includes_nodes + moctasks=[] + mocfiles=set([]) + for d in bld.raw_deps.get(self.uid(),[]): + if not d.endswith('.moc'): + continue + if d in mocfiles: + continue + mocfiles.add(d) + h_node=None + base2=d[:-4] + prefix=node.name[:node.name.rfind('.')] + if base2==prefix: + h_node=node + else: + for x in include_nodes: + for e in MOC_H: + h_node=x.find_node(base2+e) + if h_node: + break + else: + continue + break + if h_node: + m_node=h_node.change_ext('.moc') + else: + raise Errors.WafError('No source found for %r which is a moc file'%d) + task=self.create_moc_task(h_node,m_node) + moctasks.append(task) + self.run_after.update(set(moctasks)) + self.moc_done=1 +class trans_update(Task.Task): + run_str='${QT_LUPDATE} ${SRC} -ts ${TGT}' + color='BLUE' +class XMLHandler(ContentHandler): + def __init__(self): + self.buf=[] + self.files=[] + def startElement(self,name,attrs): + if name=='file': + self.buf=[] + def endElement(self,name): + if name=='file': + self.files.append(str(''.join(self.buf))) + def characters(self,cars): + self.buf.append(cars) +@extension(*EXT_RCC) +def create_rcc_task(self,node): + rcnode=node.change_ext('_rc.cpp') + self.create_task('rcc',node,rcnode) + cpptask=self.create_task('cxx',rcnode,rcnode.change_ext('.o')) + try: + self.compiled_tasks.append(cpptask) + except AttributeError: + self.compiled_tasks=[cpptask] + return cpptask +@extension(*EXT_UI) +def create_uic_task(self,node): + uictask=self.create_task('ui5',node) + uictask.outputs=[node.parent.find_or_declare(self.env.ui_PATTERN%node.name[:-3])] +@extension('.ts') +def add_lang(self,node): + self.lang=self.to_list(getattr(self,'lang',[]))+[node] +@feature('qt5') +@before_method('process_source') +def process_mocs(self): + lst=self.to_nodes(getattr(self,'moc',[])) + self.source=self.to_list(getattr(self,'source',[])) + for x in lst: + prefix=x.name[:x.name.rfind('.')] + moc_target='moc_%s.%d.cpp'%(prefix,self.idx) + moc_node=x.parent.find_or_declare(moc_target) + self.source.append(moc_node) + self.create_task('moc',x,moc_node) +@feature('qt5') +@after_method('apply_link') +def apply_qt5(self): + if getattr(self,'lang',None): + qmtasks=[] + for x in self.to_list(self.lang): + if isinstance(x,str): + x=self.path.find_resource(x+'.ts') + qmtasks.append(self.create_task('ts2qm',x,x.change_ext('.qm'))) + if getattr(self,'update',None)and Options.options.trans_qt5: + cxxnodes=[a.inputs[0]for a in self.compiled_tasks]+[a.inputs[0]for a in self.tasks if getattr(a,'inputs',None)and a.inputs[0].name.endswith('.ui')] + for x in qmtasks: + self.create_task('trans_update',cxxnodes,x.inputs) + if getattr(self,'langname',None): + qmnodes=[x.outputs[0]for x in qmtasks] + rcnode=self.langname + if isinstance(rcnode,str): + rcnode=self.path.find_or_declare(rcnode+'.qrc') + t=self.create_task('qm2rcc',qmnodes,rcnode) + k=create_rcc_task(self,t.outputs[0]) + self.link_task.inputs.append(k.outputs[0]) + lst=[] + for flag in self.to_list(self.env.CXXFLAGS): + if len(flag)<2:continue + f=flag[0:2] + if f in('-D','-I','/D','/I'): + if(f[0]=='/'): + lst.append('-'+flag[1:]) + else: + lst.append(flag) + self.env.append_value('MOC_FLAGS',lst) +@extension(*EXT_QT5) +def cxx_hook(self,node): + return self.create_compiled_task('qxx',node) +class rcc(Task.Task): + color='BLUE' + run_str='${QT_RCC} -name ${tsk.rcname()} ${SRC[0].abspath()} ${RCC_ST} -o ${TGT}' + ext_out=['.h'] + def rcname(self): + return os.path.splitext(self.inputs[0].name)[0] + def scan(self): + if not has_xml: + Logs.error('No xml.sax support was found, rcc dependencies will be incomplete!') + return([],[]) + parser=make_parser() + curHandler=XMLHandler() + parser.setContentHandler(curHandler) + fi=open(self.inputs[0].abspath(),'r') + try: + parser.parse(fi) + finally: + fi.close() + nodes=[] + names=[] + root=self.inputs[0].parent + for x in curHandler.files: + nd=root.find_resource(x) + if nd:nodes.append(nd) + else:names.append(x) + return(nodes,names) +class moc(Task.Task): + color='BLUE' + run_str='${QT_MOC} ${MOC_FLAGS} ${MOCCPPPATH_ST:INCPATHS} ${MOCDEFINES_ST:DEFINES} ${SRC} ${MOC_ST} ${TGT}' +class ui5(Task.Task): + color='BLUE' + run_str='${QT_UIC} ${SRC} -o ${TGT}' + ext_out=['.h'] +class ts2qm(Task.Task): + color='BLUE' + run_str='${QT_LRELEASE} ${QT_LRELEASE_FLAGS} ${SRC} -qm ${TGT}' +class qm2rcc(Task.Task): + color='BLUE' + after='ts2qm' + def run(self): + txt='\n'.join(['%s'%k.path_from(self.outputs[0].parent)for k in self.inputs]) + code='\n\n%s\n\n'%txt + self.outputs[0].write(code) +def configure(self): + self.find_qt5_binaries() + self.set_qt5_libs_to_check() + self.set_qt5_defines() + self.find_qt5_libraries() + self.add_qt5_rpath() + self.simplify_qt5_libs() + if not has_xml: + Logs.error('No xml.sax support was found, rcc dependencies will be incomplete!') + frag='#include \nint main(int argc, char **argv) {return 0;}\n' + uses='QT5CORE QT5WIDGETS QT5GUI' + for flag in[[],'-fPIE','-fPIC','-std=c++11',['-std=c++11','-fPIE'],['-std=c++11','-fPIC']]: + msg='See if Qt files compile ' + if flag: + msg+='with %s'%flag + try: + self.check(features='qt5 cxx',use=uses,uselib_store='qt5',cxxflags=flag,fragment=frag,msg=msg) + except self.errors.ConfigurationError: + pass + else: + break + else: + self.fatal('Could not build a simple Qt application') + from waflib import Utils + if Utils.unversioned_sys_platform()=='freebsd': + frag='#include \nint main(int argc, char **argv) { QApplication app(argc, argv); return NULL != (void*) (&app);}\n' + try: + self.check(features='qt5 cxx cxxprogram',use=uses,fragment=frag,msg='Can we link Qt programs on FreeBSD directly?') + except self.errors.ConfigurationError: + self.check(features='qt5 cxx cxxprogram',use=uses,uselib_store='qt5',libpath='/usr/local/lib',fragment=frag,msg='Is /usr/local/lib required?') +@conf +def find_qt5_binaries(self): + env=self.env + opt=Options.options + qtdir=getattr(opt,'qtdir','') + qtbin=getattr(opt,'qtbin','') + paths=[] + if qtdir: + qtbin=os.path.join(qtdir,'bin') + if not qtdir: + qtdir=self.environ.get('QT5_ROOT','') + qtbin=self.environ.get('QT5_BIN')or os.path.join(qtdir,'bin') + if qtbin: + paths=[qtbin] + if not qtdir: + paths=self.environ.get('PATH','').split(os.pathsep) + paths.extend(['/usr/share/qt5/bin','/usr/local/lib/qt5/bin']) + try: + lst=Utils.listdir('/usr/local/Trolltech/') + except OSError: + pass + else: + if lst: + lst.sort() + lst.reverse() + qtdir='/usr/local/Trolltech/%s/'%lst[0] + qtbin=os.path.join(qtdir,'bin') + paths.append(qtbin) + cand=None + prev_ver=['5','0','0'] + for qmk in('qmake-qt5','qmake5','qmake'): + try: + qmake=self.find_program(qmk,path_list=paths) + except self.errors.ConfigurationError: + pass + else: + try: + version=self.cmd_and_log(qmake+['-query','QT_VERSION']).strip() + except self.errors.WafError: + pass + else: + if version: + new_ver=version.split('.') + if new_ver>prev_ver: + cand=qmake + prev_ver=new_ver + if not cand: + try: + self.find_program('qtchooser') + except self.errors.ConfigurationError: + pass + else: + cmd=self.env.QTCHOOSER+['-qt=5','-run-tool=qmake'] + try: + version=self.cmd_and_log(cmd+['-query','QT_VERSION']) + except self.errors.WafError: + pass + else: + cand=cmd + if cand: + self.env.QMAKE=cand + else: + self.fatal('Could not find qmake for qt5') + self.env.QT_HOST_BINS=qtbin=self.cmd_and_log(self.env.QMAKE+['-query','QT_HOST_BINS']).strip() + paths.insert(0,qtbin) + def find_bin(lst,var): + if var in env: + return + for f in lst: + try: + ret=self.find_program(f,path_list=paths) + except self.errors.ConfigurationError: + pass + else: + env[var]=ret + break + find_bin(['uic-qt5','uic'],'QT_UIC') + if not env.QT_UIC: + self.fatal('cannot find the uic compiler for qt5') + self.start_msg('Checking for uic version') + uicver=self.cmd_and_log(env.QT_UIC+['-version'],output=Context.BOTH) + uicver=''.join(uicver).strip() + uicver=uicver.replace('Qt User Interface Compiler ','').replace('User Interface Compiler for Qt','') + self.end_msg(uicver) + if uicver.find(' 3.')!=-1 or uicver.find(' 4.')!=-1: + self.fatal('this uic compiler is for qt3 or qt5, add uic for qt5 to your path') + find_bin(['moc-qt5','moc'],'QT_MOC') + find_bin(['rcc-qt5','rcc'],'QT_RCC') + find_bin(['lrelease-qt5','lrelease'],'QT_LRELEASE') + find_bin(['lupdate-qt5','lupdate'],'QT_LUPDATE') + env.UIC_ST='%s -o %s' + env.MOC_ST='-o' + env.ui_PATTERN='ui_%s.h' + env.QT_LRELEASE_FLAGS=['-silent'] + env.MOCCPPPATH_ST='-I%s' + env.MOCDEFINES_ST='-D%s' +@conf +def find_single_qt5_lib(self,name,uselib,qtlibs,qtincludes,force_static): + env=self.env + if force_static: + exts=('.a','.lib') + prefix='STLIB' + else: + exts=('.so','.lib') + prefix='LIB' + def lib_names(): + for x in exts: + for k in('','5')if Utils.is_win32 else['']: + for p in('lib',''): + yield(p,name,k,x) + for tup in lib_names(): + k=''.join(tup) + path=os.path.join(qtlibs,k) + if os.path.exists(path): + if env.DEST_OS=='win32': + libval=''.join(tup[:-1]) + else: + libval=name + env.append_unique(prefix+'_'+uselib,libval) + env.append_unique('%sPATH_%s'%(prefix,uselib),qtlibs) + env.append_unique('INCLUDES_'+uselib,qtincludes) + env.append_unique('INCLUDES_'+uselib,os.path.join(qtincludes,name.replace('Qt5','Qt'))) + return k + return False +@conf +def find_qt5_libraries(self): + env=self.env + qtlibs=getattr(Options.options,'qtlibs',None)or self.environ.get('QT5_LIBDIR') + if not qtlibs: + try: + qtlibs=self.cmd_and_log(env.QMAKE+['-query','QT_INSTALL_LIBS']).strip() + except Errors.WafError: + qtdir=self.cmd_and_log(env.QMAKE+['-query','QT_INSTALL_PREFIX']).strip() + qtlibs=os.path.join(qtdir,'lib') + self.msg('Found the Qt5 libraries in',qtlibs) + qtincludes=self.environ.get('QT5_INCLUDES')or self.cmd_and_log(env.QMAKE+['-query','QT_INSTALL_HEADERS']).strip() + force_static=self.environ.get('QT5_FORCE_STATIC') + try: + if self.environ.get('QT5_XCOMPILE'): + self.fatal('QT5_XCOMPILE Disables pkg-config detection') + self.check_cfg(atleast_pkgconfig_version='0.1') + except self.errors.ConfigurationError: + for i in self.qt5_vars: + uselib=i.upper() + if Utils.unversioned_sys_platform()=='darwin': + frameworkName=i+'.framework' + qtDynamicLib=os.path.join(qtlibs,frameworkName,i) + if os.path.exists(qtDynamicLib): + env.append_unique('FRAMEWORK_'+uselib,i) + self.msg('Checking for %s'%i,qtDynamicLib,'GREEN') + else: + self.msg('Checking for %s'%i,False,'YELLOW') + env.append_unique('INCLUDES_'+uselib,os.path.join(qtlibs,frameworkName,'Headers')) + else: + for j in('','d'): + k='_DEBUG'if j=='d'else'' + ret=self.find_single_qt5_lib(i+j,uselib+k,qtlibs,qtincludes,force_static) + if not force_static and not ret: + ret=self.find_single_qt5_lib(i+j,uselib+k,qtlibs,qtincludes,True) + self.msg('Checking for %s'%(i+j),ret,'GREEN'if ret else'YELLOW') + else: + path='%s:%s:%s/pkgconfig:/usr/lib/qt5/lib/pkgconfig:/opt/qt5/lib/pkgconfig:/usr/lib/qt5/lib:/opt/qt5/lib'%(self.environ.get('PKG_CONFIG_PATH',''),qtlibs,qtlibs) + for i in self.qt5_vars_debug+self.qt5_vars: + self.check_cfg(package=i,args='--cflags --libs',mandatory=False,force_static=force_static,pkg_config_path=path) +@conf +def simplify_qt5_libs(self): + env=self.env + def process_lib(vars_,coreval): + for d in vars_: + var=d.upper() + if var=='QTCORE': + continue + value=env['LIBPATH_'+var] + if value: + core=env[coreval] + accu=[] + for lib in value: + if lib in core: + continue + accu.append(lib) + env['LIBPATH_'+var]=accu + process_lib(self.qt5_vars,'LIBPATH_QTCORE') + process_lib(self.qt5_vars_debug,'LIBPATH_QTCORE_DEBUG') +@conf +def add_qt5_rpath(self): + env=self.env + if getattr(Options.options,'want_rpath',False): + def process_rpath(vars_,coreval): + for d in vars_: + var=d.upper() + value=env['LIBPATH_'+var] + if value: + core=env[coreval] + accu=[] + for lib in value: + if var!='QTCORE': + if lib in core: + continue + accu.append('-Wl,--rpath='+lib) + env['RPATH_'+var]=accu + process_rpath(self.qt5_vars,'LIBPATH_QTCORE') + process_rpath(self.qt5_vars_debug,'LIBPATH_QTCORE_DEBUG') +@conf +def set_qt5_libs_to_check(self): + if not hasattr(self,'qt5_vars'): + self.qt5_vars=QT5_LIBS + self.qt5_vars=Utils.to_list(self.qt5_vars) + if not hasattr(self,'qt5_vars_debug'): + self.qt5_vars_debug=[a+'_DEBUG'for a in self.qt5_vars] + self.qt5_vars_debug=Utils.to_list(self.qt5_vars_debug) +@conf +def set_qt5_defines(self): + if sys.platform!='win32': + return + for x in self.qt5_vars: + y=x.replace('Qt5','Qt')[2:].upper() + self.env.append_unique('DEFINES_%s'%x.upper(),'QT_%s_LIB'%y) + self.env.append_unique('DEFINES_%s_DEBUG'%x.upper(),'QT_%s_LIB'%y) +def options(opt): + opt.add_option('--want-rpath',action='store_true',default=False,dest='want_rpath',help='enable the rpath for qt libraries') + for i in'qtdir qtbin qtlibs'.split(): + opt.add_option('--'+i,type='string',default='',dest=i) + opt.add_option('--translate',action='store_true',help='collect translation strings',dest='trans_qt5',default=False) diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ruby.py glmark2-2021.02/waflib/Tools/ruby.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/ruby.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/ruby.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,11 +1,13 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os -from waflib import Task,Options,Utils -from waflib.TaskGen import before_method,feature,after_method,Task,extension +from waflib import Options,Utils,Task +from waflib.TaskGen import before_method,feature,extension from waflib.Configure import conf +@feature('rubyext') +@before_method('apply_incpaths','process_source','apply_bundle','apply_link') def init_rubyext(self): self.install_path='${ARCHDIR_RUBY}' self.uselib=self.to_list(getattr(self,'uselib','')) @@ -13,31 +15,29 @@ self.uselib.append('RUBY') if not'RUBYEXT'in self.uselib: self.uselib.append('RUBYEXT') +@feature('rubyext') +@before_method('apply_link','propagate_uselib_vars') def apply_ruby_so_name(self): - self.env['cshlib_PATTERN']=self.env['cxxshlib_PATTERN']=self.env['rubyext_PATTERN'] + self.env.cshlib_PATTERN=self.env.cxxshlib_PATTERN=self.env.rubyext_PATTERN +@conf def check_ruby_version(self,minver=()): - if Options.options.rubybinary: - self.env.RUBY=Options.options.rubybinary - else: - self.find_program('ruby',var='RUBY') - ruby=self.env.RUBY + ruby=self.find_program('ruby',var='RUBY',value=Options.options.rubybinary) try: - version=self.cmd_and_log([ruby,'-e','puts defined?(VERSION) ? VERSION : RUBY_VERSION']).strip() - except: + version=self.cmd_and_log(ruby+['-e','puts defined?(VERSION) ? VERSION : RUBY_VERSION']).strip() + except Exception: self.fatal('could not determine ruby version') self.env.RUBY_VERSION=version try: ver=tuple(map(int,version.split("."))) - except: + except Exception: self.fatal('unsupported ruby version %r'%version) cver='' if minver: + cver='> '+'.'.join(str(x)for x in minver) if ver=(1,9,0): ruby_hdrdir=read_config('rubyhdrdir') cpppath+=ruby_hdrdir + if version>=(2,0,0): + cpppath+=read_config('rubyarchhdrdir') cpppath+=[os.path.join(ruby_hdrdir[0],read_config('arch')[0])] - self.check(header_name='ruby.h',includes=cpppath,errmsg='could not find ruby header file') + self.check(header_name='ruby.h',includes=cpppath,errmsg='could not find ruby header file',link_header_test=False) self.env.LIBPATH_RUBYEXT=read_config('libdir') self.env.LIBPATH_RUBYEXT+=archdir self.env.INCLUDES_RUBYEXT=cpppath @@ -77,28 +77,21 @@ self.env.LIBDIR_RUBY=Options.options.rubylibdir else: self.env.LIBDIR_RUBY=read_config('sitelibdir')[0] +@conf def check_ruby_module(self,module_name): self.start_msg('Ruby module %s'%module_name) try: - self.cmd_and_log([self.env['RUBY'],'-e','require \'%s\';puts 1'%module_name]) - except: + self.cmd_and_log(self.env.RUBY+['-e','require \'%s\';puts 1'%module_name]) + except Exception: self.end_msg(False) self.fatal('Could not find the ruby module %r'%module_name) self.end_msg(True) +@extension('.rb') def process(self,node): - tsk=self.create_task('run_ruby',node) + return self.create_task('run_ruby',node) class run_ruby(Task.Task): run_str='${RUBY} ${RBFLAGS} -I ${SRC[0].parent.abspath()} ${SRC}' def options(opt): opt.add_option('--with-ruby-archdir',type='string',dest='rubyarchdir',help='Specify directory where to install arch specific files') opt.add_option('--with-ruby-libdir',type='string',dest='rubylibdir',help='Specify alternate ruby library path') opt.add_option('--with-ruby-binary',type='string',dest='rubybinary',help='Specify alternate ruby binary') - -feature('rubyext')(init_rubyext) -before_method('apply_incpaths','apply_lib_vars','apply_bundle','apply_link')(init_rubyext) -feature('rubyext')(apply_ruby_so_name) -before_method('apply_link','propagate_uselib')(apply_ruby_so_name) -conf(check_ruby_version) -conf(check_ruby_ext_devel) -conf(check_ruby_module) -extension('.rb')(process) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/suncc.py glmark2-2021.02/waflib/Tools/suncc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/suncc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/suncc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,47 +1,43 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os -from waflib import Utils from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_scc(conf): v=conf.env - cc=None - if v['CC']:cc=v['CC'] - elif'CC'in conf.environ:cc=conf.environ['CC'] - if not cc:cc=conf.find_program('cc',var='CC') - if not cc:conf.fatal('Could not find a Sun C compiler') - cc=conf.cmd_to_list(cc) + cc=conf.find_program('cc',var='CC') try: conf.cmd_and_log(cc+['-flags']) - except: + except Exception: conf.fatal('%r is not a Sun compiler'%cc) - v['CC']=cc - v['CC_NAME']='sun' + v.CC_NAME='sun' + conf.get_suncc_version(cc) +@conf def scc_common_flags(conf): v=conf.env - v['CC_SRC_F']=[] - v['CC_TGT_F']=['-c','-o'] - if not v['LINK_CC']:v['LINK_CC']=v['CC'] - v['CCLNK_SRC_F']='' - v['CCLNK_TGT_F']=['-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['SONAME_ST']='-Wl,-h,%s' - v['SHLIB_MARKER']='-Bdynamic' - v['STLIB_MARKER']='-Bstatic' - v['cprogram_PATTERN']='%s' - v['CFLAGS_cshlib']=['-Kpic','-DPIC'] - v['LINKFLAGS_cshlib']=['-G'] - v['cshlib_PATTERN']='lib%s.so' - v['LINKFLAGS_cstlib']=['-Bstatic'] - v['cstlib_PATTERN']='lib%s.a' + v.CC_SRC_F=[] + v.CC_TGT_F=['-c','-o',''] + if not v.LINK_CC: + v.LINK_CC=v.CC + v.CCLNK_SRC_F='' + v.CCLNK_TGT_F=['-o',''] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.SONAME_ST='-Wl,-h,%s' + v.SHLIB_MARKER='-Bdynamic' + v.STLIB_MARKER='-Bstatic' + v.cprogram_PATTERN='%s' + v.CFLAGS_cshlib=['-xcode=pic32','-DPIC'] + v.LINKFLAGS_cshlib=['-G'] + v.cshlib_PATTERN='lib%s.so' + v.LINKFLAGS_cstlib=['-Bstatic'] + v.cstlib_PATTERN='lib%s.a' def configure(conf): conf.find_scc() conf.find_ar() @@ -49,6 +45,3 @@ conf.cc_load_tools() conf.cc_add_flags() conf.link_add_flags() - -conf(find_scc) -conf(scc_common_flags) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/suncxx.py glmark2-2021.02/waflib/Tools/suncxx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/suncxx.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/suncxx.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,48 +1,43 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os -from waflib import Utils from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_sxx(conf): v=conf.env - cc=None - if v['CXX']:cc=v['CXX'] - elif'CXX'in conf.environ:cc=conf.environ['CXX'] - if not cc:cc=conf.find_program('CC',var='CXX') - if not cc:cc=conf.find_program('c++',var='CXX') - if not cc:conf.fatal('Could not find a Sun C++ compiler') - cc=conf.cmd_to_list(cc) + cc=conf.find_program(['CC','c++'],var='CXX') try: conf.cmd_and_log(cc+['-flags']) - except: + except Exception: conf.fatal('%r is not a Sun compiler'%cc) - v['CXX']=cc - v['CXX_NAME']='sun' + v.CXX_NAME='sun' + conf.get_suncc_version(cc) +@conf def sxx_common_flags(conf): v=conf.env - v['CXX_SRC_F']=[] - v['CXX_TGT_F']=['-c','-o'] - if not v['LINK_CXX']:v['LINK_CXX']=v['CXX'] - v['CXXLNK_SRC_F']=[] - v['CXXLNK_TGT_F']=['-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['SONAME_ST']='-Wl,-h,%s' - v['SHLIB_MARKER']='-Bdynamic' - v['STLIB_MARKER']='-Bstatic' - v['cxxprogram_PATTERN']='%s' - v['CXXFLAGS_cxxshlib']=['-Kpic','-DPIC'] - v['LINKFLAGS_cxxshlib']=['-G'] - v['cxxshlib_PATTERN']='lib%s.so' - v['LINKFLAGS_cxxstlib']=['-Bstatic'] - v['cxxstlib_PATTERN']='lib%s.a' + v.CXX_SRC_F=[] + v.CXX_TGT_F=['-c','-o',''] + if not v.LINK_CXX: + v.LINK_CXX=v.CXX + v.CXXLNK_SRC_F=[] + v.CXXLNK_TGT_F=['-o',''] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.SONAME_ST='-Wl,-h,%s' + v.SHLIB_MARKER='-Bdynamic' + v.STLIB_MARKER='-Bstatic' + v.cxxprogram_PATTERN='%s' + v.CXXFLAGS_cxxshlib=['-xcode=pic32','-DPIC'] + v.LINKFLAGS_cxxshlib=['-G'] + v.cxxshlib_PATTERN='lib%s.so' + v.LINKFLAGS_cxxstlib=['-Bstatic'] + v.cxxstlib_PATTERN='lib%s.a' def configure(conf): conf.find_sxx() conf.find_ar() @@ -50,6 +45,3 @@ conf.cxx_load_tools() conf.cxx_add_flags() conf.link_add_flags() - -conf(find_sxx) -conf(sxx_common_flags) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/tex.py glmark2-2021.02/waflib/Tools/tex.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/tex.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/tex.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,33 +1,33 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file import os,re -from waflib import Utils,Task,Errors +from waflib import Utils,Task,Errors,Logs,Node from waflib.TaskGen import feature,before_method -from waflib.Logs import error,warn,debug re_bibunit=re.compile(r'\\(?Pputbib)\[(?P[^\[\]]*)\]',re.M) def bibunitscan(self): node=self.inputs[0] nodes=[] if not node:return nodes - code=Utils.readf(node.abspath()) + code=node.read() for match in re_bibunit.finditer(code): path=match.group('file') if path: - for k in['','.bib']: - debug('tex: trying %s%s'%(path,k)) + for k in('','.bib'): + Logs.debug('tex: trying %s%s',path,k) fi=node.parent.find_resource(path+k) if fi: nodes.append(fi) else: - debug('tex: could not find %s'%path) - debug("tex: found the following bibunit files: %s"%nodes) + Logs.debug('tex: could not find %s',path) + Logs.debug('tex: found the following bibunit files: %s',nodes) return nodes -exts_deps_tex=['','.ltx','.tex','.bib','.pdf','.png','.eps','.ps'] +exts_deps_tex=['','.ltx','.tex','.bib','.pdf','.png','.eps','.ps','.sty'] exts_tex=['.ltx','.tex'] -re_tex=re.compile(r'\\(?Pinclude|bibliography|putbib|includegraphics|input|import|bringin|lstinputlisting)(\[[^\[\]]*\])?{(?P[^{}]*)}',re.M) +re_tex=re.compile(r'\\(?Pusepackage|RequirePackage|include|bibliography([^\[\]{}]*)|putbib|includegraphics|input|import|bringin|lstinputlisting)(\[[^\[\]]*\])?{(?P[^{}]*)}',re.M) g_bibtex_re=re.compile('bibdata',re.M) +g_glossaries_re=re.compile('\\@newglossary',re.M) class tex(Task.Task): bibtex_fun,_=Task.compile_fun('${BIBTEX} ${BIBTEXFLAGS} ${SRCFILE}',shell=False) bibtex_fun.__doc__=""" @@ -37,6 +37,13 @@ makeindex_fun.__doc__=""" Execute the program **makeindex** """ + makeglossaries_fun,_=Task.compile_fun('${MAKEGLOSSARIES} ${SRCFILE}',shell=False) + makeglossaries_fun.__doc__=""" + Execute the program **makeglossaries** + """ + def exec_command(self,cmd,**kw): + kw['stdout']=kw['stderr']=None + return super(tex,self).exec_command(cmd,**kw) def scan_aux(self,node): nodes=[node] re_aux=re.compile(r'\\@input{(?P[^{}]*)}',re.M) @@ -46,7 +53,7 @@ path=match.group('file') found=node.parent.find_or_declare(path) if found and found not in nodes: - debug('tex: found aux node '+found.abspath()) + Logs.debug('tex: found aux node %r',found) nodes.append(found) parse_node(found) parse_node(node) @@ -64,114 +71,157 @@ code=node.read() global re_tex for match in re_tex.finditer(code): + multibib=match.group('type') + if multibib and multibib.startswith('bibliography'): + multibib=multibib[len('bibliography'):] + if multibib.startswith('style'): + continue + else: + multibib=None for path in match.group('file').split(','): if path: add_name=True found=None for k in exts_deps_tex: - debug('tex: trying %s%s'%(path,k)) - found=node.parent.find_resource(path+k) - if found and not found in self.outputs: + for up in self.texinputs_nodes: + Logs.debug('tex: trying %s%s',path,k) + found=up.find_resource(path+k) + if found: + break + for tsk in self.generator.tasks: + if not found or found in tsk.outputs: + break + else: nodes.append(found) add_name=False for ext in exts_tex: if found.name.endswith(ext): parse_node(found) break + if found and multibib and found.name.endswith('.bib'): + try: + self.multibibs.append(found) + except AttributeError: + self.multibibs=[found] if add_name: names.append(path) parse_node(node) for x in nodes: x.parent.get_bld().mkdir() - debug("tex: found the following : %s and names %s"%(nodes,names)) + Logs.debug("tex: found the following : %s and names %s",nodes,names) return(nodes,names) def check_status(self,msg,retcode): if retcode!=0: - raise Errors.WafError("%r command exit status %r"%(msg,retcode)) + raise Errors.WafError('%r command exit status %r'%(msg,retcode)) def bibfile(self): - need_bibtex=False - try: - for aux_node in self.aux_nodes: + for aux_node in self.aux_nodes: + try: ct=aux_node.read() - if g_bibtex_re.findall(ct): - need_bibtex=True - break - except(OSError,IOError): - error('error bibtex scan') - else: - if need_bibtex: - warn('calling bibtex') + except EnvironmentError: + Logs.error('Error reading %s: %r',aux_node.abspath()) + continue + if g_bibtex_re.findall(ct): + Logs.info('calling bibtex') self.env.env={} self.env.env.update(os.environ) - self.env.env.update({'BIBINPUTS':self.TEXINPUTS,'BSTINPUTS':self.TEXINPUTS}) - self.env.SRCFILE=self.aux_nodes[0].name[:-4] + self.env.env.update({'BIBINPUTS':self.texinputs(),'BSTINPUTS':self.texinputs()}) + self.env.SRCFILE=aux_node.name[:-4] self.check_status('error when calling bibtex',self.bibtex_fun()) + for node in getattr(self,'multibibs',[]): + self.env.env={} + self.env.env.update(os.environ) + self.env.env.update({'BIBINPUTS':self.texinputs(),'BSTINPUTS':self.texinputs()}) + self.env.SRCFILE=node.name[:-4] + self.check_status('error when calling bibtex',self.bibtex_fun()) def bibunits(self): try: bibunits=bibunitscan(self) - except FSError: - error('error bibunitscan') + except OSError: + Logs.error('error bibunitscan') else: if bibunits: - fn=['bu'+str(i)for i in xrange(1,len(bibunits)+1)] + fn=['bu'+str(i)for i in range(1,len(bibunits)+1)] if fn: - warn('calling bibtex on bibunits') + Logs.info('calling bibtex on bibunits') for f in fn: - self.env.env={'BIBINPUTS':self.TEXINPUTS,'BSTINPUTS':self.TEXINPUTS} + self.env.env={'BIBINPUTS':self.texinputs(),'BSTINPUTS':self.texinputs()} self.env.SRCFILE=f self.check_status('error when calling bibtex',self.bibtex_fun()) def makeindex(self): + self.idx_node=self.inputs[0].change_ext('.idx') try: idx_path=self.idx_node.abspath() os.stat(idx_path) except OSError: - warn('index file %s absent, not calling makeindex'%idx_path) + Logs.info('index file %s absent, not calling makeindex',idx_path) else: - warn('calling makeindex') + Logs.info('calling makeindex') self.env.SRCFILE=self.idx_node.name self.env.env={} self.check_status('error when calling makeindex %s'%idx_path,self.makeindex_fun()) + def bibtopic(self): + p=self.inputs[0].parent.get_bld() + if os.path.exists(os.path.join(p.abspath(),'btaux.aux')): + self.aux_nodes+=p.ant_glob('*[0-9].aux') + def makeglossaries(self): + src_file=self.inputs[0].abspath() + base_file=os.path.basename(src_file) + base,_=os.path.splitext(base_file) + for aux_node in self.aux_nodes: + try: + ct=aux_node.read() + except EnvironmentError: + Logs.error('Error reading %s: %r',aux_node.abspath()) + continue + if g_glossaries_re.findall(ct): + if not self.env.MAKEGLOSSARIES: + raise Errors.WafError("The program 'makeglossaries' is missing!") + Logs.warn('calling makeglossaries') + self.env.SRCFILE=base + self.check_status('error when calling makeglossaries %s'%base,self.makeglossaries_fun()) + return + def texinputs(self): + return os.pathsep.join([k.abspath()for k in self.texinputs_nodes])+os.pathsep def run(self): env=self.env - if not env['PROMPT_LATEX']: + if not env.PROMPT_LATEX: env.append_value('LATEXFLAGS','-interaction=batchmode') env.append_value('PDFLATEXFLAGS','-interaction=batchmode') env.append_value('XELATEXFLAGS','-interaction=batchmode') - fun=self.texfun - node=self.inputs[0] - srcfile=node.abspath() - texinputs=self.env.TEXINPUTS or'' - self.TEXINPUTS=node.parent.get_bld().abspath()+os.pathsep+node.parent.get_src().abspath()+os.pathsep+texinputs+os.pathsep - self.aux_node=node.change_ext('.aux') - self.cwd=self.inputs[0].parent.get_bld().abspath() - warn('first pass on %s'%self.__class__.__name__) - self.env.env={} - self.env.env.update(os.environ) - self.env.env.update({'TEXINPUTS':self.TEXINPUTS}) - self.env.SRCFILE=srcfile - self.check_status('error when calling latex',fun()) - self.aux_nodes=self.scan_aux(node.change_ext('.aux')) - self.idx_node=node.change_ext('.idx') + self.cwd=self.inputs[0].parent.get_bld() + Logs.info('first pass on %s',self.__class__.__name__) + cur_hash=self.hash_aux_nodes() + self.call_latex() + self.hash_aux_nodes() + self.bibtopic() self.bibfile() self.bibunits() self.makeindex() - hash='' + self.makeglossaries() for i in range(10): - prev_hash=hash - try: - hashes=[Utils.h_file(x.abspath())for x in self.aux_nodes] - hash=Utils.h_list(hashes) - except(OSError,IOError): - error('could not read aux.h') - pass - if hash and hash==prev_hash: + prev_hash=cur_hash + cur_hash=self.hash_aux_nodes() + if not cur_hash: + Logs.error('No aux.h to process') + if cur_hash and cur_hash==prev_hash: break - warn('calling %s'%self.__class__.__name__) - self.env.env={} - self.env.env.update(os.environ) - self.env.env.update({'TEXINPUTS':self.TEXINPUTS}) - self.env.SRCFILE=srcfile - self.check_status('error when calling %s'%self.__class__.__name__,fun()) + Logs.info('calling %s',self.__class__.__name__) + self.call_latex() + def hash_aux_nodes(self): + try: + self.aux_nodes + except AttributeError: + try: + self.aux_nodes=self.scan_aux(self.inputs[0].change_ext('.aux')) + except IOError: + return None + return Utils.h_list([Utils.h_file(x.abspath())for x in self.aux_nodes]) + def call_latex(self): + self.env.env={} + self.env.env.update(os.environ) + self.env.env.update({'TEXINPUTS':self.texinputs()}) + self.env.SRCFILE=self.inputs[0].abspath() + self.check_status('error when calling latex',self.texfun()) class latex(tex): texfun,vars=Task.compile_fun('${LATEX} ${LATEXFLAGS} ${SRCFILE}',shell=False) class pdflatex(tex): @@ -190,18 +240,25 @@ run_str='${PDF2PS} ${PDF2PSFLAGS} ${SRC} ${TGT}' color='BLUE' after=['latex','pdflatex','xelatex'] +@feature('tex') +@before_method('process_source') def apply_tex(self): - if not getattr(self,'type',None)in['latex','pdflatex','xelatex']: + if not getattr(self,'type',None)in('latex','pdflatex','xelatex'): self.type='pdflatex' - tree=self.bld outs=Utils.to_list(getattr(self,'outs',[])) - self.env['PROMPT_LATEX']=getattr(self,'prompt',1) + self.env.PROMPT_LATEX=getattr(self,'prompt',1) deps_lst=[] if getattr(self,'deps',None): deps=self.to_list(self.deps) - for filename in deps: - n=self.path.find_resource(filename) - if not n in deps_lst:deps_lst.append(n) + for dep in deps: + if isinstance(dep,str): + n=self.path.find_resource(dep) + if not n: + self.bld.fatal('Could not find %r for %r'%(dep,self)) + if not n in deps_lst: + deps_lst.append(n) + elif isinstance(dep,Node.Node): + deps_lst.append(dep) for node in self.to_nodes(self.source): if self.type=='latex': task=self.create_task('latex',node,node.change_ext('.dvi')) @@ -211,32 +268,44 @@ task=self.create_task('xelatex',node,node.change_ext('.pdf')) task.env=self.env if deps_lst: - try: - lst=tree.node_deps[task.uid()] - for n in deps_lst: - if not n in lst: - lst.append(n) - except KeyError: - tree.node_deps[task.uid()]=deps_lst + for n in deps_lst: + if not n in task.dep_nodes: + task.dep_nodes.append(n) + if hasattr(self,'texinputs_nodes'): + task.texinputs_nodes=self.texinputs_nodes + else: + task.texinputs_nodes=[node.parent,node.parent.get_bld(),self.path,self.path.get_bld()] + lst=os.environ.get('TEXINPUTS','') + if self.env.TEXINPUTS: + lst+=os.pathsep+self.env.TEXINPUTS + if lst: + lst=lst.split(os.pathsep) + for x in lst: + if x: + if os.path.isabs(x): + p=self.bld.root.find_node(x) + if p: + task.texinputs_nodes.append(p) + else: + Logs.error('Invalid TEXINPUTS folder %s',x) + else: + Logs.error('Cannot resolve relative paths in TEXINPUTS %s',x) if self.type=='latex': if'ps'in outs: tsk=self.create_task('dvips',task.outputs,node.change_ext('.ps')) - tsk.env.env={'TEXINPUTS':node.parent.abspath()+os.pathsep+self.path.abspath()+os.pathsep+self.path.get_bld().abspath()} + tsk.env.env=dict(os.environ) if'pdf'in outs: tsk=self.create_task('dvipdf',task.outputs,node.change_ext('.pdf')) - tsk.env.env={'TEXINPUTS':node.parent.abspath()+os.pathsep+self.path.abspath()+os.pathsep+self.path.get_bld().abspath()} + tsk.env.env=dict(os.environ) elif self.type=='pdflatex': if'ps'in outs: self.create_task('pdf2ps',task.outputs,node.change_ext('.ps')) self.source=[] def configure(self): v=self.env - for p in'tex latex pdflatex xelatex bibtex dvips dvipdf ps2pdf makeindex pdf2ps'.split(): + for p in'tex latex pdflatex xelatex bibtex dvips dvipdf ps2pdf makeindex pdf2ps makeglossaries'.split(): try: self.find_program(p,var=p.upper()) except self.errors.ConfigurationError: pass - v['DVIPSFLAGS']='-Ppdf' - -feature('tex')(apply_tex) -before_method('process_source')(apply_tex) \ No newline at end of file + v.DVIPSFLAGS='-Ppdf' diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/vala.py glmark2-2021.02/waflib/Tools/vala.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/vala.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/vala.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,202 +1,203 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os.path,shutil,re -from waflib import Context,Task,Utils,Logs,Options,Errors -from waflib.TaskGen import extension +import re +from waflib import Context,Task,Utils,Logs,Options,Errors,Node +from waflib.TaskGen import extension,taskgen_method from waflib.Configure import conf class valac(Task.Task): vars=["VALAC","VALAC_VERSION","VALAFLAGS"] ext_out=['.h'] def run(self): - env=self.env - cmd=[env['VALAC'],'-C','--quiet'] - cmd.extend(Utils.to_list(env['VALAFLAGS'])) - if self.threading: - cmd.append('--thread') - if self.profile: - cmd.append('--profile=%s'%self.profile) - if self.target_glib: - cmd.append('--target-glib=%s'%self.target_glib) - if self.is_lib: - cmd.append('--library='+self.target) - for x in self.outputs: - if x.name.endswith('.h'): - cmd.append('--header='+x.name) - if self.gir: - cmd.append('--gir=%s.gir'%self.gir) - for vapi_dir in self.vapi_dirs: - cmd.append('--vapidir=%s'%vapi_dir) - for package in self.packages: - cmd.append('--pkg=%s'%package) - for package in self.packages_private: - cmd.append('--pkg=%s'%package) - for define in self.vala_defines: - cmd.append('--define=%s'%define) - cmd.extend([a.abspath()for a in self.inputs]) - ret=self.exec_command(cmd,cwd=self.outputs[0].parent.abspath()) + cmd=self.env.VALAC+self.env.VALAFLAGS + resources=getattr(self,'vala_exclude',[]) + cmd.extend([a.abspath()for a in self.inputs if a not in resources]) + ret=self.exec_command(cmd,cwd=self.vala_dir_node.abspath()) if ret: return ret - for x in self.outputs: - if id(x.parent)!=id(self.outputs[0].parent): - shutil.move(self.outputs[0].parent.abspath()+os.sep+x.name,x.abspath()) - if self.packages and getattr(self,'deps_node',None): - self.deps_node.write('\n'.join(self.packages)) + if self.generator.dump_deps_node: + self.generator.dump_deps_node.write('\n'.join(self.generator.packages)) return ret -def vala_file(self,node): - valatask=getattr(self,"valatask",None) - if not valatask: - def _get_api_version(): - api_version='1.0' - if hasattr(Context.g_module,'API_VERSION'): - version=Context.g_module.API_VERSION.split(".") - if version[0]=="0": - api_version="0."+version[1] - else: - api_version=version[0]+".0" - return api_version - valatask=self.create_task('valac') - self.valatask=valatask - self.includes=Utils.to_list(getattr(self,'includes',[])) - self.uselib=self.to_list(getattr(self,'uselib',[])) - valatask.packages=[] - valatask.packages_private=Utils.to_list(getattr(self,'packages_private',[])) - valatask.vapi_dirs=[] - valatask.target=self.target - valatask.threading=False - valatask.install_path=getattr(self,'install_path','') - valatask.profile=getattr(self,'profile','gobject') - valatask.vala_defines=getattr(self,'vala_defines',[]) - valatask.target_glib=None - valatask.gir=getattr(self,'gir',None) - valatask.gir_path=getattr(self,'gir_path','${DATAROOTDIR}/gir-1.0') - valatask.vapi_path=getattr(self,'vapi_path','${DATAROOTDIR}/vala/vapi') - valatask.pkg_name=getattr(self,'pkg_name',self.env['PACKAGE']) - valatask.header_path=getattr(self,'header_path','${INCLUDEDIR}/%s-%s'%(valatask.pkg_name,_get_api_version())) - valatask.install_binding=getattr(self,'install_binding',True) - valatask.is_lib=False - if not'cprogram'in self.features: - valatask.is_lib=True - packages=Utils.to_list(getattr(self,'packages',[])) - vapi_dirs=Utils.to_list(getattr(self,'vapi_dirs',[])) - includes=[] - if hasattr(self,'use'): - local_packages=Utils.to_list(self.use)[:] - seen=[] - while len(local_packages)>0: - package=local_packages.pop() - if package in seen: - continue - seen.append(package) - try: - package_obj=self.bld.get_tgen_by_name(package) - except Errors.WafError: - continue - package_name=package_obj.target - package_node=package_obj.path - package_dir=package_node.path_from(self.path) - for task in package_obj.tasks: - for output in task.outputs: - if output.name==package_name+".vapi": - valatask.set_run_after(task) - if package_name not in packages: - packages.append(package_name) - if package_dir not in vapi_dirs: - vapi_dirs.append(package_dir) - if package_dir not in includes: - includes.append(package_dir) - if hasattr(package_obj,'use'): - lst=self.to_list(package_obj.use) - lst.reverse() - local_packages=[pkg for pkg in lst if pkg not in seen]+local_packages - valatask.packages=packages - for vapi_dir in vapi_dirs: - try: - valatask.vapi_dirs.append(self.path.find_dir(vapi_dir).abspath()) - valatask.vapi_dirs.append(self.path.find_dir(vapi_dir).get_bld().abspath()) - except AttributeError: - Logs.warn("Unable to locate Vala API directory: '%s'"%vapi_dir) - self.includes.append(self.bld.srcnode.abspath()) - self.includes.append(self.bld.bldnode.abspath()) - for include in includes: +@taskgen_method +def init_vala_task(self): + self.profile=getattr(self,'profile','gobject') + self.packages=packages=Utils.to_list(getattr(self,'packages',[])) + self.use=Utils.to_list(getattr(self,'use',[])) + if packages and not self.use: + self.use=packages[:] + if self.profile=='gobject': + if not'GOBJECT'in self.use: + self.use.append('GOBJECT') + def addflags(flags): + self.env.append_value('VALAFLAGS',flags) + if self.profile: + addflags('--profile=%s'%self.profile) + valatask=self.valatask + if hasattr(self,'vala_dir'): + if isinstance(self.vala_dir,str): + valatask.vala_dir_node=self.path.get_bld().make_node(self.vala_dir) try: - self.includes.append(self.path.find_dir(include).abspath()) - self.includes.append(self.path.find_dir(include).get_bld().abspath()) - except AttributeError: - Logs.warn("Unable to locate include directory: '%s'"%include) - if valatask.profile=='gobject': - if hasattr(self,'target_glib'): - Logs.warn('target_glib on vala tasks is not supported --vala-target-glib=MAJOR.MINOR from the vala tool options') - if getattr(Options.options,'vala_target_glib',None): - valatask.target_glib=Options.options.vala_target_glib - if not'GOBJECT'in self.uselib: - self.uselib.append('GOBJECT') - if hasattr(self,'threading'): - if valatask.profile=='gobject': - valatask.threading=self.threading - if not'GTHREAD'in self.uselib: - self.uselib.append('GTHREAD') + valatask.vala_dir_node.mkdir() + except OSError: + raise self.bld.fatal('Cannot create the vala dir %r'%valatask.vala_dir_node) + else: + valatask.vala_dir_node=self.vala_dir + else: + valatask.vala_dir_node=self.path.get_bld() + addflags('--directory=%s'%valatask.vala_dir_node.abspath()) + if hasattr(self,'thread'): + if self.profile=='gobject': + if not'GTHREAD'in self.use: + self.use.append('GTHREAD') + else: + Logs.warn('Profile %s means no threading support',self.profile) + self.thread=False + if self.thread: + addflags('--thread') + self.is_lib='cprogram'not in self.features + if self.is_lib: + addflags('--library=%s'%self.target) + h_node=valatask.vala_dir_node.find_or_declare('%s.h'%self.target) + valatask.outputs.append(h_node) + addflags('--header=%s'%h_node.name) + valatask.outputs.append(valatask.vala_dir_node.find_or_declare('%s.vapi'%self.target)) + if getattr(self,'gir',None): + gir_node=valatask.vala_dir_node.find_or_declare('%s.gir'%self.gir) + addflags('--gir=%s'%gir_node.name) + valatask.outputs.append(gir_node) + self.vala_target_glib=getattr(self,'vala_target_glib',getattr(Options.options,'vala_target_glib',None)) + if self.vala_target_glib: + addflags('--target-glib=%s'%self.vala_target_glib) + addflags(['--define=%s'%x for x in Utils.to_list(getattr(self,'vala_defines',[]))]) + packages_private=Utils.to_list(getattr(self,'packages_private',[])) + addflags(['--pkg=%s'%x for x in packages_private]) + def _get_api_version(): + api_version='1.0' + if hasattr(Context.g_module,'API_VERSION'): + version=Context.g_module.API_VERSION.split(".") + if version[0]=="0": + api_version="0."+version[1] else: - Logs.warn("Profile %s does not have threading support"%valatask.profile) - if valatask.is_lib: - valatask.outputs.append(self.path.find_or_declare('%s.h'%self.target)) - valatask.outputs.append(self.path.find_or_declare('%s.vapi'%self.target)) - if valatask.gir: - valatask.outputs.append(self.path.find_or_declare('%s.gir'%self.gir)) - if valatask.packages: - d=self.path.find_or_declare('%s.deps'%self.target) - valatask.outputs.append(d) - valatask.deps_node=d - valatask.inputs.append(node) - c_node=node.change_ext('.c') - valatask.outputs.append(c_node) - self.source.append(c_node) - if valatask.is_lib and valatask.install_binding: + api_version=version[0]+".0" + return api_version + self.includes=Utils.to_list(getattr(self,'includes',[])) + valatask.install_path=getattr(self,'install_path','') + valatask.vapi_path=getattr(self,'vapi_path','${DATAROOTDIR}/vala/vapi') + valatask.pkg_name=getattr(self,'pkg_name',self.env.PACKAGE) + valatask.header_path=getattr(self,'header_path','${INCLUDEDIR}/%s-%s'%(valatask.pkg_name,_get_api_version())) + valatask.install_binding=getattr(self,'install_binding',True) + self.vapi_dirs=vapi_dirs=Utils.to_list(getattr(self,'vapi_dirs',[])) + if hasattr(self,'use'): + local_packages=Utils.to_list(self.use)[:] + seen=[] + while len(local_packages)>0: + package=local_packages.pop() + if package in seen: + continue + seen.append(package) + try: + package_obj=self.bld.get_tgen_by_name(package) + except Errors.WafError: + continue + package_name=package_obj.target + for task in package_obj.tasks: + for output in task.outputs: + if output.name==package_name+".vapi": + valatask.set_run_after(task) + if package_name not in packages: + packages.append(package_name) + if output.parent not in vapi_dirs: + vapi_dirs.append(output.parent) + if output.parent not in self.includes: + self.includes.append(output.parent) + if hasattr(package_obj,'use'): + lst=self.to_list(package_obj.use) + lst.reverse() + local_packages=[pkg for pkg in lst if pkg not in seen]+local_packages + addflags(['--pkg=%s'%p for p in packages]) + for vapi_dir in vapi_dirs: + if isinstance(vapi_dir,Node.Node): + v_node=vapi_dir + else: + v_node=self.path.find_dir(vapi_dir) + if not v_node: + Logs.warn('Unable to locate Vala API directory: %r',vapi_dir) + else: + addflags('--vapidir=%s'%v_node.abspath()) + self.dump_deps_node=None + if self.is_lib and self.packages: + self.dump_deps_node=valatask.vala_dir_node.find_or_declare('%s.deps'%self.target) + valatask.outputs.append(self.dump_deps_node) + if self.is_lib and valatask.install_binding: headers_list=[o for o in valatask.outputs if o.suffix()==".h"] try: self.install_vheader.source=headers_list except AttributeError: - self.install_vheader=self.bld.install_files(valatask.header_path,headers_list,self.env) + self.install_vheader=self.add_install_files(install_to=valatask.header_path,install_from=headers_list) vapi_list=[o for o in valatask.outputs if(o.suffix()in(".vapi",".deps"))] try: self.install_vapi.source=vapi_list except AttributeError: - self.install_vapi=self.bld.install_files(valatask.vapi_path,vapi_list,self.env) - gir_list=[o for o in valatask.outputs if o.suffix()==".gir"] + self.install_vapi=self.add_install_files(install_to=valatask.vapi_path,install_from=vapi_list) + gir_list=[o for o in valatask.outputs if o.suffix()=='.gir'] try: self.install_gir.source=gir_list except AttributeError: - self.install_gir=self.bld.install_files(valatask.gir_path,gir_list,self.env) -valac=Task.update_outputs(valac) + self.install_gir=self.add_install_files(install_to=getattr(self,'gir_path','${DATAROOTDIR}/gir-1.0'),install_from=gir_list) + if hasattr(self,'vala_resources'): + nodes=self.to_nodes(self.vala_resources) + valatask.vala_exclude=getattr(valatask,'vala_exclude',[])+nodes + valatask.inputs.extend(nodes) + for x in nodes: + addflags(['--gresources',x.abspath()]) +@extension('.vala','.gs') +def vala_file(self,node): + try: + valatask=self.valatask + except AttributeError: + valatask=self.valatask=self.create_task('valac') + self.init_vala_task() + valatask.inputs.append(node) + name=node.name[:node.name.rfind('.')]+'.c' + c_node=valatask.vala_dir_node.find_or_declare(name) + valatask.outputs.append(c_node) + self.source.append(c_node) +@conf def find_valac(self,valac_name,min_version): valac=self.find_program(valac_name,var='VALAC') try: - output=self.cmd_and_log(valac+' --version') + output=self.cmd_and_log(valac+['--version']) except Exception: valac_version=None else: - ver=re.search(r'\d+.\d+.\d+',output).group(0).split('.') + ver=re.search(r'\d+.\d+.\d+',output).group().split('.') valac_version=tuple([int(x)for x in ver]) self.msg('Checking for %s version >= %r'%(valac_name,min_version),valac_version,valac_version and valac_version>=min_version) if valac and valac_version= %r"%(valac_name,valac_version,min_version)) - self.env['VALAC_VERSION']=valac_version + self.env.VALAC_VERSION=valac_version return valac +@conf def check_vala(self,min_version=(0,8,0),branch=None): + if self.env.VALA_MINVER: + min_version=self.env.VALA_MINVER + if self.env.VALA_MINVER_BRANCH: + branch=self.env.VALA_MINVER_BRANCH if not branch: branch=min_version[:2] try: find_valac(self,'valac-%d.%d'%(branch[0],branch[1]),min_version) except self.errors.ConfigurationError: find_valac(self,'valac',min_version) +@conf def check_vala_deps(self): - if not self.env['HAVE_GOBJECT']: + if not self.env.HAVE_GOBJECT: pkg_args={'package':'gobject-2.0','uselib_store':'GOBJECT','args':'--cflags --libs'} if getattr(Options.options,'vala_target_glib',None): pkg_args['atleast_version']=Options.options.vala_target_glib self.check_cfg(**pkg_args) - if not self.env['HAVE_GTHREAD']: + if not self.env.HAVE_GTHREAD: pkg_args={'package':'gthread-2.0','uselib_store':'GTHREAD','args':'--cflags --libs'} if getattr(Options.options,'vala_target_glib',None): pkg_args['atleast_version']=Options.options.vala_target_glib @@ -205,12 +206,9 @@ self.load('gnu_dirs') self.check_vala_deps() self.check_vala() + self.add_os_flags('VALAFLAGS') + self.env.append_unique('VALAFLAGS',['-C']) def options(opt): opt.load('gnu_dirs') valaopts=opt.add_option_group('Vala Compiler Options') valaopts.add_option('--vala-target-glib',default=None,dest='vala_target_glib',metavar='MAJOR.MINOR',help='Target version of glib for Vala GObject code generation') - -extension('.vala','.gs')(vala_file) -conf(find_valac) -conf(check_vala) -conf(check_vala_deps) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/waf_unit_test.py glmark2-2021.02/waflib/Tools/waf_unit_test.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/waf_unit_test.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/waf_unit_test.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,63 +1,99 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys -from waflib.TaskGen import feature,after_method +import os +from waflib.TaskGen import feature,after_method,taskgen_method from waflib import Utils,Task,Logs,Options +from waflib.Tools import ccroot testlock=Utils.threading.Lock() +@feature('test') +@after_method('apply_link','process_use') def make_test(self): - if getattr(self,'link_task',None): - self.create_task('utest',self.link_task.outputs) + if not getattr(self,'link_task',None): + return + tsk=self.create_task('utest',self.link_task.outputs) + if getattr(self,'ut_str',None): + self.ut_run,lst=Task.compile_fun(self.ut_str,shell=getattr(self,'ut_shell',False)) + tsk.vars=lst+tsk.vars + if getattr(self,'ut_cwd',None): + if isinstance(self.ut_cwd,str): + if os.path.isabs(self.ut_cwd): + self.ut_cwd=self.bld.root.make_node(self.ut_cwd) + else: + self.ut_cwd=self.path.make_node(self.ut_cwd) + else: + self.ut_cwd=tsk.inputs[0].parent + if not hasattr(self,'ut_paths'): + paths=[] + for x in self.tmp_use_sorted: + try: + y=self.bld.get_tgen_by_name(x).link_task + except AttributeError: + pass + else: + if not isinstance(y,ccroot.stlink_task): + paths.append(y.outputs[0].parent.abspath()) + self.ut_paths=os.pathsep.join(paths)+os.pathsep + if not hasattr(self,'ut_env'): + self.ut_env=dct=dict(os.environ) + def add_path(var): + dct[var]=self.ut_paths+dct.get(var,'') + if Utils.is_win32: + add_path('PATH') + elif Utils.unversioned_sys_platform()=='darwin': + add_path('DYLD_LIBRARY_PATH') + add_path('LD_LIBRARY_PATH') + else: + add_path('LD_LIBRARY_PATH') +@taskgen_method +def add_test_results(self,tup): + Logs.debug("ut: %r",tup) + self.utest_result=tup + try: + self.bld.utest_results.append(tup) + except AttributeError: + self.bld.utest_results=[tup] class utest(Task.Task): color='PINK' after=['vnum','inst'] vars=[] def runnable_status(self): + if getattr(Options.options,'no_tests',False): + return Task.SKIP_ME ret=super(utest,self).runnable_status() if ret==Task.SKIP_ME: if getattr(Options.options,'all_tests',False): return Task.RUN_ME return ret + def get_test_env(self): + return self.generator.ut_env + def post_run(self): + super(utest,self).post_run() + if getattr(Options.options,'clear_failed_tests',False)and self.waf_unit_test_results[1]: + self.generator.bld.task_sigs[self.uid()]=None def run(self): - filename=self.inputs[0].abspath() - self.ut_exec=getattr(self,'ut_exec',[filename]) + if hasattr(self.generator,'ut_run'): + return self.generator.ut_run(self) + self.ut_exec=getattr(self.generator,'ut_exec',[self.inputs[0].abspath()]) if getattr(self.generator,'ut_fun',None): self.generator.ut_fun(self) - try: - fu=getattr(self.generator.bld,'all_test_paths') - except AttributeError: - fu=os.environ.copy() - self.generator.bld.all_test_paths=fu - lst=[] - for g in self.generator.bld.groups: - for tg in g: - if getattr(tg,'link_task',None): - lst.append(tg.link_task.outputs[0].parent.abspath()) - def add_path(dct,path,var): - dct[var]=os.pathsep.join(Utils.to_list(path)+[os.environ.get(var,'')]) - if Utils.is_win32: - add_path(fu,lst,'PATH') - elif Utils.unversioned_sys_platform()=='darwin': - add_path(fu,lst,'DYLD_LIBRARY_PATH') - add_path(fu,lst,'LD_LIBRARY_PATH') - else: - add_path(fu,lst,'LD_LIBRARY_PATH') - cwd=getattr(self.generator,'ut_cwd','')or self.inputs[0].parent.abspath() - proc=Utils.subprocess.Popen(self.ut_exec,cwd=cwd,env=fu,stderr=Utils.subprocess.PIPE,stdout=Utils.subprocess.PIPE) + testcmd=getattr(self.generator,'ut_cmd',False)or getattr(Options.options,'testcmd',False) + if testcmd: + self.ut_exec=(testcmd%' '.join(self.ut_exec)).split(' ') + return self.exec_command(self.ut_exec) + def exec_command(self,cmd,**kw): + Logs.debug('runner: %r',cmd) + proc=Utils.subprocess.Popen(cmd,cwd=self.get_cwd().abspath(),env=self.get_test_env(),stderr=Utils.subprocess.PIPE,stdout=Utils.subprocess.PIPE) (stdout,stderr)=proc.communicate() - tup=(filename,proc.returncode,stdout,stderr) - self.generator.utest_result=tup + self.waf_unit_test_results=tup=(self.inputs[0].abspath(),proc.returncode,stdout,stderr) testlock.acquire() try: - bld=self.generator.bld - Logs.debug("ut: %r",tup) - try: - bld.utest_results.append(tup) - except AttributeError: - bld.utest_results=[tup] + return self.generator.add_test_results(tup) finally: testlock.release() + def get_cwd(self): + return self.generator.ut_cwd def summary(bld): lst=getattr(bld,'utest_results',[]) if lst: @@ -72,8 +108,18 @@ for(f,code,out,err)in lst: if code: Logs.pprint('CYAN',' %s'%f) +def set_exit_code(bld): + lst=getattr(bld,'utest_results',[]) + for(f,code,out,err)in lst: + if code: + msg=[] + if out: + msg.append('stdout:%s%s'%(os.linesep,out.decode('utf-8'))) + if err: + msg.append('stderr:%s%s'%(os.linesep,err.decode('utf-8'))) + bld.fatal(os.linesep.join(msg)) def options(opt): + opt.add_option('--notests',action='store_true',default=False,help='Exec no unit tests',dest='no_tests') opt.add_option('--alltests',action='store_true',default=False,help='Exec all unit tests',dest='all_tests') - -feature('test')(make_test) -after_method('apply_link')(make_test) \ No newline at end of file + opt.add_option('--clear-failed',action='store_true',default=False,help='Force failed unit tests to run again next time',dest='clear_failed_tests') + opt.add_option('--testcmd',action='store',default=False,help='Run the unit tests using the test-cmd string'' example "--test-cmd="valgrind --error-exitcode=1'' %s" to run under valgrind',dest='testcmd') diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/winres.py glmark2-2021.02/waflib/Tools/winres.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/winres.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/winres.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,34 +1,53 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file +import re from waflib import Task from waflib.TaskGen import extension +from waflib.Tools import c_preproc +@extension('.rc') def rc_file(self,node): obj_ext='.rc.o' - if self.env['WINRC_TGT_F']=='/fo': + if self.env.WINRC_TGT_F=='/fo': obj_ext='.res' rctask=self.create_task('winrc',node,node.change_ext(obj_ext)) try: self.compiled_tasks.append(rctask) except AttributeError: self.compiled_tasks=[rctask] +re_lines=re.compile('(?:^[ \t]*(#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*?)\s*$)|''(?:^\w+[ \t]*(ICON|BITMAP|CURSOR|HTML|FONT|MESSAGETABLE|TYPELIB|REGISTRY|D3DFX)[ \t]*(.*?)\s*$)',re.IGNORECASE|re.MULTILINE) +class rc_parser(c_preproc.c_parser): + def filter_comments(self,node): + code=node.read() + if c_preproc.use_trigraphs: + for(a,b)in c_preproc.trig_def:code=code.split(a).join(b) + code=c_preproc.re_nl.sub('',code) + code=c_preproc.re_cpp.sub(c_preproc.repl,code) + ret=[] + for m in re.finditer(re_lines,code): + if m.group(2): + ret.append((m.group(2),m.group(3))) + else: + ret.append(('include',m.group(5))) + return ret class winrc(Task.Task): run_str='${WINRC} ${WINRCFLAGS} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${WINRC_TGT_F} ${TGT} ${WINRC_SRC_F} ${SRC}' color='BLUE' + def scan(self): + tmp=rc_parser(self.generator.includes_nodes) + tmp.start(self.inputs[0],self.env) + return(tmp.nodes,tmp.names) def configure(conf): v=conf.env - v['WINRC_TGT_F']='-o' - v['WINRC_SRC_F']='-i' - if not conf.env.WINRC: + v.WINRC_TGT_F='-o' + v.WINRC_SRC_F='-i' + if not v.WINRC: if v.CC_NAME=='msvc': - conf.find_program('RC',var='WINRC',path_list=v['PATH']) - v['WINRC_TGT_F']='/fo' - v['WINRC_SRC_F']='' + conf.find_program('RC',var='WINRC',path_list=v.PATH) + v.WINRC_TGT_F='/fo' + v.WINRC_SRC_F='' else: - conf.find_program('windres',var='WINRC',path_list=v['PATH']) - if not conf.env.WINRC: + conf.find_program('windres',var='WINRC',path_list=v.PATH) + if not v.WINRC: conf.fatal('winrc was not found!') - v['WINRCFLAGS']=[] - -extension('.rc')(rc_file) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/xlc.py glmark2-2021.02/waflib/Tools/xlc.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/xlc.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/xlc.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,39 +1,40 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_xlc(conf): cc=conf.find_program(['xlc_r','xlc'],var='CC') - cc=conf.cmd_to_list(cc) conf.get_xlc_version(cc) conf.env.CC_NAME='xlc' - conf.env.CC=cc +@conf def xlc_common_flags(conf): v=conf.env - v['CC_SRC_F']=[] - v['CC_TGT_F']=['-c','-o'] - if not v['LINK_CC']:v['LINK_CC']=v['CC'] - v['CCLNK_SRC_F']=[] - v['CCLNK_TGT_F']=['-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['RPATH_ST']='-Wl,-rpath,%s' - v['SONAME_ST']=[] - v['SHLIB_MARKER']=[] - v['STLIB_MARKER']=[] - v['LINKFLAGS_cprogram']=['-Wl,-brtl'] - v['cprogram_PATTERN']='%s' - v['CFLAGS_cshlib']=['-fPIC'] - v['LINKFLAGS_cshlib']=['-G','-Wl,-brtl,-bexpfull'] - v['cshlib_PATTERN']='lib%s.so' - v['LINKFLAGS_cstlib']=[] - v['cstlib_PATTERN']='lib%s.a' + v.CC_SRC_F=[] + v.CC_TGT_F=['-c','-o'] + if not v.LINK_CC: + v.LINK_CC=v.CC + v.CCLNK_SRC_F=[] + v.CCLNK_TGT_F=['-o'] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.RPATH_ST='-Wl,-rpath,%s' + v.SONAME_ST=[] + v.SHLIB_MARKER=[] + v.STLIB_MARKER=[] + v.LINKFLAGS_cprogram=['-Wl,-brtl'] + v.cprogram_PATTERN='%s' + v.CFLAGS_cshlib=['-fPIC'] + v.LINKFLAGS_cshlib=['-G','-Wl,-brtl,-bexpfull'] + v.cshlib_PATTERN='lib%s.so' + v.LINKFLAGS_cstlib=[] + v.cstlib_PATTERN='lib%s.a' def configure(conf): conf.find_xlc() conf.find_ar() @@ -41,6 +42,3 @@ conf.cc_load_tools() conf.cc_add_flags() conf.link_add_flags() - -conf(find_xlc) -conf(xlc_common_flags) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/xlcxx.py glmark2-2021.02/waflib/Tools/xlcxx.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Tools/xlcxx.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Tools/xlcxx.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,39 +1,40 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file from waflib.Tools import ccroot,ar from waflib.Configure import conf +@conf def find_xlcxx(conf): cxx=conf.find_program(['xlc++_r','xlc++'],var='CXX') - cxx=conf.cmd_to_list(cxx) conf.get_xlc_version(cxx) conf.env.CXX_NAME='xlc++' - conf.env.CXX=cxx +@conf def xlcxx_common_flags(conf): v=conf.env - v['CXX_SRC_F']=[] - v['CXX_TGT_F']=['-c','-o'] - if not v['LINK_CXX']:v['LINK_CXX']=v['CXX'] - v['CXXLNK_SRC_F']=[] - v['CXXLNK_TGT_F']=['-o'] - v['CPPPATH_ST']='-I%s' - v['DEFINES_ST']='-D%s' - v['LIB_ST']='-l%s' - v['LIBPATH_ST']='-L%s' - v['STLIB_ST']='-l%s' - v['STLIBPATH_ST']='-L%s' - v['RPATH_ST']='-Wl,-rpath,%s' - v['SONAME_ST']=[] - v['SHLIB_MARKER']=[] - v['STLIB_MARKER']=[] - v['LINKFLAGS_cxxprogram']=['-Wl,-brtl'] - v['cxxprogram_PATTERN']='%s' - v['CXXFLAGS_cxxshlib']=['-fPIC'] - v['LINKFLAGS_cxxshlib']=['-G','-Wl,-brtl,-bexpfull'] - v['cxxshlib_PATTERN']='lib%s.so' - v['LINKFLAGS_cxxstlib']=[] - v['cxxstlib_PATTERN']='lib%s.a' + v.CXX_SRC_F=[] + v.CXX_TGT_F=['-c','-o'] + if not v.LINK_CXX: + v.LINK_CXX=v.CXX + v.CXXLNK_SRC_F=[] + v.CXXLNK_TGT_F=['-o'] + v.CPPPATH_ST='-I%s' + v.DEFINES_ST='-D%s' + v.LIB_ST='-l%s' + v.LIBPATH_ST='-L%s' + v.STLIB_ST='-l%s' + v.STLIBPATH_ST='-L%s' + v.RPATH_ST='-Wl,-rpath,%s' + v.SONAME_ST=[] + v.SHLIB_MARKER=[] + v.STLIB_MARKER=[] + v.LINKFLAGS_cxxprogram=['-Wl,-brtl'] + v.cxxprogram_PATTERN='%s' + v.CXXFLAGS_cxxshlib=['-fPIC'] + v.LINKFLAGS_cxxshlib=['-G','-Wl,-brtl,-bexpfull'] + v.cxxshlib_PATTERN='lib%s.so' + v.LINKFLAGS_cxxstlib=[] + v.cxxstlib_PATTERN='lib%s.a' def configure(conf): conf.find_xlcxx() conf.find_ar() @@ -41,6 +42,3 @@ conf.cxx_load_tools() conf.cxx_add_flags() conf.link_add_flags() - -conf(find_xlcxx) -conf(xlcxx_common_flags) \ No newline at end of file diff -Nru glmark2-2014.03+git20150611.fa71af2d/waflib/Utils.py glmark2-2021.02/waflib/Utils.py --- glmark2-2014.03+git20150611.fa71af2d/waflib/Utils.py 2015-06-11 22:39:36.000000000 +0800 +++ glmark2-2021.02/waflib/Utils.py 2021-04-25 01:47:22.000000000 +0800 @@ -1,43 +1,40 @@ #! /usr/bin/env python # encoding: utf-8 -# WARNING! Do not edit! http://waf.googlecode.com/git/docs/wafbook/single.html#_obtaining_the_waf_file +# WARNING! Do not edit! https://waf.io/book/index.html#_obtaining_the_waf_file -import os,sys,errno,traceback,inspect,re,shutil,datetime,gc +import os,sys,errno,traceback,inspect,re,datetime,platform,base64 try: - import subprocess -except: - try: - import waflib.extras.subprocess as subprocess - except: - print("The subprocess module is missing (python2.3?):\n try calling 'waf update --files=subprocess'\n or add a copy of subprocess.py to the python libraries") -try: - from collections import deque + import cPickle except ImportError: - class deque(list): - def popleft(self): - return self.pop(0) + import pickle as cPickle +if os.name=='posix'and sys.version_info[0]<3: + try: + import subprocess32 as subprocess + except ImportError: + import subprocess +else: + import subprocess +from collections import deque,defaultdict try: import _winreg as winreg -except: +except ImportError: try: import winreg - except: + except ImportError: winreg=None from waflib import Errors try: - from collections import UserDict -except: - from UserDict import UserDict -try: from hashlib import md5 -except: +except ImportError: try: from md5 import md5 - except: + except ImportError: pass try: import threading -except: +except ImportError: + if not'JOBS'in os.environ: + os.environ['JOBS']='1' class threading(object): pass class Lock(object): @@ -46,59 +43,182 @@ def release(self): pass threading.Lock=threading.Thread=Lock -else: - run_old=threading.Thread.run - def run(*args,**kwargs): - try: - run_old(*args,**kwargs) - except(KeyboardInterrupt,SystemExit): - raise - except: - sys.excepthook(*sys.exc_info()) - threading.Thread.run=run -SIG_NIL='iluvcuteoverload' +SIG_NIL='SIG_NIL_SIG_NIL_'.encode() O644=420 O755=493 rot_chr=['\\','|','/','-'] rot_idx=0 -try: - from collections import defaultdict -except ImportError: - class defaultdict(dict): - def __init__(self,default_factory): - super(defaultdict,self).__init__() - self.default_factory=default_factory - def __getitem__(self,key): - try: - return super(defaultdict,self).__getitem__(key) - except KeyError: - value=self.default_factory() - self[key]=value - return value -is_win32=sys.platform in('win32','cli') -indicator='\x1b[K%s%s%s\r' -if is_win32 and'NOCOLOR'in os.environ: - indicator='%s%s%s\r' -def readf(fname,m='r'): +class ordered_iter_dict(dict): + def __init__(self,*k,**kw): + self.lst=deque() + dict.__init__(self,*k,**kw) + def clear(self): + dict.clear(self) + self.lst=deque() + def __setitem__(self,key,value): + if key in dict.keys(self): + self.lst.remove(key) + dict.__setitem__(self,key,value) + self.lst.append(key) + def __delitem__(self,key): + dict.__delitem__(self,key) + try: + self.lst.remove(key) + except ValueError: + pass + def __iter__(self): + return reversed(self.lst) + def keys(self): + return reversed(self.lst) +class lru_node(object): + __slots__=('next','prev','key','val') + def __init__(self): + self.next=self + self.prev=self + self.key=None + self.val=None +class lru_cache(object): + __slots__=('maxlen','table','head') + def __init__(self,maxlen=100): + self.maxlen=maxlen + self.table={} + self.head=lru_node() + for x in range(maxlen-1): + node=lru_node() + node.prev=self.head.prev + node.next=self.head + node.prev.next=node + node.next.prev=node + def __getitem__(self,key): + node=self.table[key] + if node is self.head: + return node.val + node.prev.next=node.next + node.next.prev=node.prev + node.next=self.head.next + node.prev=self.head + node.next.prev=node + node.prev.next=node + self.head=node + return node.val + def __setitem__(self,key,val): + node=self.head=self.head.next + try: + del self.table[node.key] + except KeyError: + pass + node.key=key + node.val=val + self.table[key]=node +is_win32=os.sep=='\\'or sys.platform=='win32' +def readf(fname,m='r',encoding='ISO8859-1'): + if sys.hexversion>0x3000000 and not'b'in m: + m+='b' + f=open(fname,m) + try: + txt=f.read() + finally: + f.close() + if encoding: + txt=txt.decode(encoding) + else: + txt=txt.decode() + else: + f=open(fname,m) + try: + txt=f.read() + finally: + f.close() + return txt +def writef(fname,data,m='w',encoding='ISO8859-1'): + if sys.hexversion>0x3000000 and not'b'in m: + data=data.encode(encoding) + m+='b' f=open(fname,m) try: - txt=f.read() + f.write(data) + finally: + f.close() +def h_file(fname): + f=open(fname,'rb') + m=md5() + try: + while fname: + fname=f.read(200000) + m.update(fname) finally: f.close() + return m.digest() +def readf_win32(f,m='r',encoding='ISO8859-1'): + flags=os.O_NOINHERIT|os.O_RDONLY + if'b'in m: + flags|=os.O_BINARY + if'+'in m: + flags|=os.O_RDWR + try: + fd=os.open(f,flags) + except OSError: + raise IOError('Cannot read from %r'%f) + if sys.hexversion>0x3000000 and not'b'in m: + m+='b' + f=os.fdopen(fd,m) + try: + txt=f.read() + finally: + f.close() + if encoding: + txt=txt.decode(encoding) + else: + txt=txt.decode() + else: + f=os.fdopen(fd,m) + try: + txt=f.read() + finally: + f.close() return txt -def h_file(filename): - f=open(filename,'rb') +def writef_win32(f,data,m='w',encoding='ISO8859-1'): + if sys.hexversion>0x3000000 and not'b'in m: + data=data.encode(encoding) + m+='b' + flags=os.O_CREAT|os.O_TRUNC|os.O_WRONLY|os.O_NOINHERIT + if'b'in m: + flags|=os.O_BINARY + if'+'in m: + flags|=os.O_RDWR + try: + fd=os.open(f,flags) + except OSError: + raise OSError('Cannot write to %r'%f) + f=os.fdopen(fd,m) + try: + f.write(data) + finally: + f.close() +def h_file_win32(fname): + try: + fd=os.open(fname,os.O_BINARY|os.O_RDONLY|os.O_NOINHERIT) + except OSError: + raise OSError('Cannot read from %r'%fname) + f=os.fdopen(fd,'rb') m=md5() try: - while filename: - filename=f.read(100000) - m.update(filename) + while fname: + fname=f.read(200000) + m.update(fname) finally: f.close() return m.digest() +readf_unix=readf +writef_unix=writef +h_file_unix=h_file +if hasattr(os,'O_NOINHERIT')and sys.hexversion<0x3040000: + readf=readf_win32 + writef=writef_win32 + h_file=h_file_win32 try: x=''.encode('hex') -except: +except LookupError: import binascii def to_hex(s): ret=binascii.hexlify(s) @@ -114,27 +234,27 @@ :param s: string to convert :type s: string """ +def listdir_win32(s): + if not s: + try: + import ctypes + except ImportError: + return[x+':\\'for x in'ABCDEFGHIJKLMNOPQRSTUVWXYZ'] + else: + dlen=4 + maxdrives=26 + buf=ctypes.create_string_buffer(maxdrives*dlen) + ndrives=ctypes.windll.kernel32.GetLogicalDriveStringsA(maxdrives*dlen,ctypes.byref(buf)) + return[str(buf.raw[4*i:4*i+2].decode('ascii'))for i in range(int(ndrives/dlen))] + if len(s)==2 and s[1]==":": + s+=os.sep + if not os.path.isdir(s): + e=OSError('%s is not a directory'%s) + e.errno=errno.ENOENT + raise e + return os.listdir(s) listdir=os.listdir if is_win32: - def listdir_win32(s): - if not s: - try: - import ctypes - except: - return[x+':\\'for x in list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')] - else: - dlen=4 - maxdrives=26 - buf=ctypes.create_string_buffer(maxdrives*dlen) - ndrives=ctypes.windll.kernel32.GetLogicalDriveStringsA(maxdrives,ctypes.byref(buf)) - return[buf.raw[4*i:4*i+3].decode('ascii')for i in range(int(ndrives/dlen))] - if len(s)==2 and s[1]==":": - s+=os.sep - if not os.path.isdir(s): - e=OSError() - e.errno=errno.ENOENT - raise e - return os.listdir(s) listdir=listdir_win32 def num2ver(ver): if isinstance(ver,str): @@ -147,26 +267,13 @@ return ret return ver def ex_stack(): - exc_type,exc_value,tb=sys.exc_info() - exc_lines=traceback.format_exception(exc_type,exc_value,tb) - return''.join(exc_lines) -def to_list(sth): - if isinstance(sth,str): - return sth.split() + return traceback.format_exc() +def to_list(val): + if isinstance(val,str): + return val.split() else: - return sth -re_nl=re.compile('\r*\n',re.M) -def str_to_dict(txt): - tbl={} - lines=re_nl.split(txt) - for x in lines: - x=x.strip() - if not x or x.startswith('#')or x.find('=')<0: - continue - tmp=x.split('=') - tbl[tmp[0].strip()]='='.join(tmp[1:]).strip() - return tbl -def split_path(path): + return val +def split_path_unix(path): return path.split('/') def split_path_cygwin(path): if path.startswith('//'): @@ -174,56 +281,95 @@ ret[0]='/'+ret[0] return ret return path.split('/') -re_sp=re.compile('[/\\\\]') +re_sp=re.compile('[/\\\\]+') def split_path_win32(path): if path.startswith('\\\\'): - ret=re.split(re_sp,path)[2:] + ret=re_sp.split(path)[2:] ret[0]='\\'+ret[0] return ret - return re.split(re_sp,path) + return re_sp.split(path) +msysroot=None +def split_path_msys(path): + if path.startswith(('/','\\'))and not path.startswith(('\\','\\\\')): + global msysroot + if not msysroot: + msysroot=subprocess.check_output(['cygpath','-w','/']).decode(sys.stdout.encoding or'iso8859-1') + msysroot=msysroot.strip() + path=os.path.normpath(msysroot+os.sep+path) + return split_path_win32(path) if sys.platform=='cygwin': split_path=split_path_cygwin elif is_win32: - split_path=split_path_win32 + if os.environ.get('MSYSTEM'): + split_path=split_path_msys + else: + split_path=split_path_win32 +else: + split_path=split_path_unix split_path.__doc__=""" -Split a path by / or \\. This function is not like os.path.split +Splits a path by / or \\; do not confuse this function with with ``os.path.split`` :type path: string :param path: path to split -:return: list of strings +:return: list of string """ def check_dir(path): if not os.path.isdir(path): try: os.makedirs(path) - except OSError ,e: + except OSError as e: if not os.path.isdir(path): raise Errors.WafError('Cannot create the folder %r'%path,ex=e) +def check_exe(name,env=None): + if not name: + raise ValueError('Cannot execute an empty string!') + def is_exe(fpath): + return os.path.isfile(fpath)and os.access(fpath,os.X_OK) + fpath,fname=os.path.split(name) + if fpath and is_exe(name): + return os.path.abspath(name) + else: + env=env or os.environ + for path in env['PATH'].split(os.pathsep): + path=path.strip('"') + exe_file=os.path.join(path,name) + if is_exe(exe_file): + return os.path.abspath(exe_file) + return None def def_attrs(cls,**kw): for k,v in kw.items(): if not hasattr(cls,k): setattr(cls,k,v) def quote_define_name(s): - fu=re.compile("[^a-zA-Z0-9]").sub("_",s) + fu=re.sub('[^a-zA-Z0-9]','_',s) + fu=re.sub('_+','_',fu) fu=fu.upper() return fu def h_list(lst): - m=md5() - m.update(str(lst)) - return m.digest() + return md5(repr(lst).encode()).digest() def h_fun(fun): try: return fun.code except AttributeError: try: h=inspect.getsource(fun) - except IOError: - h="nocode" + except EnvironmentError: + h='nocode' try: fun.code=h except AttributeError: pass return h +def h_cmd(ins): + if isinstance(ins,str): + ret=ins + elif isinstance(ins,list)or isinstance(ins,tuple): + ret=str([h_cmd(x)for x in ins]) + else: + ret=str(h_fun(ins)) + if sys.hexversion>0x3000000: + ret=ret.encode('iso8859-1','xmlcharrefreplace') + return ret reg_subst=re.compile(r"(\\\\)|(\$\$)|\$\{([^}]+)\}") def subst_vars(expr,params): def repl_var(m): @@ -244,7 +390,7 @@ return'elf' def unversioned_sys_platform(): s=sys.platform - if s=='java': + if s.startswith('java'): from java.lang import System s=System.getProperty('os.name') if s=='Mac OS X': @@ -254,13 +400,16 @@ elif s=='OS/2': return'os2' elif s=='HP-UX': - return'hpux' + return'hp-ux' elif s in('SunOS','Solaris'): return'sunos' else:s=s.lower() if s=='powerpc': return'darwin' - if s=='win32'or s.endswith('os2')and s!='sunos2':return s + if s=='win32'or s=='os2': + return s + if s=='cli'and os.name=='nt': + return'win32' return re.split('\d+$',s)[0] def nada(*k,**kw): pass @@ -269,10 +418,10 @@ self.start_time=datetime.datetime.utcnow() def __str__(self): delta=datetime.datetime.utcnow()-self.start_time - days=int(delta.days) - hours=delta.seconds//3600 - minutes=(delta.seconds-hours*3600)//60 - seconds=delta.seconds-hours*3600-minutes*60+float(delta.microseconds)/1000/1000 + days=delta.days + hours,rem=divmod(delta.seconds,3600) + minutes,seconds=divmod(rem,60) + seconds+=delta.microseconds*1e-6 result='' if days: result+='%dd'%days @@ -281,18 +430,6 @@ if days or hours or minutes: result+='%dm'%minutes return'%s%.3fs'%(result,seconds) -if is_win32: - old=shutil.copy2 - def copy2(src,dst): - old(src,dst) - shutil.copystat(src,dst) - setattr(shutil,'copy2',copy2) -if os.name=='java': - try: - gc.disable() - gc.enable() - except NotImplementedError: - gc.disable=gc.enable def read_la_file(path): sp=re.compile(r'^([^=]+)=\'(.*)\'$') dc={} @@ -303,26 +440,17 @@ except ValueError: pass return dc -def nogc(fun): - def f(*k,**kw): - try: - gc.disable() - ret=fun(*k,**kw) - finally: - gc.enable() - return ret - f.__doc__=fun.__doc__ - return f def run_once(fun): cache={} - def wrap(k): + def wrap(*k): try: return cache[k] except KeyError: - ret=fun(k) + ret=fun(*k) cache[k]=ret return ret wrap.__cache__=cache + wrap.__name__=fun.__name__ return wrap def get_registry_app_path(key,filename): if not winreg: @@ -334,3 +462,74 @@ else: if os.path.isfile(result): return result +def lib64(): + if os.sep=='/': + if platform.architecture()[0]=='64bit': + if os.path.exists('/usr/lib64')and not os.path.exists('/usr/lib32'): + return'64' + return'' +def sane_path(p): + return os.path.abspath(os.path.expanduser(p)) +process_pool=[] +def get_process(): + try: + return process_pool.pop() + except IndexError: + filepath=os.path.dirname(os.path.abspath(__file__))+os.sep+'processor.py' + cmd=[sys.executable,'-c',readf(filepath)] + return subprocess.Popen(cmd,stdout=subprocess.PIPE,stdin=subprocess.PIPE,bufsize=0) +def run_prefork_process(cmd,kwargs,cargs): + if not'env'in kwargs: + kwargs['env']=dict(os.environ) + try: + obj=base64.b64encode(cPickle.dumps([cmd,kwargs,cargs])) + except TypeError: + return run_regular_process(cmd,kwargs,cargs) + proc=get_process() + if not proc: + return run_regular_process(cmd,kwargs,cargs) + proc.stdin.write(obj) + proc.stdin.write('\n'.encode()) + proc.stdin.flush() + obj=proc.stdout.readline() + if not obj: + raise OSError('Preforked sub-process %r died'%proc.pid) + process_pool.append(proc) + ret,out,err,ex,trace=cPickle.loads(base64.b64decode(obj)) + if ex: + if ex=='OSError': + raise OSError(trace) + elif ex=='ValueError': + raise ValueError(trace) + else: + raise Exception(trace) + return ret,out,err +def run_regular_process(cmd,kwargs,cargs={}): + proc=subprocess.Popen(cmd,**kwargs) + if kwargs.get('stdout')or kwargs.get('stderr'): + out,err=proc.communicate(**cargs) + status=proc.returncode + else: + out,err=(None,None) + status=proc.wait(**cargs) + return status,out,err +def run_process(cmd,kwargs,cargs={}): + if kwargs.get('stdout')and kwargs.get('stderr'): + return run_prefork_process(cmd,kwargs,cargs) + else: + return run_regular_process(cmd,kwargs,cargs) +def alloc_process_pool(n,force=False): + global run_process,get_process,alloc_process_pool + if not force: + n=max(n-len(process_pool),0) + try: + lst=[get_process()for x in range(n)] + except OSError: + run_process=run_regular_process + get_process=alloc_process_pool=nada + else: + for x in lst: + process_pool.append(x) +if sys.platform=='cli'or not sys.executable: + run_process=run_regular_process + get_process=alloc_process_pool=nada diff -Nru glmark2-2014.03+git20150611.fa71af2d/wscript glmark2-2021.02/wscript --- glmark2-2014.03+git20150611.fa71af2d/wscript 2015-06-11 22:41:04.000000000 +0800 +++ glmark2-2021.02/wscript 2021-04-25 01:47:22.000000000 +0800 @@ -1,27 +1,32 @@ -import commands -import subprocess -import os -import Options -import Scripting +import os, platform from waflib import Context out = 'build' top = '.' -VERSION = '2014.03+git20150611.fa71af2d' +VERSION = '2021.02' APPNAME = 'glmark2' FLAVORS = { - 'x11-gl' : 'glmark2', - 'x11-glesv2' : 'glmark2-es2', + 'dispmanx-glesv2' : 'glmark2-es2-dispmanx', 'drm-gl' : 'glmark2-drm', 'drm-glesv2' : 'glmark2-es2-drm', 'mir-gl' : 'glmark2-mir', 'mir-glesv2' : 'glmark2-es2-mir', 'wayland-gl' : 'glmark2-wayland', - 'wayland-glesv2' : 'glmark2-es2-wayland' + 'wayland-glesv2' : 'glmark2-es2-wayland', + 'win32-gl': 'glmark2-win32', + 'win32-glesv2': 'glmark2-es2', + 'x11-gl' : 'glmark2', + 'x11-glesv2' : 'glmark2-es2', } -FLAVORS_STR = ", ".join(FLAVORS.keys()) +FLAVORS_STR = ", ".join(sorted(list(FLAVORS) + ['all-linux', 'all-win32'])) + +def linux_flavors(): + return [f for f in FLAVORS.keys() if not f.startswith('win32')] + +def win32_flavors(): + return [f for f in FLAVORS.keys() if f.startswith('win32')] def option_list_cb(option, opt, value, parser): value = value.split(',') @@ -36,68 +41,146 @@ return False def options(opt): - opt.tool_options('gnu_dirs') - opt.tool_options('compiler_cc') - opt.tool_options('compiler_cxx') + opt.load('gnu_dirs') + opt.load('compiler_c') + opt.load('compiler_cxx') opt.add_option('--with-flavors', type = 'string', action='callback', callback=option_list_cb, dest = 'flavors', - help = "a list of flavors to build (%s, all)" % FLAVORS_STR) + help = "a list of flavors to build (%s, all-linux (except dispmanx-glesv2), all-win32)" % FLAVORS_STR) opt.parser.set_default('flavors', []) + opt.parser.add_option('--version-suffix', type='string', action='store', dest='versionsuffix', + default='', help='add a suffix to the version number') + opt.add_option('--no-debug', action='store_false', dest = 'debug', default = True, help='disable compiler debug information') opt.add_option('--no-opt', action='store_false', dest = 'opt', default = True, help='disable compiler optimizations') - opt.add_option('--no-werror', action='store_false', dest = 'werror', - default = True, help='disable treating compiler warnings as errors') opt.add_option('--data-path', action='store', dest = 'data_path', help='path to main data (also see --data(root)dir)') opt.add_option('--extras-path', action='store', dest = 'extras_path', help='path to additional data (models, shaders, textures)') +def get_data_path(ctx): + if ctx.options.data_path is not None: + return ctx.options.data_path + else: + return os.path.join(ctx.env.DATADIR, 'glmark2') + def configure(ctx): # Special 'all' flavor - if 'all' in Options.options.flavors: - Options.options.flavors = list(set(Options.options.flavors) | set(FLAVORS.keys())) - Options.options.flavors.remove('all') + if 'all-linux' in ctx.options.flavors: + ctx.options.flavors = list(set(ctx.options.flavors) | set(linux_flavors())) + ctx.options.flavors.remove('all-linux') + # dispmanx is a special case, we don't want to include it in all + ctx.options.flavors.remove('dispmanx-glesv2') + + if 'all-win32' in ctx.options.flavors: + ctx.options.flavors = list(set(ctx.options.flavors) | set(win32_flavors())) + ctx.options.flavors.remove('all-win32') + + is_win = any(True for f in win32_flavors() if f in ctx.options.flavors) + is_linux = any(True for f in linux_flavors() if f in ctx.options.flavors) + + if is_win and is_linux: + ctx.fatal('Simultaneous Windows and Linux builds are not supported') # Ensure the flavors are valid - for flavor in Options.options.flavors: + for flavor in ctx.options.flavors: if flavor not in FLAVORS: ctx.fatal('Unknown flavor: %s. Supported flavors are %s' % (flavor, FLAVORS_STR)) - if not Options.options.flavors: - ctx.fatal('You need to select at least one flavor. Supported flavors are %s' % FLAVORS_STR) + if not ctx.options.flavors: + ctx.fatal('You need to select at least one flavor with --with-flavors=flavor1[,flavor2]...\n' + + 'Supported flavors are %s' % FLAVORS_STR) for flavor in FLAVORS: - if flavor in Options.options.flavors: + if flavor in ctx.options.flavors: ctx.env["FLAVOR_%s" % flavor.upper().replace('-','_')] = FLAVORS[flavor] - ctx.check_tool('gnu_dirs') - ctx.check_tool('compiler_cc') - ctx.check_tool('compiler_cxx') + ctx.load('gnu_dirs') + ctx.load('compiler_c') + ctx.load('compiler_cxx') + + if is_win: + configure_win32(ctx) + else: + configure_linux(ctx) + + ctx.env.append_unique('DEFINES', 'GLMARK_VERSION="%s"' % (VERSION + ctx.options.versionsuffix)) + ctx.env.GLMARK2_VERSION = (VERSION + ctx.options.versionsuffix) + + ctx.msg("Prefix", ctx.env.PREFIX, color = 'PINK') + ctx.msg("Data path", get_data_path(ctx), color = 'PINK') + ctx.msg("Including extras", "Yes" if ctx.env.HAVE_EXTRAS else "No", + color = 'PINK'); + if ctx.env.HAVE_EXTRAS: + ctx.msg("Extras path", ctx.options.extras_path, color = 'PINK') + ctx.msg("Building flavors", ctx.options.flavors) + + +def configure_win32(ctx): + # Check required headers + req_headers = ['stdlib.h', 'string.h', 'stdint.h', 'stdio.h', 'windows.h'] + for header in req_headers: + ctx.check_cc(header_name = header, auto_add_header_name = True, mandatory = True) + + req_libs = [('user32', 'user32'), ('opengl32', 'opengl32'), ('gdi32', 'gdi32')] + for (lib, uselib) in req_libs: + ctx.check_cc(lib = lib, uselib_store = uselib) + + # Prepend CXX flags so that they can be overriden by the + # CXXFLAGS environment variable + if ctx.options.opt: + ctx.env.prepend_value('CXXFLAGS', '-O2') + if ctx.env.CXX_NAME != 'msvc': + if ctx.options.debug: + ctx.env.prepend_value('CXXFLAGS', '-g') + ctx.env.prepend_value('CXXFLAGS', '-std=c++14 -Wall -Wextra -Wnon-virtual-dtor'.split(' ')) + else: + ctx.env.prepend_value('CXXFLAGS', '/EHsc /wd4312'.split(' ')) + + ctx.env.HAVE_EXTRAS = False + if ctx.options.extras_path is not None: + ctx.env.HAVE_EXTRAS = True + ctx.env.append_unique('GLMARK_EXTRAS_PATH', ctx.options.extras_path) + ctx.env.append_unique('DEFINES', 'GLMARK_EXTRAS_PATH="%s"' % ctx.options.extras_path) + + data_path = get_data_path(ctx) + + # Necessary for M_PI + ctx.env.append_unique('DEFINES', '_USE_MATH_DEFINES') + ctx.env.append_unique('DEFINES', 'WIN32') + # String contants have issues with Windows slashes + ctx.env.append_unique('DEFINES', 'GLMARK_DATA_PATH="%s"' % data_path.replace('\\', '/')) + ctx.env.append_unique('GLMARK_DATA_PATH', data_path) + +def configure_linux(ctx): # Check required headers - req_headers = ['stdlib.h', 'string.h', 'unistd.h', 'stdint.h', 'stdio.h', 'jpeglib.h'] + req_headers = ['stdlib.h', 'string.h', 'stdint.h', 'stdio.h', 'dlfcn.h', + 'unistd.h', 'jpeglib.h', 'math.h', 'string.h'] for header in req_headers: - ctx.check_cxx(header_name = header, auto_add_header_name = True, mandatory = True) + ctx.check_cc(header_name = header, auto_add_header_name = True, mandatory = True) # Check for required libs req_libs = [('m', 'm'), ('jpeg', 'jpeg')] for (lib, uselib) in req_libs: - ctx.check_cxx(lib = lib, uselib_store = uselib) + ctx.check_cc(lib = lib, uselib_store = uselib) - # Check required functions - req_funcs = [('memset', 'string.h', []) ,('sqrt', 'math.h', ['m'])] - for func, header, uselib in req_funcs: - ctx.check_cxx(function_name = func, header_name = header, - uselib = uselib, mandatory = True) + # Check for required functions. Note that headers from req_headers + # are already included for this check. + ctx.check_cc(function_name = 'memset', mandatory = True) + # Special test code for sqrt, to make clang/libc++ happy. + ctx.check_cc(fragment = 'int main(void) { double (*p)(double); p = sqrt; }', + msg = 'Checking for function sqrt', + uselib = ['m'], mandatory = True) # Check for a supported version of libpng - supp_png_pkgs = (('libpng12', '1.2'), ('libpng15', '1.5'), ('libpng16', '1.6'),) have_png = False + supp_png_pkgs = (('libpng12', '1.2'), ('libpng15', '1.5'), ('libpng16', '1.6'),) for (pkg, atleast) in supp_png_pkgs: try: pkg_ver = ctx.check_cfg(package=pkg, uselib_store='libpng', atleast_version=atleast, @@ -111,16 +194,28 @@ if not have_png: ctx.fatal('You need to install a supported version of libpng: ' + str(supp_png_pkgs)) + dispmanx = 'dispmanx-glesv2' in ctx.options.flavors + if dispmanx: + # dispmanx uses custom Broadcom libraries that don't follow standard + # Linux packaging. Just force the library setup here. + if len(ctx.options.flavors) != 1: + ctx.fatal("dispmanx can't be built with any other flavor") + + ctx.env.append_value('CXXFLAGS', '-I/opt/vc/include') + + ctx.check_cxx(lib = 'brcmGLESv2', uselib_store = 'glesv2', libpath='/opt/vc/lib') + ctx.check_cxx(lib = ['brcmEGL', 'brcmGLESv2'], uselib_store = 'egl', libpath='/opt/vc/lib') + ctx.check_cxx(lib = ['bcm_host', 'vcos', 'vchiq_arm'], uselib_store = 'dispmanx', libpath='/opt/vc/lib') + # Check optional packages - opt_pkgs = [('x11', 'x11', None, list_contains(Options.options.flavors, 'x11')), - ('gl', 'gl', None, list_contains(Options.options.flavors, 'gl$')), - ('egl', 'egl', None, list_contains(Options.options.flavors, 'glesv2$')), - ('glesv2', 'glesv2', None, list_contains(Options.options.flavors, 'glesv2$')), - ('libdrm','drm', None, list_contains(Options.options.flavors, 'drm')), - ('gbm','gbm', None, list_contains(Options.options.flavors, 'drm')), - ('mirclient','mirclient', '0.13', list_contains(Options.options.flavors, 'mir')), - ('wayland-client','wayland-client', None, list_contains(Options.options.flavors, 'wayland')), - ('wayland-egl','wayland-egl', None, list_contains(Options.options.flavors, 'wayland'))] + opt_pkgs = [('x11', 'x11', None, list_contains(ctx.options.flavors, 'x11')), + ('libdrm','drm', None, list_contains(ctx.options.flavors, 'drm')), + ('gbm','gbm', None, list_contains(ctx.options.flavors, 'drm')), + ('libudev', 'udev', None, list_contains(ctx.options.flavors, 'drm')), + ('mirclient','mirclient', '0.13', list_contains(ctx.options.flavors, 'mir')), + ('wayland-client','wayland-client', None, list_contains(ctx.options.flavors, 'wayland')), + ('wayland-cursor','wayland-cursor', None, list_contains(ctx.options.flavors, 'wayland')), + ('wayland-egl','wayland-egl', None, list_contains(ctx.options.flavors, 'wayland'))] for (pkg, uselib, atleast, mandatory) in opt_pkgs: if atleast is None: ctx.check_cfg(package = pkg, uselib_store = uselib, @@ -129,40 +224,31 @@ ctx.check_cfg(package = pkg, uselib_store = uselib, atleast_version=atleast, args = '--cflags --libs', mandatory = mandatory) + if list_contains(ctx.options.flavors, 'wayland'): + # wayland-protocols >= 1.12 required for xdg-shell stable + ctx.check_cfg(package = 'wayland-protocols', atleast_version = '1.12', + variables = ['pkgdatadir'], uselib_store = 'WAYLAND_PROTOCOLS') + ctx.check_cfg(package = 'wayland-scanner', variables = ['wayland_scanner'], + uselib_store = 'WAYLAND_SCANNER') # Prepend CXX flags so that they can be overriden by the # CXXFLAGS environment variable - if Options.options.opt: + if ctx.options.opt: ctx.env.prepend_value('CXXFLAGS', '-O2') - if Options.options.debug: + if ctx.options.debug: ctx.env.prepend_value('CXXFLAGS', '-g') - if Options.options.werror: - ctx.env.prepend_value('CXXFLAGS', '-Werror') - ctx.env.prepend_value('CXXFLAGS', '-Wall -Wextra -Wnon-virtual-dtor'.split(' ')) + ctx.env.prepend_value('CXXFLAGS', '-std=c++14 -Wall -Wextra -Wnon-virtual-dtor'.split(' ')) ctx.env.HAVE_EXTRAS = False - if Options.options.extras_path is not None: + if ctx.options.extras_path is not None: ctx.env.HAVE_EXTRAS = True - ctx.env.append_unique('GLMARK_EXTRAS_PATH', Options.options.extras_path) - ctx.env.append_unique('DEFINES', 'GLMARK_EXTRAS_PATH="%s"' % Options.options.extras_path) + ctx.env.append_unique('GLMARK_EXTRAS_PATH', ctx.options.extras_path) + ctx.env.append_unique('DEFINES', 'GLMARK_EXTRAS_PATH="%s"' % ctx.options.extras_path) - if Options.options.data_path is not None: - data_path = Options.options.data_path - else: - data_path = os.path.join(ctx.env.DATADIR, 'glmark2') + data_path = get_data_path(ctx) - ctx.env.append_unique('GLMARK_DATA_PATH', data_path) ctx.env.append_unique('DEFINES', 'GLMARK_DATA_PATH="%s"' % data_path) - ctx.env.append_unique('DEFINES', 'GLMARK_VERSION="%s"' % VERSION) - ctx.env.GLMARK2_VERSION = VERSION - - ctx.msg("Prefix", ctx.env.PREFIX, color = 'PINK') - ctx.msg("Data path", data_path, color = 'PINK') - ctx.msg("Including extras", "Yes" if ctx.env.HAVE_EXTRAS else "No", - color = 'PINK'); - if ctx.env.HAVE_EXTRAS: - ctx.msg("Extras path", Options.options.extras_path, color = 'PINK') - ctx.msg("Building flavors", Options.options.flavors) + ctx.env.append_unique('GLMARK_DATA_PATH', data_path) def build(ctx): ctx.recurse('src')